From 4c2019ace3cb414c5a8b7939a7368266d6a6339a Mon Sep 17 00:00:00 2001
From: Bo-Cun Chen <bc-bocun.chen@mediatek.com>
Date: Wed, 14 Jun 2023 16:24:07 +0800
Subject: [PATCH] 999-3003-mt7986-backport-nf-hw-offload-framework-230525

---
 drivers/net/ethernet/mediatek/Makefile        |    3 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.c   |   86 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.h   |  111 +-
 drivers/net/ethernet/mediatek/mtk_ppe.c       | 1089 +++++++++++++++
 drivers/net/ethernet/mediatek/mtk_ppe.h       |  384 ++++++
 .../net/ethernet/mediatek/mtk_ppe_debugfs.c   |  221 +++
 .../net/ethernet/mediatek/mtk_ppe_offload.c   |  566 ++++++++
 drivers/net/ethernet/mediatek/mtk_ppe_regs.h  |  172 +++
 drivers/net/ppp/ppp_generic.c                 |   22 +
 drivers/net/ppp/pppoe.c                       |   24 +
 include/linux/netdevice.h                     |   71 +
 include/linux/ppp_channel.h                   |    3 +
 include/net/dsa.h                             |   10 +
 include/net/flow_offload.h                    |    4 +
 include/net/ip6_route.h                       |    5 +-
 .../net/netfilter/ipv6/nf_conntrack_ipv6.h    |    3 -
 include/net/netfilter/nf_conntrack.h          |   12 +
 include/net/netfilter/nf_conntrack_acct.h     |   11 +
 include/net/netfilter/nf_flow_table.h         |  265 +++-
 include/net/netns/conntrack.h                 |    6 +
 .../linux/netfilter/nf_conntrack_common.h     |    9 +-
 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h |   17 +
 net/8021q/vlan_dev.c                          |   21 +
 net/bridge/br_device.c                        |   49 +
 net/bridge/br_private.h                       |   20 +
 net/bridge/br_vlan.c                          |   55 +
 net/core/dev.c                                |   46 +
 net/dsa/dsa.c                                 |    9 +
 net/dsa/slave.c                               |   37 +-
 net/ipv4/netfilter/Kconfig                    |    4 +-
 net/ipv6/ip6_output.c                         |    2 +-
 net/ipv6/netfilter/Kconfig                    |    3 +-
 net/ipv6/route.c                              |   22 +-
 net/netfilter/Kconfig                         |   14 +-
 net/netfilter/Makefile                        |    4 +-
 net/netfilter/nf_conntrack_core.c             |   20 +-
 net/netfilter/nf_conntrack_proto_tcp.c        |    4 +
 net/netfilter/nf_conntrack_proto_udp.c        |    4 +
 net/netfilter/nf_conntrack_standalone.c       |   34 +-
 net/netfilter/nf_flow_table_core.c            |  445 +++---
 net/netfilter/nf_flow_table_ip.c              |  450 ++++---
 net/netfilter/nf_flow_table_offload.c         | 1194 +++++++++++++++++
 net/netfilter/xt_FLOWOFFLOAD.c                |  795 +++++++++++
 43 files changed, 5883 insertions(+), 443 deletions(-)
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.h
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_regs.h
 create mode 100644 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
 create mode 100644 net/netfilter/nf_flow_table_offload.c
 create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c

60diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
developer7eb15dc2023-06-14 17:44:03 +080061index 13d852c..cfffde4 100755
developer8cb3ac72022-07-04 10:55:14 +080062--- a/drivers/net/ethernet/mediatek/Makefile
63+++ b/drivers/net/ethernet/mediatek/Makefile
developer7eb15dc2023-06-14 17:44:03 +080064@@ -4,6 +4,7 @@
developer8cb3ac72022-07-04 10:55:14 +080065 #
66
67 obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
developer68838542022-10-03 23:42:21 +080068-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o
69+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o \
developer8cb3ac72022-07-04 10:55:14 +080070+ mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
71 obj-$(CONFIG_NET_MEDIATEK_HNAT) += mtk_hnat/
developer7eb15dc2023-06-14 17:44:03 +080072 obj-$(CONFIG_XFRM_OFFLOAD) += mtk_ipsec.o
developer8cb3ac72022-07-04 10:55:14 +080073diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
developer7eb15dc2023-06-14 17:44:03 +080074index 23e2172..76ba1e8 100755
developer8cb3ac72022-07-04 10:55:14 +080075--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
76+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
developer7eb15dc2023-06-14 17:44:03 +080077@@ -2108,6 +2108,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
78 unsigned int pktlen, *rxdcsum;
79 struct net_device *netdev = NULL;
80 dma_addr_t dma_addr = 0;
81+ u32 hash, reason;
82 int mac = 0;
83
84 if (eth->hwlro)
85@@ -2188,10 +2189,21 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
86 skb->dev = netdev;
87 skb_put(skb, pktlen);
88
89- if ((MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_RX_V2)))
90+ if ((MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_RX_V2))) {
91+ reason = FIELD_GET(MTK_RXD5_PPE_CPU_REASON, trxd.rxd5);
92+ hash = trxd.rxd5 & MTK_RXD5_FOE_ENTRY;
93+ if (hash != MTK_RXD5_FOE_ENTRY)
94+ skb_set_hash(skb, jhash_1word(hash, 0),
95+ PKT_HASH_TYPE_L4);
96 rxdcsum = &trxd.rxd3;
97- else
98+ } else {
99+ reason = FIELD_GET(MTK_RXD4_PPE_CPU_REASON, trxd.rxd4);
100+ hash = trxd.rxd4 & MTK_RXD4_FOE_ENTRY;
101+ if (hash != MTK_RXD4_FOE_ENTRY)
102+ skb_set_hash(skb, jhash_1word(hash, 0),
103+ PKT_HASH_TYPE_L4);
104 rxdcsum = &trxd.rxd4;
105+ }
106
107 if (*rxdcsum & eth->soc->txrx.rx_dma_l4_valid)
108 skb->ip_summed = CHECKSUM_UNNECESSARY;
109@@ -2199,6 +2211,9 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
110 skb_checksum_none_assert(skb);
111 skb->protocol = eth_type_trans(skb, netdev);
112
113+ if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED)
114+ mtk_ppe_check_skb(eth->ppe[0], skb, hash);
115+
116 if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
117 if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_RX_V2)) {
118 if (trxd.rxd3 & RX_DMA_VTAG_V2)
119@@ -3472,8 +3487,10 @@ static int mtk_open(struct net_device *dev)
120 {
121 struct mtk_mac *mac = netdev_priv(dev);
122 struct mtk_eth *eth = mac->hw;
123+ const struct mtk_soc_data *soc = eth->soc;
developerdca0fde2022-12-14 11:40:35 +0800124 struct mtk_phylink_priv *phylink_priv = &mac->phylink_priv;
developer7eb15dc2023-06-14 17:44:03 +0800125 u32 id = mtk_mac2xgmii_id(eth, mac->id);
126+ u32 gdm_config;
developerdca0fde2022-12-14 11:40:35 +0800127 int err, i;
128 struct device_node *phy_node;
developer7eb15dc2023-06-14 17:44:03 +0800129
130@@ -3487,10 +3504,11 @@ static int mtk_open(struct net_device *dev)
131 /* we run 2 netdevs on the same dma ring so we only bring it up once */
132 if (!refcount_read(&eth->dma_refcnt)) {
133 int err = mtk_start_dma(eth);
134-
135 if (err)
136 return err;
developer8cb3ac72022-07-04 10:55:14 +0800137
developer7eb15dc2023-06-14 17:44:03 +0800138+ for (i = 0; i < ARRAY_SIZE(eth->ppe); i++)
139+ mtk_ppe_start(eth->ppe[i]);
140
141 /* Indicates CDM to parse the MTK special tag from CPU */
142 if (netdev_uses_dsa(dev)) {
143@@ -3553,7 +3571,10 @@ static int mtk_open(struct net_device *dev)
144 regmap_write(eth->sgmii->pcs[id].regmap,
145 SGMSYS_QPHY_PWR_STATE_CTRL, 0);
developer8cb3ac72022-07-04 10:55:14 +0800146
developerdca0fde2022-12-14 11:40:35 +0800147- mtk_gdm_config(eth, mac->id, MTK_GDMA_TO_PDMA);
developer7eb15dc2023-06-14 17:44:03 +0800148+ gdm_config = soc->offload_version ? soc->reg_map->gdma_to_ppe0
149+ : MTK_GDMA_TO_PDMA;
developer8cb3ac72022-07-04 10:55:14 +0800150+
developerdca0fde2022-12-14 11:40:35 +0800151+ mtk_gdm_config(eth, mac->id, gdm_config);
developer8cb3ac72022-07-04 10:55:14 +0800152
developerdca0fde2022-12-14 11:40:35 +0800153 return 0;
154 }
developer7eb15dc2023-06-14 17:44:03 +0800155@@ -3633,6 +3654,9 @@ static int mtk_stop(struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +0800156
157 mtk_dma_free(eth);
158
developer7eb15dc2023-06-14 17:44:03 +0800159+ for (i = 0; i < ARRAY_SIZE(eth->ppe); i++)
160+ mtk_ppe_stop(eth->ppe[i]);
developer8cb3ac72022-07-04 10:55:14 +0800161+
162 return 0;
163 }
164
developer7eb15dc2023-06-14 17:44:03 +0800165@@ -4408,6 +4432,7 @@ static const struct net_device_ops mtk_netdev_ops = {
developer8cb3ac72022-07-04 10:55:14 +0800166 #ifdef CONFIG_NET_POLL_CONTROLLER
167 .ndo_poll_controller = mtk_poll_controller,
168 #endif
169+ .ndo_setup_tc = mtk_eth_setup_tc,
170 };
171
172 static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
developer7eb15dc2023-06-14 17:44:03 +0800173@@ -4828,6 +4853,27 @@ static int mtk_probe(struct platform_device *pdev)
developer8cb3ac72022-07-04 10:55:14 +0800174 goto err_free_dev;
175 }
176
177+ if (eth->soc->offload_version) {
developer7eb15dc2023-06-14 17:44:03 +0800178+ u32 num_ppe;
179+
180+ num_ppe = MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2) ? 2 : 1;
181+ num_ppe = min_t(u32, ARRAY_SIZE(eth->ppe), num_ppe);
182+ for (i = 0; i < num_ppe; i++) {
183+ u32 ppe_addr = eth->soc->reg_map->ppe_base[i];
184+
185+ eth->ppe[i] = mtk_ppe_init(eth, eth->base + ppe_addr, i);
186+
187+ if (!eth->ppe[i]) {
188+ err = -ENOMEM;
189+ goto err_free_dev;
190+ }
191+ }
developer8cb3ac72022-07-04 10:55:14 +0800192+
193+ err = mtk_eth_offload_init(eth);
194+ if (err)
195+ goto err_free_dev;
196+ }
197+
198 for (i = 0; i < MTK_MAX_DEVS; i++) {
199 if (!eth->netdev[i])
200 continue;
developer7eb15dc2023-06-14 17:44:03 +0800201@@ -4931,6 +4977,10 @@ static const struct mtk_soc_data mt2701_data = {
developer8cb3ac72022-07-04 10:55:14 +0800202 .required_clks = MT7623_CLKS_BITMAP,
203 .required_pctl = true,
204 .has_sram = false,
developer7eb15dc2023-06-14 17:44:03 +0800205+ .has_accounting = false,
206+ .hash_offset = 2,
207+ .offload_version = 1,
208+ .foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
209 .rss_num = 0,
developerdca0fde2022-12-14 11:40:35 +0800210 .txrx = {
211 .txd_size = sizeof(struct mtk_tx_dma),
developer7eb15dc2023-06-14 17:44:03 +0800212@@ -4948,6 +4998,10 @@ static const struct mtk_soc_data mt7621_data = {
developer8cb3ac72022-07-04 10:55:14 +0800213 .required_clks = MT7621_CLKS_BITMAP,
214 .required_pctl = false,
215 .has_sram = false,
developer7eb15dc2023-06-14 17:44:03 +0800216+ .has_accounting = false,
217+ .hash_offset = 2,
218+ .offload_version = 1,
219+ .foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
220 .rss_num = 0,
developerdca0fde2022-12-14 11:40:35 +0800221 .txrx = {
222 .txd_size = sizeof(struct mtk_tx_dma),
developer7eb15dc2023-06-14 17:44:03 +0800223@@ -4966,6 +5020,10 @@ static const struct mtk_soc_data mt7622_data = {
developer8cb3ac72022-07-04 10:55:14 +0800224 .required_clks = MT7622_CLKS_BITMAP,
225 .required_pctl = false,
226 .has_sram = false,
developer7eb15dc2023-06-14 17:44:03 +0800227+ .has_accounting = true,
228+ .hash_offset = 2,
developer8cb3ac72022-07-04 10:55:14 +0800229+ .offload_version = 2,
developer7eb15dc2023-06-14 17:44:03 +0800230+ .foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
231 .rss_num = 0,
developerdca0fde2022-12-14 11:40:35 +0800232 .txrx = {
233 .txd_size = sizeof(struct mtk_tx_dma),
developer7eb15dc2023-06-14 17:44:03 +0800234@@ -4983,6 +5041,10 @@ static const struct mtk_soc_data mt7623_data = {
developer8cb3ac72022-07-04 10:55:14 +0800235 .required_clks = MT7623_CLKS_BITMAP,
236 .required_pctl = true,
237 .has_sram = false,
developer7eb15dc2023-06-14 17:44:03 +0800238+ .has_accounting = false,
239+ .hash_offset = 2,
240+ .offload_version = 1,
241+ .foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
242 .rss_num = 0,
243 .txrx = {
244 .txd_size = sizeof(struct mtk_tx_dma),
245@@ -5001,6 +5063,10 @@ static const struct mtk_soc_data mt7629_data = {
246 .required_clks = MT7629_CLKS_BITMAP,
247 .required_pctl = false,
248 .has_sram = false,
249+ .has_accounting = true,
250+ .hash_offset = 2,
251+ .offload_version = 2,
252+ .foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
253 .rss_num = 0,
254 .txrx = {
255 .txd_size = sizeof(struct mtk_tx_dma),
256@@ -5019,6 +5085,10 @@ static const struct mtk_soc_data mt7986_data = {
257 .required_clks = MT7986_CLKS_BITMAP,
258 .required_pctl = false,
259 .has_sram = true,
260+ .has_accounting = true,
261+ .hash_offset = 4,
developer8cb3ac72022-07-04 10:55:14 +0800262+ .offload_version = 2,
developer7eb15dc2023-06-14 17:44:03 +0800263+ .foe_entry_size = sizeof(struct mtk_foe_entry),
264 .rss_num = 0,
265 .txrx = {
266 .txd_size = sizeof(struct mtk_tx_dma_v2),
267@@ -5037,6 +5107,10 @@ static const struct mtk_soc_data mt7981_data = {
268 .required_clks = MT7981_CLKS_BITMAP,
269 .required_pctl = false,
270 .has_sram = true,
271+ .has_accounting = true,
272+ .hash_offset = 4,
273+ .offload_version = 2,
274+ .foe_entry_size = sizeof(struct mtk_foe_entry),
275 .rss_num = 0,
276 .txrx = {
277 .txd_size = sizeof(struct mtk_tx_dma_v2),
278@@ -5072,6 +5146,10 @@ static const struct mtk_soc_data rt5350_data = {
279 .required_clks = MT7628_CLKS_BITMAP,
280 .required_pctl = false,
281 .has_sram = false,
282+ .has_accounting = false,
283+ .hash_offset = 2,
284+ .offload_version = 1,
285+ .foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
286 .rss_num = 0,
developerdca0fde2022-12-14 11:40:35 +0800287 .txrx = {
288 .txd_size = sizeof(struct mtk_tx_dma),
developer8cb3ac72022-07-04 10:55:14 +0800289diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
developer7eb15dc2023-06-14 17:44:03 +0800290index 06c2b0a..54790df 100755
developer8cb3ac72022-07-04 10:55:14 +0800291--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
292+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
293@@ -15,6 +15,8 @@
294 #include <linux/u64_stats_sync.h>
295 #include <linux/refcount.h>
296 #include <linux/phylink.h>
297+#include <linux/rhashtable.h>
298+#include "mtk_ppe.h"
299
300 #define MTK_QDMA_PAGE_SIZE 2048
301 #define MTK_MAX_RX_LENGTH 1536
developer7eb15dc2023-06-14 17:44:03 +0800302@@ -44,7 +46,8 @@
developer8cb3ac72022-07-04 10:55:14 +0800303 NETIF_F_HW_VLAN_CTAG_TX | \
304 NETIF_F_SG | NETIF_F_TSO | \
305 NETIF_F_TSO6 | \
306- NETIF_F_IPV6_CSUM)
307+ NETIF_F_IPV6_CSUM |\
308+ NETIF_F_HW_TC)
309 #define MTK_SET_FEATURES (NETIF_F_LRO | \
310 NETIF_F_HW_VLAN_CTAG_RX)
311 #define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
developer7eb15dc2023-06-14 17:44:03 +0800312@@ -121,6 +124,7 @@
developer8cb3ac72022-07-04 10:55:14 +0800313 #define MTK_GDMA_UCS_EN BIT(20)
developer7eb15dc2023-06-14 17:44:03 +0800314 #define MTK_GDMA_STRP_CRC BIT(16)
developer8cb3ac72022-07-04 10:55:14 +0800315 #define MTK_GDMA_TO_PDMA 0x0
316+#define MTK_GDMA_TO_PPE 0x4444
317 #define MTK_GDMA_DROP_ALL 0x7777
318
developer7eb15dc2023-06-14 17:44:03 +0800319 /* GDM Egress Control Register */
320@@ -604,11 +608,22 @@
developer8cb3ac72022-07-04 10:55:14 +0800321 #define RX_DMA_TCI(_x) ((_x) & (VLAN_PRIO_MASK | VLAN_VID_MASK))
322 #define RX_DMA_VPID(_x) (((_x) >> 16) & 0xffff)
323
324+/* QDMA descriptor rxd4 */
325+#define MTK_RXD4_FOE_ENTRY GENMASK(13, 0)
326+#define MTK_RXD4_PPE_CPU_REASON GENMASK(18, 14)
327+#define MTK_RXD4_SRC_PORT GENMASK(21, 19)
328+#define MTK_RXD4_ALG GENMASK(31, 22)
329+
330 /* QDMA descriptor rxd4 */
331 #define RX_DMA_L4_VALID BIT(24)
332 #define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
developer7eb15dc2023-06-14 17:44:03 +0800333 #define RX_DMA_SPECIAL_TAG BIT(22) /* switch header in packet */
334
335+/* PDMA descriptor rxd5 */
336+#define MTK_RXD5_FOE_ENTRY GENMASK(14, 0)
337+#define MTK_RXD5_PPE_CPU_REASON GENMASK(22, 18)
338+#define MTK_RXD5_SRC_PORT GENMASK(29, 26)
339+
340 #define RX_DMA_GET_SPORT(_x) (((_x) >> RX_DMA_SPORT_SHIFT) & RX_DMA_SPORT_MASK)
341 #define RX_DMA_GET_SPORT_V2(_x) (((_x) >> RX_DMA_SPORT_SHIFT_V2) & RX_DMA_SPORT_MASK_V2)
342
343@@ -1598,6 +1613,10 @@ struct mtk_reg_map {
344 * the target SoC
345 * @required_pctl A bool value to show whether the SoC requires
346 * the extra setup for those pins used by GMAC.
347+ * @hash_offset Flow table hash offset.
348+ * @foe_entry_size Foe table entry size.
349+ * @has_accounting Bool indicating support for accounting of
350+ * offloaded flows.
351 * @txd_size Tx DMA descriptor size.
352 * @rxd_size Rx DMA descriptor size.
353 * @rx_dma_l4_valid Rx DMA valid register mask.
354@@ -1611,8 +1630,12 @@ struct mtk_soc_data {
355 u64 caps;
356 u64 required_clks;
developer8cb3ac72022-07-04 10:55:14 +0800357 bool required_pctl;
358+ u8 offload_version;
developer7eb15dc2023-06-14 17:44:03 +0800359+ u8 hash_offset;
360+ u16 foe_entry_size;
developer8cb3ac72022-07-04 10:55:14 +0800361 netdev_features_t hw_features;
362 bool has_sram;
developer7eb15dc2023-06-14 17:44:03 +0800363+ bool has_accounting;
364 struct {
365 u32 txd_size;
366 u32 rxd_size;
367@@ -1803,6 +1826,9 @@ struct mtk_eth {
developer8cb3ac72022-07-04 10:55:14 +0800368 int ip_align;
369 spinlock_t syscfg0_lock;
370 struct timer_list mtk_dma_monitor_timer;
371+
developer7eb15dc2023-06-14 17:44:03 +0800372+ struct mtk_ppe *ppe[2];
developer8cb3ac72022-07-04 10:55:14 +0800373+ struct rhashtable flow_table;
374 };
375
376 /* struct mtk_mac - the structure that holds the info about the MACs of the
developer7eb15dc2023-06-14 17:44:03 +0800377@@ -1838,6 +1864,86 @@ extern const struct of_device_id of_mtk_match[];
378 extern u32 mtk_hwlro_stats_ebl;
379 extern u32 dbg_show_level;
380
381+static inline struct mtk_foe_entry *
382+mtk_foe_get_entry(struct mtk_ppe *ppe, u16 hash)
383+{
384+ const struct mtk_soc_data *soc = ppe->eth->soc;
385+
386+ return ppe->foe_table + hash * soc->foe_entry_size;
387+}
388+
389+static inline u32 mtk_get_ib1_ts_mask(struct mtk_eth *eth)
390+{
391+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
392+ return MTK_FOE_IB1_BIND_TIMESTAMP_V2;
393+
394+ return MTK_FOE_IB1_BIND_TIMESTAMP;
395+}
396+
397+static inline u32 mtk_get_ib1_ppoe_mask(struct mtk_eth *eth)
398+{
399+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
400+ return MTK_FOE_IB1_BIND_PPPOE_V2;
401+
402+ return MTK_FOE_IB1_BIND_PPPOE;
403+}
404+
405+static inline u32 mtk_get_ib1_vlan_tag_mask(struct mtk_eth *eth)
406+{
407+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
408+ return MTK_FOE_IB1_BIND_VLAN_TAG_V2;
409+
410+ return MTK_FOE_IB1_BIND_VLAN_TAG;
411+}
412+
413+static inline u32 mtk_get_ib1_vlan_layer_mask(struct mtk_eth *eth)
414+{
415+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
416+ return MTK_FOE_IB1_BIND_VLAN_LAYER_V2;
417+
418+ return MTK_FOE_IB1_BIND_VLAN_LAYER;
419+}
420+
421+static inline u32 mtk_prep_ib1_vlan_layer(struct mtk_eth *eth, u32 val)
422+{
423+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
424+ return FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER_V2, val);
425+
426+ return FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, val);
427+}
428+
429+static inline u32 mtk_get_ib1_vlan_layer(struct mtk_eth *eth, u32 val)
430+{
431+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
432+ return FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER_V2, val);
433+
434+ return FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, val);
435+}
436+
437+static inline u32 mtk_get_ib1_pkt_type_mask(struct mtk_eth *eth)
438+{
439+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
440+ return MTK_FOE_IB1_PACKET_TYPE_V2;
441+
442+ return MTK_FOE_IB1_PACKET_TYPE;
443+}
444+
445+static inline u32 mtk_get_ib1_pkt_type(struct mtk_eth *eth, u32 val)
446+{
447+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
448+ return FIELD_GET(MTK_FOE_IB1_PACKET_TYPE_V2, val);
449+
450+ return FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, val);
451+}
452+
453+static inline u32 mtk_get_ib2_multicast_mask(struct mtk_eth *eth)
454+{
455+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
456+ return MTK_FOE_IB2_MULTICAST_V2;
457+
458+ return MTK_FOE_IB2_MULTICAST;
459+}
460+
461 /* read the hardware status register */
462 void mtk_stats_update_mac(struct mtk_mac *mac);
463
464@@ -1863,6 +1969,9 @@ int mtk_usxgmii_init(struct mtk_eth *eth, struct device_node *r);
465 int mtk_toprgu_init(struct mtk_eth *eth, struct device_node *r);
developer1fb19c92023-03-07 23:45:23 +0800466 int mtk_dump_usxgmii(struct regmap *pmap, char *name, u32 offset, u32 range);
developer8cb3ac72022-07-04 10:55:14 +0800467
468+int mtk_eth_offload_init(struct mtk_eth *eth);
469+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
470+ void *type_data);
developer1fb19c92023-03-07 23:45:23 +0800471 void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev);
developer7eb15dc2023-06-14 17:44:03 +0800472 int mtk_rss_set_indr_tbl(struct mtk_eth *eth, int num);
473 #endif /* MTK_ETH_H */
developer8cb3ac72022-07-04 10:55:14 +0800474diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
475new file mode 100644
developer7eb15dc2023-06-14 17:44:03 +0800476index 0000000..de34366
developer8cb3ac72022-07-04 10:55:14 +0800477--- /dev/null
478+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
developer7eb15dc2023-06-14 17:44:03 +0800479@@ -0,0 +1,1089 @@
developer8cb3ac72022-07-04 10:55:14 +0800480+// SPDX-License-Identifier: GPL-2.0-only
481+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
482+
483+#include <linux/kernel.h>
484+#include <linux/io.h>
485+#include <linux/iopoll.h>
486+#include <linux/etherdevice.h>
487+#include <linux/platform_device.h>
developer7eb15dc2023-06-14 17:44:03 +0800488+#include <linux/if_ether.h>
489+#include <linux/if_vlan.h>
490+#include <net/dst_metadata.h>
491+#include <net/dsa.h>
492+#include "mtk_eth_soc.h"
developer8cb3ac72022-07-04 10:55:14 +0800493+#include "mtk_ppe.h"
494+#include "mtk_ppe_regs.h"
495+
developer7eb15dc2023-06-14 17:44:03 +0800496+static DEFINE_SPINLOCK(ppe_lock);
497+
498+static const struct rhashtable_params mtk_flow_l2_ht_params = {
499+ .head_offset = offsetof(struct mtk_flow_entry, l2_node),
500+ .key_offset = offsetof(struct mtk_flow_entry, data.bridge),
501+ .key_len = offsetof(struct mtk_foe_bridge, key_end),
502+ .automatic_shrinking = true,
503+};
504+
developer8cb3ac72022-07-04 10:55:14 +0800505+static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
506+{
507+ writel(val, ppe->base + reg);
508+}
509+
510+static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg)
511+{
512+ return readl(ppe->base + reg);
513+}
514+
515+static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set)
516+{
517+ u32 val;
518+
519+ val = ppe_r32(ppe, reg);
520+ val &= ~mask;
521+ val |= set;
522+ ppe_w32(ppe, reg, val);
523+
524+ return val;
525+}
526+
527+static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val)
528+{
529+ return ppe_m32(ppe, reg, 0, val);
530+}
531+
532+static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
533+{
534+ return ppe_m32(ppe, reg, val, 0);
535+}
536+
developer7eb15dc2023-06-14 17:44:03 +0800537+static u32 mtk_eth_timestamp(struct mtk_eth *eth)
538+{
539+ return mtk_r32(eth, 0x0010) & mtk_get_ib1_ts_mask(eth);
540+}
541+
developer8cb3ac72022-07-04 10:55:14 +0800542+static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
543+{
544+ int ret;
545+ u32 val;
546+
547+ ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val,
548+ !(val & MTK_PPE_GLO_CFG_BUSY),
549+ 20, MTK_PPE_WAIT_TIMEOUT_US);
550+
551+ if (ret)
552+ dev_err(ppe->dev, "PPE table busy");
553+
554+ return ret;
555+}
556+
developer7eb15dc2023-06-14 17:44:03 +0800557+static int mtk_ppe_mib_wait_busy(struct mtk_ppe *ppe)
558+{
559+ int ret;
560+ u32 val;
561+
562+ ret = readl_poll_timeout_atomic(ppe->base + MTK_PPE_MIB_SER_CR, val,
563+ !(val & MTK_PPE_MIB_SER_CR_ST),
564+ 20, MTK_PPE_WAIT_TIMEOUT_US);
565+
566+ if (ret)
567+ dev_err(ppe->dev, "MIB table busy");
568+
569+ return ret;
570+}
571+
572+static inline struct mtk_foe_accounting *
573+mtk_ppe_acct_data(struct mtk_ppe *ppe, u16 index)
574+{
575+ if (!ppe->acct_table)
576+ return NULL;
577+
578+ return ppe->acct_table + index * sizeof(struct mtk_foe_accounting);
579+}
580+
581+struct mtk_foe_accounting *mtk_ppe_mib_entry_read(struct mtk_ppe *ppe, u16 index)
582+{
583+ u32 byte_cnt_low, byte_cnt_high, pkt_cnt_low, pkt_cnt_high;
584+ u32 val, cnt_r0, cnt_r1, cnt_r2;
585+ struct mtk_foe_accounting *acct;
586+ int ret;
587+
588+ val = FIELD_PREP(MTK_PPE_MIB_SER_CR_ADDR, index) | MTK_PPE_MIB_SER_CR_ST;
589+ ppe_w32(ppe, MTK_PPE_MIB_SER_CR, val);
590+
591+ acct = mtk_ppe_acct_data(ppe, index);
592+ if (!acct)
593+ return NULL;
594+
595+ ret = mtk_ppe_mib_wait_busy(ppe);
596+ if (ret)
597+ return acct;
598+
599+ cnt_r0 = readl(ppe->base + MTK_PPE_MIB_SER_R0);
600+ cnt_r1 = readl(ppe->base + MTK_PPE_MIB_SER_R1);
601+ cnt_r2 = readl(ppe->base + MTK_PPE_MIB_SER_R2);
602+
603+ byte_cnt_low = FIELD_GET(MTK_PPE_MIB_SER_R0_BYTE_CNT_LOW, cnt_r0);
604+ byte_cnt_high = FIELD_GET(MTK_PPE_MIB_SER_R1_BYTE_CNT_HIGH, cnt_r1);
605+ pkt_cnt_low = FIELD_GET(MTK_PPE_MIB_SER_R1_PKT_CNT_LOW, cnt_r1);
606+ pkt_cnt_high = FIELD_GET(MTK_PPE_MIB_SER_R2_PKT_CNT_HIGH, cnt_r2);
607+
608+ acct->bytes += ((u64)byte_cnt_high << 32) | byte_cnt_low;
609+ acct->packets += (pkt_cnt_high << 16) | pkt_cnt_low;
610+
611+ return acct;
612+}
613+
developer8cb3ac72022-07-04 10:55:14 +0800614+static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
615+{
616+ ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
617+ ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
618+}
619+
620+static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable)
621+{
622+ mtk_ppe_cache_clear(ppe);
623+
624+ ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN,
625+ enable * MTK_PPE_CACHE_CTL_EN);
626+}
627+
developer7eb15dc2023-06-14 17:44:03 +0800628+static u32 mtk_ppe_hash_entry(struct mtk_eth *eth, struct mtk_foe_entry *e)
developer8cb3ac72022-07-04 10:55:14 +0800629+{
630+ u32 hv1, hv2, hv3;
631+ u32 hash;
632+
developer7eb15dc2023-06-14 17:44:03 +0800633+ switch (mtk_get_ib1_pkt_type(eth, e->ib1)) {
developer8cb3ac72022-07-04 10:55:14 +0800634+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
635+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
636+ hv1 = e->ipv4.orig.ports;
637+ hv2 = e->ipv4.orig.dest_ip;
638+ hv3 = e->ipv4.orig.src_ip;
639+ break;
640+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
641+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
642+ hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3];
643+ hv1 ^= e->ipv6.ports;
644+
645+ hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2];
646+ hv2 ^= e->ipv6.dest_ip[0];
647+
648+ hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1];
649+ hv3 ^= e->ipv6.src_ip[0];
650+ break;
651+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
652+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
653+ default:
654+ WARN_ON_ONCE(1);
655+ return MTK_PPE_HASH_MASK;
656+ }
657+
658+ hash = (hv1 & hv2) | ((~hv1) & hv3);
659+ hash = (hash >> 24) | ((hash & 0xffffff) << 8);
660+ hash ^= hv1 ^ hv2 ^ hv3;
661+ hash ^= hash >> 16;
developer7eb15dc2023-06-14 17:44:03 +0800662+ hash <<= (ffs(eth->soc->hash_offset) - 1);
developer8cb3ac72022-07-04 10:55:14 +0800663+ hash &= MTK_PPE_ENTRIES - 1;
664+
665+ return hash;
666+}
667+
668+static inline struct mtk_foe_mac_info *
developer7eb15dc2023-06-14 17:44:03 +0800669+mtk_foe_entry_l2(struct mtk_eth *eth, struct mtk_foe_entry *entry)
developer8cb3ac72022-07-04 10:55:14 +0800670+{
developer7eb15dc2023-06-14 17:44:03 +0800671+ int type = mtk_get_ib1_pkt_type(eth, entry->ib1);
672+
673+ if (type == MTK_PPE_PKT_TYPE_BRIDGE)
674+ return &entry->bridge.l2;
developer8cb3ac72022-07-04 10:55:14 +0800675+
676+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
677+ return &entry->ipv6.l2;
678+
679+ return &entry->ipv4.l2;
680+}
681+
682+static inline u32 *
developer7eb15dc2023-06-14 17:44:03 +0800683+mtk_foe_entry_ib2(struct mtk_eth *eth, struct mtk_foe_entry *entry)
developer8cb3ac72022-07-04 10:55:14 +0800684+{
developer7eb15dc2023-06-14 17:44:03 +0800685+ int type = mtk_get_ib1_pkt_type(eth, entry->ib1);
686+
687+ if (type == MTK_PPE_PKT_TYPE_BRIDGE)
688+ return &entry->bridge.ib2;
developer8cb3ac72022-07-04 10:55:14 +0800689+
690+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
691+ return &entry->ipv6.ib2;
692+
693+ return &entry->ipv4.ib2;
694+}
695+
developer7eb15dc2023-06-14 17:44:03 +0800696+int mtk_foe_entry_prepare(struct mtk_eth *eth, struct mtk_foe_entry *entry,
697+ int type, int l4proto, u8 pse_port, u8 *src_mac,
698+ u8 *dest_mac)
developer8cb3ac72022-07-04 10:55:14 +0800699+{
700+ struct mtk_foe_mac_info *l2;
701+ u32 ports_pad, val;
702+
703+ memset(entry, 0, sizeof(*entry));
704+
developer7eb15dc2023-06-14 17:44:03 +0800705+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
706+ val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
707+ FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE_V2, type) |
708+ FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
709+ MTK_FOE_IB1_BIND_CACHE_V2 | MTK_FOE_IB1_BIND_TTL_V2;
710+ entry->ib1 = val;
711+
712+ val = FIELD_PREP(MTK_FOE_IB2_DEST_PORT_V2, pse_port) |
713+ FIELD_PREP(MTK_FOE_IB2_PORT_AG_V2, 0xf);
714+ } else {
715+ int port_mg = eth->soc->offload_version > 1 ? 0 : 0x3f;
716+
717+ val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
718+ FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
719+ FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
720+ MTK_FOE_IB1_BIND_CACHE | MTK_FOE_IB1_BIND_TTL;
721+ entry->ib1 = val;
developer8cb3ac72022-07-04 10:55:14 +0800722+
developer7eb15dc2023-06-14 17:44:03 +0800723+ val = FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port) |
724+ FIELD_PREP(MTK_FOE_IB2_PORT_MG, port_mg) |
725+ FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f);
726+ }
developer8cb3ac72022-07-04 10:55:14 +0800727+
728+ if (is_multicast_ether_addr(dest_mac))
developer7eb15dc2023-06-14 17:44:03 +0800729+ val |= mtk_get_ib2_multicast_mask(eth);
developer8cb3ac72022-07-04 10:55:14 +0800730+
731+ ports_pad = 0xa5a5a500 | (l4proto & 0xff);
732+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
733+ entry->ipv4.orig.ports = ports_pad;
734+ if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
735+ entry->ipv6.ports = ports_pad;
736+
developer7eb15dc2023-06-14 17:44:03 +0800737+ if (type == MTK_PPE_PKT_TYPE_BRIDGE) {
738+ ether_addr_copy(entry->bridge.src_mac, src_mac);
739+ ether_addr_copy(entry->bridge.dest_mac, dest_mac);
740+ entry->bridge.ib2 = val;
741+ l2 = &entry->bridge.l2;
742+ } else if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
developer8cb3ac72022-07-04 10:55:14 +0800743+ entry->ipv6.ib2 = val;
744+ l2 = &entry->ipv6.l2;
745+ } else {
746+ entry->ipv4.ib2 = val;
747+ l2 = &entry->ipv4.l2;
748+ }
749+
750+ l2->dest_mac_hi = get_unaligned_be32(dest_mac);
751+ l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4);
752+ l2->src_mac_hi = get_unaligned_be32(src_mac);
753+ l2->src_mac_lo = get_unaligned_be16(src_mac + 4);
754+
755+ if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
756+ l2->etype = ETH_P_IPV6;
757+ else
758+ l2->etype = ETH_P_IP;
759+
760+ return 0;
761+}
762+
developer7eb15dc2023-06-14 17:44:03 +0800763+int mtk_foe_entry_set_pse_port(struct mtk_eth *eth,
764+ struct mtk_foe_entry *entry, u8 port)
developer8cb3ac72022-07-04 10:55:14 +0800765+{
developer7eb15dc2023-06-14 17:44:03 +0800766+ u32 *ib2 = mtk_foe_entry_ib2(eth, entry);
767+ u32 val = *ib2;
developer8cb3ac72022-07-04 10:55:14 +0800768+
developer7eb15dc2023-06-14 17:44:03 +0800769+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
770+ val &= ~MTK_FOE_IB2_DEST_PORT_V2;
771+ val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT_V2, port);
772+ } else {
773+ val &= ~MTK_FOE_IB2_DEST_PORT;
774+ val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port);
775+ }
developer8cb3ac72022-07-04 10:55:14 +0800776+ *ib2 = val;
777+
778+ return 0;
779+}
780+
developer7eb15dc2023-06-14 17:44:03 +0800781+int mtk_foe_entry_set_ipv4_tuple(struct mtk_eth *eth,
782+ struct mtk_foe_entry *entry, bool egress,
developer8cb3ac72022-07-04 10:55:14 +0800783+ __be32 src_addr, __be16 src_port,
784+ __be32 dest_addr, __be16 dest_port)
785+{
developer7eb15dc2023-06-14 17:44:03 +0800786+ int type = mtk_get_ib1_pkt_type(eth, entry->ib1);
developer8cb3ac72022-07-04 10:55:14 +0800787+ struct mtk_ipv4_tuple *t;
788+
789+ switch (type) {
790+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
791+ if (egress) {
792+ t = &entry->ipv4.new;
793+ break;
794+ }
795+ fallthrough;
796+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
797+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
798+ t = &entry->ipv4.orig;
799+ break;
800+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
801+ entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr);
802+ entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr);
803+ return 0;
804+ default:
805+ WARN_ON_ONCE(1);
806+ return -EINVAL;
807+ }
808+
809+ t->src_ip = be32_to_cpu(src_addr);
810+ t->dest_ip = be32_to_cpu(dest_addr);
811+
812+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
813+ return 0;
814+
815+ t->src_port = be16_to_cpu(src_port);
816+ t->dest_port = be16_to_cpu(dest_port);
817+
818+ return 0;
819+}
820+
developer7eb15dc2023-06-14 17:44:03 +0800821+int mtk_foe_entry_set_ipv6_tuple(struct mtk_eth *eth,
822+ struct mtk_foe_entry *entry,
developer8cb3ac72022-07-04 10:55:14 +0800823+ __be32 *src_addr, __be16 src_port,
824+ __be32 *dest_addr, __be16 dest_port)
825+{
developer7eb15dc2023-06-14 17:44:03 +0800826+ int type = mtk_get_ib1_pkt_type(eth, entry->ib1);
developer8cb3ac72022-07-04 10:55:14 +0800827+ u32 *src, *dest;
828+ int i;
829+
830+ switch (type) {
831+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
832+ src = entry->dslite.tunnel_src_ip;
833+ dest = entry->dslite.tunnel_dest_ip;
834+ break;
835+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
836+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
837+ entry->ipv6.src_port = be16_to_cpu(src_port);
838+ entry->ipv6.dest_port = be16_to_cpu(dest_port);
839+ fallthrough;
840+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
841+ src = entry->ipv6.src_ip;
842+ dest = entry->ipv6.dest_ip;
843+ break;
844+ default:
845+ WARN_ON_ONCE(1);
846+ return -EINVAL;
847+ }
848+
849+ for (i = 0; i < 4; i++)
850+ src[i] = be32_to_cpu(src_addr[i]);
851+ for (i = 0; i < 4; i++)
852+ dest[i] = be32_to_cpu(dest_addr[i]);
853+
854+ return 0;
855+}
856+
developer7eb15dc2023-06-14 17:44:03 +0800857+int mtk_foe_entry_set_dsa(struct mtk_eth *eth, struct mtk_foe_entry *entry,
858+ int port)
developer8cb3ac72022-07-04 10:55:14 +0800859+{
developer7eb15dc2023-06-14 17:44:03 +0800860+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(eth, entry);
developer8cb3ac72022-07-04 10:55:14 +0800861+
862+ l2->etype = BIT(port);
863+
developer7eb15dc2023-06-14 17:44:03 +0800864+ if (!(entry->ib1 & mtk_get_ib1_vlan_layer_mask(eth)))
865+ entry->ib1 |= mtk_prep_ib1_vlan_layer(eth, 1);
developer8cb3ac72022-07-04 10:55:14 +0800866+ else
867+ l2->etype |= BIT(8);
868+
developer7eb15dc2023-06-14 17:44:03 +0800869+ entry->ib1 &= ~mtk_get_ib1_vlan_tag_mask(eth);
developer8cb3ac72022-07-04 10:55:14 +0800870+
871+ return 0;
872+}
873+
developer7eb15dc2023-06-14 17:44:03 +0800874+int mtk_foe_entry_set_vlan(struct mtk_eth *eth, struct mtk_foe_entry *entry,
875+ int vid)
developer8cb3ac72022-07-04 10:55:14 +0800876+{
developer7eb15dc2023-06-14 17:44:03 +0800877+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(eth, entry);
developer8cb3ac72022-07-04 10:55:14 +0800878+
developer7eb15dc2023-06-14 17:44:03 +0800879+ switch (mtk_get_ib1_vlan_layer(eth, entry->ib1)) {
developer8cb3ac72022-07-04 10:55:14 +0800880+ case 0:
developer7eb15dc2023-06-14 17:44:03 +0800881+ entry->ib1 |= mtk_get_ib1_vlan_tag_mask(eth) |
882+ mtk_prep_ib1_vlan_layer(eth, 1);
developer8cb3ac72022-07-04 10:55:14 +0800883+ l2->vlan1 = vid;
884+ return 0;
885+ case 1:
developer7eb15dc2023-06-14 17:44:03 +0800886+ if (!(entry->ib1 & mtk_get_ib1_vlan_tag_mask(eth))) {
developer8cb3ac72022-07-04 10:55:14 +0800887+ l2->vlan1 = vid;
888+ l2->etype |= BIT(8);
889+ } else {
890+ l2->vlan2 = vid;
developer7eb15dc2023-06-14 17:44:03 +0800891+ entry->ib1 += mtk_prep_ib1_vlan_layer(eth, 1);
developer8cb3ac72022-07-04 10:55:14 +0800892+ }
893+ return 0;
894+ default:
895+ return -ENOSPC;
896+ }
897+}
898+
developer7eb15dc2023-06-14 17:44:03 +0800899+int mtk_foe_entry_set_pppoe(struct mtk_eth *eth, struct mtk_foe_entry *entry,
900+ int sid)
developer8cb3ac72022-07-04 10:55:14 +0800901+{
developer7eb15dc2023-06-14 17:44:03 +0800902+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(eth, entry);
developer8cb3ac72022-07-04 10:55:14 +0800903+
developer7eb15dc2023-06-14 17:44:03 +0800904+ if (!(entry->ib1 & mtk_get_ib1_vlan_layer_mask(eth)) ||
905+ (entry->ib1 & mtk_get_ib1_vlan_tag_mask(eth)))
developer8cb3ac72022-07-04 10:55:14 +0800906+ l2->etype = ETH_P_PPP_SES;
907+
developer7eb15dc2023-06-14 17:44:03 +0800908+ entry->ib1 |= mtk_get_ib1_ppoe_mask(eth);
developer8cb3ac72022-07-04 10:55:14 +0800909+ l2->pppoe_id = sid;
910+
911+ return 0;
912+}
913+
developer7eb15dc2023-06-14 17:44:03 +0800914+int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry,
915+ int wdma_idx, int txq, int bss, int wcid)
916+{
917+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(eth, entry);
918+ u32 *ib2 = mtk_foe_entry_ib2(eth, entry);
919+
920+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
921+ *ib2 &= ~MTK_FOE_IB2_PORT_MG_V2;
922+ *ib2 |= FIELD_PREP(MTK_FOE_IB2_RX_IDX, txq) |
923+ MTK_FOE_IB2_WDMA_WINFO_V2;
924+ l2->winfo = FIELD_PREP(MTK_FOE_WINFO_WCID, wcid) |
925+ FIELD_PREP(MTK_FOE_WINFO_BSS, bss);
926+ } else {
927+ *ib2 &= ~MTK_FOE_IB2_PORT_MG;
928+ *ib2 |= MTK_FOE_IB2_WDMA_WINFO;
929+ if (wdma_idx)
930+ *ib2 |= MTK_FOE_IB2_WDMA_DEVIDX;
931+ l2->vlan2 = FIELD_PREP(MTK_FOE_VLAN2_WINFO_BSS, bss) |
932+ FIELD_PREP(MTK_FOE_VLAN2_WINFO_WCID, wcid) |
933+ FIELD_PREP(MTK_FOE_VLAN2_WINFO_RING, txq);
934+ }
935+
936+ return 0;
937+}
938+
developer8cb3ac72022-07-04 10:55:14 +0800939+static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
940+{
941+ return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
942+ FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
943+}
944+
developer7eb15dc2023-06-14 17:44:03 +0800945+int mtk_foe_entry_set_queue(struct mtk_eth *eth, struct mtk_foe_entry *entry,
946+ unsigned int queue)
947+{
948+ u32 *ib2 = mtk_foe_entry_ib2(eth, entry);
949+
950+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
951+ *ib2 &= ~MTK_FOE_IB2_QID_V2;
952+ *ib2 |= FIELD_PREP(MTK_FOE_IB2_QID_V2, queue);
953+ *ib2 |= MTK_FOE_IB2_PSE_QOS_V2;
954+ } else {
955+ *ib2 &= ~MTK_FOE_IB2_QID;
956+ *ib2 |= FIELD_PREP(MTK_FOE_IB2_QID, queue);
957+ *ib2 |= MTK_FOE_IB2_PSE_QOS;
958+ }
959+
960+ return 0;
961+}
962+
963+static int
964+mtk_flow_entry_match_len(struct mtk_eth *eth, struct mtk_foe_entry *entry)
965+{
966+ int type = mtk_get_ib1_pkt_type(eth, entry->ib1);
967+
968+ if (type > MTK_PPE_PKT_TYPE_IPV4_DSLITE)
969+ return offsetof(struct mtk_foe_entry, ipv6._rsv);
970+ else
971+ return offsetof(struct mtk_foe_entry, ipv4.ib2);
972+}
973+
974+static bool
975+mtk_flow_entry_match(struct mtk_eth *eth, struct mtk_flow_entry *entry,
976+ struct mtk_foe_entry *data, int len)
977+{
978+ if ((data->ib1 ^ entry->data.ib1) & MTK_FOE_IB1_UDP)
979+ return false;
980+
981+ return !memcmp(&entry->data.data, &data->data, len - 4);
982+}
983+
984+static void
985+__mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry,
986+ bool set_state)
987+{
988+ struct hlist_node *tmp;
989+
990+ if (entry->type == MTK_FLOW_TYPE_L2) {
991+ rhashtable_remove_fast(&ppe->l2_flows, &entry->l2_node,
992+ mtk_flow_l2_ht_params);
993+
994+ hlist_for_each_entry_safe(entry, tmp, &entry->l2_flows, l2_list)
995+ __mtk_foe_entry_clear(ppe, entry, set_state);
996+ return;
997+ }
998+
999+ if (entry->hash != 0xffff && set_state) {
1000+ struct mtk_foe_entry *hwe = mtk_foe_get_entry(ppe, entry->hash);
1001+
1002+ hwe->ib1 &= ~MTK_FOE_IB1_STATE;
1003+ hwe->ib1 |= FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_INVALID);
1004+ dma_wmb();
1005+ mtk_ppe_cache_clear(ppe);
1006+ }
1007+ entry->hash = 0xffff;
1008+
1009+ if (entry->type != MTK_FLOW_TYPE_L2_SUBFLOW)
1010+ return;
1011+
1012+ hlist_del_init(&entry->l2_list);
1013+ hlist_del_init(&entry->list);
1014+ kfree(entry);
1015+}
1016+
1017+static int __mtk_foe_entry_idle_time(struct mtk_ppe *ppe, u32 ib1)
1018+{
1019+ u32 ib1_ts_mask = mtk_get_ib1_ts_mask(ppe->eth);
1020+ u16 now = mtk_eth_timestamp(ppe->eth);
1021+ u16 timestamp = ib1 & ib1_ts_mask;
1022+
1023+ if (timestamp > now)
1024+ return ib1_ts_mask + 1 - timestamp + now;
1025+ else
1026+ return now - timestamp;
1027+}
1028+
1029+static bool
1030+mtk_flow_entry_update(struct mtk_ppe *ppe, struct mtk_flow_entry *entry,
1031+ u64 *packets, u64 *bytes)
developer8cb3ac72022-07-04 10:55:14 +08001032+{
developer7eb15dc2023-06-14 17:44:03 +08001033+ struct mtk_foe_accounting *acct;
1034+ struct mtk_foe_entry foe = {};
developer8cb3ac72022-07-04 10:55:14 +08001035+ struct mtk_foe_entry *hwe;
developer7eb15dc2023-06-14 17:44:03 +08001036+ u16 hash = entry->hash;
1037+ bool ret = false;
1038+ int len;
developer8cb3ac72022-07-04 10:55:14 +08001039+
developer7eb15dc2023-06-14 17:44:03 +08001040+ if (hash == 0xffff)
1041+ return false;
developer8cb3ac72022-07-04 10:55:14 +08001042+
developer7eb15dc2023-06-14 17:44:03 +08001043+ hwe = mtk_foe_get_entry(ppe, hash);
1044+ len = mtk_flow_entry_match_len(ppe->eth, &entry->data);
1045+ memcpy(&foe, hwe, len);
developer8cb3ac72022-07-04 10:55:14 +08001046+
developer7eb15dc2023-06-14 17:44:03 +08001047+ if (!mtk_flow_entry_match(ppe->eth, entry, &foe, len) ||
1048+ FIELD_GET(MTK_FOE_IB1_STATE, foe.ib1) != MTK_FOE_STATE_BIND) {
1049+ acct = mtk_ppe_acct_data(ppe, hash);
1050+ if (acct) {
1051+ entry->prev_packets += acct->packets;
1052+ entry->prev_bytes += acct->bytes;
1053+ }
1054+
1055+ goto out;
developer8cb3ac72022-07-04 10:55:14 +08001056+ }
1057+
developer7eb15dc2023-06-14 17:44:03 +08001058+ entry->data.ib1 = foe.ib1;
1059+ acct = mtk_ppe_mib_entry_read(ppe, hash);
1060+ ret = true;
1061+
1062+out:
1063+ if (acct) {
1064+ *packets += acct->packets;
1065+ *bytes += acct->bytes;
1066+ }
1067+
1068+ return ret;
1069+}
1070+
1071+static void
1072+mtk_flow_entry_update_l2(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
1073+{
1074+ u32 ib1_ts_mask = mtk_get_ib1_ts_mask(ppe->eth);
1075+ u64 *packets = &entry->packets;
1076+ u64 *bytes = &entry->bytes;
1077+ struct mtk_flow_entry *cur;
1078+ struct hlist_node *tmp;
1079+ int idle;
1080+
1081+ idle = __mtk_foe_entry_idle_time(ppe, entry->data.ib1);
1082+ hlist_for_each_entry_safe(cur, tmp, &entry->l2_flows, l2_list) {
1083+ int cur_idle;
1084+
1085+ if (!mtk_flow_entry_update(ppe, cur, packets, bytes)) {
1086+ entry->prev_packets += cur->prev_packets;
1087+ entry->prev_bytes += cur->prev_bytes;
1088+ __mtk_foe_entry_clear(ppe, entry, false);
1089+ continue;
1090+ }
1091+
1092+ cur_idle = __mtk_foe_entry_idle_time(ppe, cur->data.ib1);
1093+ if (cur_idle >= idle)
1094+ continue;
1095+
1096+ idle = cur_idle;
1097+ entry->data.ib1 &= ~ib1_ts_mask;
1098+ entry->data.ib1 |= cur->data.ib1 & ib1_ts_mask;
1099+ }
1100+}
1101+
1102+void mtk_foe_entry_get_stats(struct mtk_ppe *ppe, struct mtk_flow_entry *entry,
1103+ int *idle)
1104+{
1105+ entry->packets = entry->prev_packets;
1106+ entry->bytes = entry->prev_bytes;
1107+
1108+ spin_lock_bh(&ppe_lock);
1109+
1110+ if (entry->type == MTK_FLOW_TYPE_L2)
1111+ mtk_flow_entry_update_l2(ppe, entry);
1112+ else
1113+ mtk_flow_entry_update(ppe, entry, &entry->packets, &entry->bytes);
1114+
1115+ *idle = __mtk_foe_entry_idle_time(ppe, entry->data.ib1);
1116+
1117+ spin_unlock_bh(&ppe_lock);
1118+}
1119+
1120+static void
1121+__mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
1122+ u16 hash)
1123+{
1124+ struct mtk_foe_accounting *acct;
1125+ struct mtk_eth *eth = ppe->eth;
1126+ u16 timestamp = mtk_eth_timestamp(eth);
1127+ struct mtk_foe_entry *hwe;
1128+ u32 val;
1129+
1130+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
1131+ entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP_V2;
1132+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP_V2,
1133+ timestamp);
1134+ } else {
1135+ entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
1136+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP,
1137+ timestamp);
1138+ }
1139+
1140+ hwe = mtk_foe_get_entry(ppe, hash);
1141+ memcpy(&hwe->data, &entry->data, eth->soc->foe_entry_size - sizeof(hwe->ib1));
developer8cb3ac72022-07-04 10:55:14 +08001142+ wmb();
1143+ hwe->ib1 = entry->ib1;
1144+
developer7eb15dc2023-06-14 17:44:03 +08001145+ if (ppe->accounting) {
1146+ if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2))
1147+ val = MTK_FOE_IB2_MIB_CNT_V2;
1148+ else
1149+ val = MTK_FOE_IB2_MIB_CNT;
1150+ *mtk_foe_entry_ib2(eth, hwe) |= val;
1151+ }
1152+
developer8cb3ac72022-07-04 10:55:14 +08001153+ dma_wmb();
1154+
developer7eb15dc2023-06-14 17:44:03 +08001155+ acct = mtk_ppe_mib_entry_read(ppe, hash);
1156+ if (acct) {
1157+ acct->packets = 0;
1158+ acct->bytes = 0;
1159+ }
1160+
developer8cb3ac72022-07-04 10:55:14 +08001161+ mtk_ppe_cache_clear(ppe);
developer7eb15dc2023-06-14 17:44:03 +08001162+}
developer8cb3ac72022-07-04 10:55:14 +08001163+
developer7eb15dc2023-06-14 17:44:03 +08001164+void mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
1165+{
1166+ spin_lock_bh(&ppe_lock);
1167+ __mtk_foe_entry_clear(ppe, entry, true);
1168+ hlist_del_init(&entry->list);
1169+ spin_unlock_bh(&ppe_lock);
1170+}
1171+
1172+static int
1173+mtk_foe_entry_commit_l2(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
1174+{
1175+ struct mtk_flow_entry *prev;
1176+
1177+ entry->type = MTK_FLOW_TYPE_L2;
1178+
1179+ prev = rhashtable_lookup_get_insert_fast(&ppe->l2_flows, &entry->l2_node,
1180+ mtk_flow_l2_ht_params);
1181+ if (likely(!prev))
1182+ return 0;
1183+
1184+ if (IS_ERR(prev))
1185+ return PTR_ERR(prev);
1186+
1187+ return rhashtable_replace_fast(&ppe->l2_flows, &prev->l2_node,
1188+ &entry->l2_node, mtk_flow_l2_ht_params);
1189+}
1190+
1191+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
1192+{
1193+ const struct mtk_soc_data *soc = ppe->eth->soc;
1194+ int type = mtk_get_ib1_pkt_type(ppe->eth, entry->data.ib1);
1195+ u32 hash;
1196+
1197+ if (type == MTK_PPE_PKT_TYPE_BRIDGE)
1198+ return mtk_foe_entry_commit_l2(ppe, entry);
1199+
1200+ hash = mtk_ppe_hash_entry(ppe->eth, &entry->data);
1201+ entry->hash = 0xffff;
1202+ spin_lock_bh(&ppe_lock);
1203+ hlist_add_head(&entry->list, &ppe->foe_flow[hash / soc->hash_offset]);
1204+ spin_unlock_bh(&ppe_lock);
1205+
1206+ return 0;
1207+}
1208+
1209+static void
1210+mtk_foe_entry_commit_subflow(struct mtk_ppe *ppe, struct mtk_flow_entry *entry,
1211+ u16 hash)
1212+{
1213+ const struct mtk_soc_data *soc = ppe->eth->soc;
1214+ struct mtk_flow_entry *flow_info;
1215+ struct mtk_foe_mac_info *l2;
1216+ struct mtk_foe_entry *hwe;
1217+ u32 ib1_mask = mtk_get_ib1_pkt_type_mask(ppe->eth) | MTK_FOE_IB1_UDP;
1218+ int type;
1219+
1220+ flow_info = kzalloc(sizeof(*flow_info), GFP_ATOMIC);
1221+ if (!flow_info)
1222+ return;
1223+
1224+ flow_info->type = MTK_FLOW_TYPE_L2_SUBFLOW;
1225+ flow_info->hash = hash;
1226+ hlist_add_head(&flow_info->list,
1227+ &ppe->foe_flow[hash / soc->hash_offset]);
1228+ hlist_add_head(&flow_info->l2_list, &entry->l2_flows);
1229+
1230+ hwe = mtk_foe_get_entry(ppe, hash);
1231+ memcpy(&flow_info->data, hwe, soc->foe_entry_size);
1232+ flow_info->data.ib1 &= ib1_mask;
1233+ flow_info->data.ib1 |= entry->data.ib1 & ~ib1_mask;
1234+
1235+ l2 = mtk_foe_entry_l2(ppe->eth, &flow_info->data);
1236+ memcpy(l2, &entry->data.bridge.l2, sizeof(*l2));
1237+
1238+ type = mtk_get_ib1_pkt_type(ppe->eth, flow_info->data.ib1);
1239+ if (type == MTK_PPE_PKT_TYPE_IPV4_HNAPT)
1240+ memcpy(&flow_info->data.ipv4.new, &flow_info->data.ipv4.orig,
1241+ sizeof(flow_info->data.ipv4.new));
1242+ else if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T && l2->etype == ETH_P_IP)
1243+ l2->etype = ETH_P_IPV6;
1244+
1245+ *mtk_foe_entry_ib2(ppe->eth, &flow_info->data) = entry->data.bridge.ib2;
1246+
1247+ __mtk_foe_entry_commit(ppe, &flow_info->data, hash);
1248+}
1249+
1250+void __mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash)
1251+{
1252+ const struct mtk_soc_data *soc = ppe->eth->soc;
1253+ struct hlist_head *head = &ppe->foe_flow[hash / soc->hash_offset];
1254+ struct mtk_foe_entry *hwe = mtk_foe_get_entry(ppe, hash);
1255+ struct mtk_flow_entry *entry;
1256+ struct mtk_foe_bridge key = {};
1257+ struct mtk_foe_entry foe = {};
1258+ struct hlist_node *n;
1259+ struct ethhdr *eh;
1260+ bool found = false;
1261+ int entry_len;
1262+ u8 *tag;
1263+
1264+ spin_lock_bh(&ppe_lock);
1265+
1266+ if (FIELD_GET(MTK_FOE_IB1_STATE, hwe->ib1) == MTK_FOE_STATE_BIND)
1267+ goto out;
1268+
1269+ entry_len = mtk_flow_entry_match_len(ppe->eth, hwe);
1270+ memcpy(&foe, hwe, entry_len);
1271+
1272+ hlist_for_each_entry_safe(entry, n, head, list) {
1273+ if (found ||
1274+ !mtk_flow_entry_match(ppe->eth, entry, &foe, entry_len)) {
1275+ if (entry->hash != 0xffff)
1276+ __mtk_foe_entry_clear(ppe, entry, false);
1277+ continue;
1278+ }
1279+
1280+ entry->hash = hash;
1281+ __mtk_foe_entry_commit(ppe, &entry->data, hash);
1282+ found = true;
1283+ }
1284+
1285+ if (found)
1286+ goto out;
1287+
1288+ if (!skb)
1289+ goto out;
1290+
1291+ eh = eth_hdr(skb);
1292+ ether_addr_copy(key.dest_mac, eh->h_dest);
1293+ ether_addr_copy(key.src_mac, eh->h_source);
1294+ tag = skb->data - 2;
1295+ key.vlan = 0;
1296+ switch (skb->protocol) {
1297+#if IS_ENABLED(CONFIG_NET_DSA)
1298+ case htons(ETH_P_XDSA):
1299+ if (!netdev_uses_dsa(skb->dev) ||
1300+ skb->dev->dsa_ptr->tag_ops->proto != DSA_TAG_PROTO_MTK)
1301+ goto out;
1302+
1303+ if (!skb_metadata_dst(skb))
1304+ tag += 4;
1305+
1306+ if (get_unaligned_be16(tag) != ETH_P_8021Q)
1307+ break;
1308+
1309+ fallthrough;
1310+#endif
1311+ case htons(ETH_P_8021Q):
1312+ key.vlan = get_unaligned_be16(tag + 2) & VLAN_VID_MASK;
1313+ break;
1314+ default:
1315+ break;
1316+ }
1317+
1318+ entry = rhashtable_lookup_fast(&ppe->l2_flows, &key, mtk_flow_l2_ht_params);
1319+ if (!entry)
1320+ goto out;
1321+
1322+ mtk_foe_entry_commit_subflow(ppe, entry, hash);
1323+
1324+out:
1325+ spin_unlock_bh(&ppe_lock);
developer8cb3ac72022-07-04 10:55:14 +08001326+}
1327+
developer7eb15dc2023-06-14 17:44:03 +08001328+int mtk_ppe_prepare_reset(struct mtk_ppe *ppe)
developer8cb3ac72022-07-04 10:55:14 +08001329+{
developer7eb15dc2023-06-14 17:44:03 +08001330+ if (!ppe)
1331+ return -EINVAL;
1332+
1333+ /* disable KA */
1334+ ppe_clear(ppe, MTK_PPE_TB_CFG, MTK_PPE_TB_CFG_KEEPALIVE);
1335+ ppe_clear(ppe, MTK_PPE_BIND_LMT1, MTK_PPE_NTU_KEEPALIVE);
1336+ ppe_w32(ppe, MTK_PPE_KEEPALIVE, 0);
1337+ usleep_range(10000, 11000);
1338+
1339+ /* set KA timer to maximum */
1340+ ppe_set(ppe, MTK_PPE_BIND_LMT1, MTK_PPE_NTU_KEEPALIVE);
1341+ ppe_w32(ppe, MTK_PPE_KEEPALIVE, 0xffffffff);
1342+
1343+ /* set KA tick select */
1344+ ppe_set(ppe, MTK_PPE_TB_CFG, MTK_PPE_TB_TICK_SEL);
1345+ ppe_set(ppe, MTK_PPE_TB_CFG, MTK_PPE_TB_CFG_KEEPALIVE);
1346+ usleep_range(10000, 11000);
1347+
1348+ /* disable scan mode */
1349+ ppe_clear(ppe, MTK_PPE_TB_CFG, MTK_PPE_TB_CFG_SCAN_MODE);
1350+ usleep_range(10000, 11000);
1351+
1352+ return mtk_ppe_wait_busy(ppe);
1353+}
1354+
1355+struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, int index)
1356+{
1357+ bool accounting = eth->soc->has_accounting;
1358+ const struct mtk_soc_data *soc = eth->soc;
1359+ struct mtk_foe_accounting *acct;
1360+ struct device *dev = eth->dev;
1361+ struct mtk_mib_entry *mib;
1362+ struct mtk_ppe *ppe;
1363+ u32 foe_flow_size;
1364+ void *foe;
1365+
1366+ ppe = devm_kzalloc(dev, sizeof(*ppe), GFP_KERNEL);
1367+ if (!ppe)
1368+ return NULL;
1369+
1370+ rhashtable_init(&ppe->l2_flows, &mtk_flow_l2_ht_params);
developer8cb3ac72022-07-04 10:55:14 +08001371+
1372+	/* need to allocate a separate device, since its PPE DMA access is
1373+	 * not coherent.
1374+	 */
1375+ ppe->base = base;
developer7eb15dc2023-06-14 17:44:03 +08001376+ ppe->eth = eth;
developer8cb3ac72022-07-04 10:55:14 +08001377+ ppe->dev = dev;
developer7eb15dc2023-06-14 17:44:03 +08001378+ ppe->version = eth->soc->offload_version;
1379+ ppe->accounting = accounting;
developer8cb3ac72022-07-04 10:55:14 +08001380+
developer7eb15dc2023-06-14 17:44:03 +08001381+ foe = dmam_alloc_coherent(ppe->dev,
1382+ MTK_PPE_ENTRIES * soc->foe_entry_size,
developer8cb3ac72022-07-04 10:55:14 +08001383+ &ppe->foe_phys, GFP_KERNEL);
1384+ if (!foe)
developer7eb15dc2023-06-14 17:44:03 +08001385+ return NULL;
developer8cb3ac72022-07-04 10:55:14 +08001386+
1387+ ppe->foe_table = foe;
1388+
developer7eb15dc2023-06-14 17:44:03 +08001389+ foe_flow_size = (MTK_PPE_ENTRIES / soc->hash_offset) *
1390+ sizeof(*ppe->foe_flow);
1391+ ppe->foe_flow = devm_kzalloc(dev, foe_flow_size, GFP_KERNEL);
1392+ if (!ppe->foe_flow)
1393+ return NULL;
developer8cb3ac72022-07-04 10:55:14 +08001394+
developer7eb15dc2023-06-14 17:44:03 +08001395+ if (accounting) {
1396+ mib = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*mib),
1397+ &ppe->mib_phys, GFP_KERNEL);
1398+ if (!mib)
1399+ return NULL;
1400+
1401+ ppe->mib_table = mib;
1402+
1403+ acct = devm_kzalloc(dev, MTK_PPE_ENTRIES * sizeof(*acct),
1404+ GFP_KERNEL);
1405+
1406+ if (!acct)
1407+ return NULL;
1408+
1409+ ppe->acct_table = acct;
1410+ }
1411+
1412+ mtk_ppe_debugfs_init(ppe, index);
1413+
1414+ return ppe;
developer8cb3ac72022-07-04 10:55:14 +08001415+}
1416+
1417+static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
1418+{
1419+ static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
1420+ int i, k;
1421+
developer7eb15dc2023-06-14 17:44:03 +08001422+ memset(ppe->foe_table, 0,
1423+ MTK_PPE_ENTRIES * ppe->eth->soc->foe_entry_size);
developer8cb3ac72022-07-04 10:55:14 +08001424+
1425+ if (!IS_ENABLED(CONFIG_SOC_MT7621))
1426+ return;
1427+
1428+ /* skip all entries that cross the 1024 byte boundary */
developer7eb15dc2023-06-14 17:44:03 +08001429+ for (i = 0; i < MTK_PPE_ENTRIES; i += 128) {
1430+ for (k = 0; k < ARRAY_SIZE(skip); k++) {
1431+ struct mtk_foe_entry *hwe;
1432+
1433+ hwe = mtk_foe_get_entry(ppe, i + skip[k]);
1434+ hwe->ib1 |= MTK_FOE_IB1_STATIC;
1435+ }
1436+ }
developer8cb3ac72022-07-04 10:55:14 +08001437+}
1438+
developer7eb15dc2023-06-14 17:44:03 +08001439+void mtk_ppe_start(struct mtk_ppe *ppe)
developer8cb3ac72022-07-04 10:55:14 +08001440+{
1441+ u32 val;
1442+
developer7eb15dc2023-06-14 17:44:03 +08001443+ if (!ppe)
1444+ return;
1445+
developer8cb3ac72022-07-04 10:55:14 +08001446+ mtk_ppe_init_foe_table(ppe);
1447+ ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys);
1448+
1449+ val = MTK_PPE_TB_CFG_ENTRY_80B |
1450+ MTK_PPE_TB_CFG_AGE_NON_L4 |
1451+ MTK_PPE_TB_CFG_AGE_UNBIND |
1452+ MTK_PPE_TB_CFG_AGE_TCP |
1453+ MTK_PPE_TB_CFG_AGE_UDP |
1454+ MTK_PPE_TB_CFG_AGE_TCP_FIN |
1455+ FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS,
1456+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) |
1457+ FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
1458+ MTK_PPE_KEEPALIVE_DISABLE) |
1459+ FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) |
1460+ FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
1461+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) |
1462+ FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM,
1463+ MTK_PPE_ENTRIES_SHIFT);
developer7eb15dc2023-06-14 17:44:03 +08001464+ if (MTK_HAS_CAPS(ppe->eth->soc->caps, MTK_NETSYS_V2))
1465+ val |= MTK_PPE_TB_CFG_INFO_SEL;
developer8cb3ac72022-07-04 10:55:14 +08001466+ ppe_w32(ppe, MTK_PPE_TB_CFG, val);
1467+
1468+ ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK,
1469+ MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6);
1470+
1471+ mtk_ppe_cache_enable(ppe, true);
1472+
developer7eb15dc2023-06-14 17:44:03 +08001473+ val = MTK_PPE_FLOW_CFG_IP6_3T_ROUTE |
developer8cb3ac72022-07-04 10:55:14 +08001474+ MTK_PPE_FLOW_CFG_IP6_5T_ROUTE |
1475+ MTK_PPE_FLOW_CFG_IP6_6RD |
1476+ MTK_PPE_FLOW_CFG_IP4_NAT |
1477+ MTK_PPE_FLOW_CFG_IP4_NAPT |
1478+ MTK_PPE_FLOW_CFG_IP4_DSLITE |
developer8cb3ac72022-07-04 10:55:14 +08001479+ MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
developer7eb15dc2023-06-14 17:44:03 +08001480+ if (MTK_HAS_CAPS(ppe->eth->soc->caps, MTK_NETSYS_V2))
1481+ val |= MTK_PPE_MD_TOAP_BYP_CRSN0 |
1482+ MTK_PPE_MD_TOAP_BYP_CRSN1 |
1483+ MTK_PPE_MD_TOAP_BYP_CRSN2 |
1484+ MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY;
1485+ else
1486+ val |= MTK_PPE_FLOW_CFG_IP4_TCP_FRAG |
1487+ MTK_PPE_FLOW_CFG_IP4_UDP_FRAG;
developer8cb3ac72022-07-04 10:55:14 +08001488+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
1489+
1490+ val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) |
1491+ FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3);
1492+ ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val);
1493+
developer7eb15dc2023-06-14 17:44:03 +08001494+ val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 12) |
developer8cb3ac72022-07-04 10:55:14 +08001495+ FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1);
1496+ ppe_w32(ppe, MTK_PPE_BIND_AGE0, val);
1497+
1498+ val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) |
developer7eb15dc2023-06-14 17:44:03 +08001499+ FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 7);
developer8cb3ac72022-07-04 10:55:14 +08001500+ ppe_w32(ppe, MTK_PPE_BIND_AGE1, val);
1501+
1502+ val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF;
1503+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val);
1504+
1505+ val = MTK_PPE_BIND_LIMIT1_FULL |
1506+ FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1);
1507+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val);
1508+
1509+ val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) |
1510+ FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1);
1511+ ppe_w32(ppe, MTK_PPE_BIND_RATE, val);
1512+
1513+ /* enable PPE */
1514+ val = MTK_PPE_GLO_CFG_EN |
1515+ MTK_PPE_GLO_CFG_IP4_L4_CS_DROP |
1516+ MTK_PPE_GLO_CFG_IP4_CS_DROP |
1517+ MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE;
1518+ ppe_w32(ppe, MTK_PPE_GLO_CFG, val);
1519+
1520+ ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
1521+
developer7eb15dc2023-06-14 17:44:03 +08001522+ if (MTK_HAS_CAPS(ppe->eth->soc->caps, MTK_NETSYS_V2)) {
1523+ ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT1, 0xcb777);
1524+ ppe_w32(ppe, MTK_PPE_SBW_CTRL, 0x7f);
1525+ }
1526+
1527+ if (ppe->accounting && ppe->mib_phys) {
1528+ ppe_w32(ppe, MTK_PPE_MIB_TB_BASE, ppe->mib_phys);
1529+ ppe_m32(ppe, MTK_PPE_MIB_CFG, MTK_PPE_MIB_CFG_EN,
1530+ MTK_PPE_MIB_CFG_EN);
1531+ ppe_m32(ppe, MTK_PPE_MIB_CFG, MTK_PPE_MIB_CFG_RD_CLR,
1532+ MTK_PPE_MIB_CFG_RD_CLR);
1533+ ppe_m32(ppe, MTK_PPE_MIB_CACHE_CTL, MTK_PPE_MIB_CACHE_CTL_EN,
1534+ MTK_PPE_MIB_CFG_RD_CLR);
1535+ }
developer8cb3ac72022-07-04 10:55:14 +08001536+}
1537+
1538+int mtk_ppe_stop(struct mtk_ppe *ppe)
1539+{
1540+ u32 val;
1541+ int i;
1542+
developer7eb15dc2023-06-14 17:44:03 +08001543+ if (!ppe)
1544+ return 0;
1545+
1546+ for (i = 0; i < MTK_PPE_ENTRIES; i++) {
1547+ struct mtk_foe_entry *hwe = mtk_foe_get_entry(ppe, i);
1548+
1549+ hwe->ib1 = FIELD_PREP(MTK_FOE_IB1_STATE,
1550+ MTK_FOE_STATE_INVALID);
1551+ }
developer8cb3ac72022-07-04 10:55:14 +08001552+
1553+ mtk_ppe_cache_enable(ppe, false);
1554+
1555+ /* disable offload engine */
1556+ ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN);
1557+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0);
1558+
1559+ /* disable aging */
1560+ val = MTK_PPE_TB_CFG_AGE_NON_L4 |
1561+ MTK_PPE_TB_CFG_AGE_UNBIND |
1562+ MTK_PPE_TB_CFG_AGE_TCP |
1563+ MTK_PPE_TB_CFG_AGE_UDP |
1564+ MTK_PPE_TB_CFG_AGE_TCP_FIN;
1565+ ppe_clear(ppe, MTK_PPE_TB_CFG, val);
1566+
1567+ return mtk_ppe_wait_busy(ppe);
1568+}
1569diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
1570new file mode 100644
developer7eb15dc2023-06-14 17:44:03 +08001571index 0000000..1fdfb93
developer8cb3ac72022-07-04 10:55:14 +08001572--- /dev/null
1573+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
developer7eb15dc2023-06-14 17:44:03 +08001574@@ -0,0 +1,384 @@
developer8cb3ac72022-07-04 10:55:14 +08001575+// SPDX-License-Identifier: GPL-2.0-only
1576+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
1577+
1578+#ifndef __MTK_PPE_H
1579+#define __MTK_PPE_H
1580+
1581+#include <linux/kernel.h>
1582+#include <linux/bitfield.h>
developer7eb15dc2023-06-14 17:44:03 +08001583+#include <linux/rhashtable.h>
developer8cb3ac72022-07-04 10:55:14 +08001584+
1585+#define MTK_PPE_ENTRIES_SHIFT 3
1586+#define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT)
1587+#define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1)
1588+#define MTK_PPE_WAIT_TIMEOUT_US 1000000
1589+
1590+#define MTK_FOE_IB1_UNBIND_TIMESTAMP GENMASK(7, 0)
1591+#define MTK_FOE_IB1_UNBIND_PACKETS GENMASK(23, 8)
1592+#define MTK_FOE_IB1_UNBIND_PREBIND BIT(24)
1593+
1594+#define MTK_FOE_IB1_BIND_TIMESTAMP GENMASK(14, 0)
1595+#define MTK_FOE_IB1_BIND_KEEPALIVE BIT(15)
1596+#define MTK_FOE_IB1_BIND_VLAN_LAYER GENMASK(18, 16)
1597+#define MTK_FOE_IB1_BIND_PPPOE BIT(19)
1598+#define MTK_FOE_IB1_BIND_VLAN_TAG BIT(20)
1599+#define MTK_FOE_IB1_BIND_PKT_SAMPLE BIT(21)
1600+#define MTK_FOE_IB1_BIND_CACHE BIT(22)
1601+#define MTK_FOE_IB1_BIND_TUNNEL_DECAP BIT(23)
1602+#define MTK_FOE_IB1_BIND_TTL BIT(24)
1603+
1604+#define MTK_FOE_IB1_PACKET_TYPE GENMASK(27, 25)
1605+#define MTK_FOE_IB1_STATE GENMASK(29, 28)
1606+#define MTK_FOE_IB1_UDP BIT(30)
1607+#define MTK_FOE_IB1_STATIC BIT(31)
1608+
developer7eb15dc2023-06-14 17:44:03 +08001609+/* CONFIG_MEDIATEK_NETSYS_V2 */
1610+#define MTK_FOE_IB1_BIND_TIMESTAMP_V2 GENMASK(7, 0)
1611+#define MTK_FOE_IB1_BIND_VLAN_LAYER_V2 GENMASK(16, 14)
1612+#define MTK_FOE_IB1_BIND_PPPOE_V2 BIT(17)
1613+#define MTK_FOE_IB1_BIND_VLAN_TAG_V2 BIT(18)
1614+#define MTK_FOE_IB1_BIND_CACHE_V2 BIT(20)
1615+#define MTK_FOE_IB1_BIND_TTL_V2 BIT(22)
1616+#define MTK_FOE_IB1_PACKET_TYPE_V2 GENMASK(27, 23)
1617+
developer8cb3ac72022-07-04 10:55:14 +08001618+enum {
1619+ MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0,
1620+ MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1,
1621+ MTK_PPE_PKT_TYPE_BRIDGE = 2,
1622+ MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3,
1623+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4,
1624+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5,
1625+ MTK_PPE_PKT_TYPE_IPV6_6RD = 7,
1626+};
1627+
1628+#define MTK_FOE_IB2_QID GENMASK(3, 0)
1629+#define MTK_FOE_IB2_PSE_QOS BIT(4)
1630+#define MTK_FOE_IB2_DEST_PORT GENMASK(7, 5)
1631+#define MTK_FOE_IB2_MULTICAST BIT(8)
developer7eb15dc2023-06-14 17:44:03 +08001632+#define MTK_FOE_IB2_MIB_CNT BIT(10)
developer8cb3ac72022-07-04 10:55:14 +08001633+
developer7eb15dc2023-06-14 17:44:03 +08001634+#define MTK_FOE_IB2_WDMA_QID2 GENMASK(13, 12)
1635+#define MTK_FOE_IB2_MIB_CNT_V2 BIT(15)
1636+#define MTK_FOE_IB2_WDMA_DEVIDX BIT(16)
1637+#define MTK_FOE_IB2_WDMA_WINFO BIT(17)
developer8cb3ac72022-07-04 10:55:14 +08001638+
1639+#define MTK_FOE_IB2_PORT_MG GENMASK(17, 12)
1640+
developer7eb15dc2023-06-14 17:44:03 +08001641+#define MTK_FOE_IB2_RX_IDX GENMASK(18, 17)
developer8cb3ac72022-07-04 10:55:14 +08001642+#define MTK_FOE_IB2_PORT_AG GENMASK(23, 18)
1643+
1644+#define MTK_FOE_IB2_DSCP GENMASK(31, 24)
1645+
developer7eb15dc2023-06-14 17:44:03 +08001646+/* CONFIG_MEDIATEK_NETSYS_V2 */
1647+#define MTK_FOE_IB2_QID_V2 GENMASK(6, 0)
1648+#define MTK_FOE_IB2_PORT_MG_V2 BIT(7)
1649+#define MTK_FOE_IB2_PSE_QOS_V2 BIT(8)
1650+#define MTK_FOE_IB2_DEST_PORT_V2 GENMASK(12, 9)
1651+#define MTK_FOE_IB2_MULTICAST_V2 BIT(13)
1652+#define MTK_FOE_IB2_WDMA_WINFO_V2 BIT(19)
1653+#define MTK_FOE_IB2_PORT_AG_V2 GENMASK(23, 20)
1654+
1655+#define MTK_FOE_VLAN2_WINFO_BSS GENMASK(5, 0)
1656+#define MTK_FOE_VLAN2_WINFO_WCID GENMASK(13, 6)
1657+#define MTK_FOE_VLAN2_WINFO_RING GENMASK(15, 14)
1658+
1659+#define MTK_FOE_WINFO_BSS GENMASK(5, 0)
1660+#define MTK_FOE_WINFO_WCID GENMASK(15, 6)
developer8cb3ac72022-07-04 10:55:14 +08001661+
1662+enum {
1663+ MTK_FOE_STATE_INVALID,
1664+ MTK_FOE_STATE_UNBIND,
1665+ MTK_FOE_STATE_BIND,
1666+ MTK_FOE_STATE_FIN
1667+};
1668+
1669+struct mtk_foe_mac_info {
1670+ u16 vlan1;
1671+ u16 etype;
1672+
1673+ u32 dest_mac_hi;
1674+
1675+ u16 vlan2;
1676+ u16 dest_mac_lo;
1677+
1678+ u32 src_mac_hi;
1679+
1680+ u16 pppoe_id;
1681+ u16 src_mac_lo;
developer7eb15dc2023-06-14 17:44:03 +08001682+
1683+ u16 minfo;
1684+ u16 winfo;
developer8cb3ac72022-07-04 10:55:14 +08001685+};
1686+
developer7eb15dc2023-06-14 17:44:03 +08001687+/* software-only entry type */
developer8cb3ac72022-07-04 10:55:14 +08001688+struct mtk_foe_bridge {
developer7eb15dc2023-06-14 17:44:03 +08001689+ u8 dest_mac[ETH_ALEN];
1690+ u8 src_mac[ETH_ALEN];
1691+ u16 vlan;
developer8cb3ac72022-07-04 10:55:14 +08001692+
developer7eb15dc2023-06-14 17:44:03 +08001693+ struct {} key_end;
developer8cb3ac72022-07-04 10:55:14 +08001694+
1695+ u32 ib2;
1696+
developer8cb3ac72022-07-04 10:55:14 +08001697+ struct mtk_foe_mac_info l2;
1698+};
1699+
1700+struct mtk_ipv4_tuple {
1701+ u32 src_ip;
1702+ u32 dest_ip;
1703+ union {
1704+ struct {
1705+ u16 dest_port;
1706+ u16 src_port;
1707+ };
1708+ struct {
1709+ u8 protocol;
1710+ u8 _pad[3]; /* fill with 0xa5a5a5 */
1711+ };
1712+ u32 ports;
1713+ };
1714+};
1715+
1716+struct mtk_foe_ipv4 {
1717+ struct mtk_ipv4_tuple orig;
1718+
1719+ u32 ib2;
1720+
1721+ struct mtk_ipv4_tuple new;
1722+
1723+ u16 timestamp;
1724+ u16 _rsv0[3];
1725+
1726+ u32 udf_tsid;
1727+
1728+ struct mtk_foe_mac_info l2;
1729+};
1730+
1731+struct mtk_foe_ipv4_dslite {
1732+ struct mtk_ipv4_tuple ip4;
1733+
1734+ u32 tunnel_src_ip[4];
1735+ u32 tunnel_dest_ip[4];
1736+
1737+ u8 flow_label[3];
1738+ u8 priority;
1739+
1740+ u32 udf_tsid;
1741+
1742+ u32 ib2;
1743+
1744+ struct mtk_foe_mac_info l2;
1745+};
1746+
1747+struct mtk_foe_ipv6 {
1748+ u32 src_ip[4];
1749+ u32 dest_ip[4];
1750+
1751+ union {
1752+ struct {
1753+ u8 protocol;
1754+ u8 _pad[3]; /* fill with 0xa5a5a5 */
1755+ }; /* 3-tuple */
1756+ struct {
1757+ u16 dest_port;
1758+ u16 src_port;
1759+ }; /* 5-tuple */
1760+ u32 ports;
1761+ };
1762+
1763+ u32 _rsv[3];
1764+
1765+ u32 udf;
1766+
1767+ u32 ib2;
1768+ struct mtk_foe_mac_info l2;
1769+};
1770+
1771+struct mtk_foe_ipv6_6rd {
1772+ u32 src_ip[4];
1773+ u32 dest_ip[4];
1774+ u16 dest_port;
1775+ u16 src_port;
1776+
1777+ u32 tunnel_src_ip;
1778+ u32 tunnel_dest_ip;
1779+
1780+ u16 hdr_csum;
1781+ u8 dscp;
1782+ u8 ttl;
1783+
1784+ u8 flag;
1785+ u8 pad;
1786+ u8 per_flow_6rd_id;
1787+ u8 pad2;
1788+
1789+ u32 ib2;
1790+ struct mtk_foe_mac_info l2;
1791+};
1792+
1793+struct mtk_foe_entry {
1794+ u32 ib1;
1795+
1796+ union {
1797+ struct mtk_foe_bridge bridge;
1798+ struct mtk_foe_ipv4 ipv4;
1799+ struct mtk_foe_ipv4_dslite dslite;
1800+ struct mtk_foe_ipv6 ipv6;
1801+ struct mtk_foe_ipv6_6rd ipv6_6rd;
developer7eb15dc2023-06-14 17:44:03 +08001802+ u32 data[23];
developer8cb3ac72022-07-04 10:55:14 +08001803+ };
1804+};
1805+
1806+enum {
1807+ MTK_PPE_CPU_REASON_TTL_EXCEEDED = 0x02,
1808+ MTK_PPE_CPU_REASON_OPTION_HEADER = 0x03,
1809+ MTK_PPE_CPU_REASON_NO_FLOW = 0x07,
1810+ MTK_PPE_CPU_REASON_IPV4_FRAG = 0x08,
1811+ MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG = 0x09,
1812+ MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP = 0x0a,
1813+ MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP = 0x0b,
1814+ MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST = 0x0c,
1815+ MTK_PPE_CPU_REASON_UN_HIT = 0x0d,
1816+ MTK_PPE_CPU_REASON_HIT_UNBIND = 0x0e,
1817+ MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f,
1818+ MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN = 0x10,
1819+ MTK_PPE_CPU_REASON_HIT_TTL_1 = 0x11,
1820+ MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION = 0x12,
1821+ MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR = 0x13,
1822+ MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR = 0x14,
1823+ MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR = 0x15,
1824+ MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU = 0x16,
1825+ MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER = 0x17,
1826+ MTK_PPE_CPU_REASON_MULTICAST_TO_CPU = 0x18,
1827+ MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU = 0x19,
1828+ MTK_PPE_CPU_REASON_HIT_PRE_BIND = 0x1a,
1829+ MTK_PPE_CPU_REASON_PACKET_SAMPLING = 0x1b,
1830+ MTK_PPE_CPU_REASON_EXCEED_MTU = 0x1c,
1831+ MTK_PPE_CPU_REASON_PPE_BYPASS = 0x1e,
1832+ MTK_PPE_CPU_REASON_INVALID = 0x1f,
1833+};
1834+
developer7eb15dc2023-06-14 17:44:03 +08001835+enum {
1836+ MTK_FLOW_TYPE_L4,
1837+ MTK_FLOW_TYPE_L2,
1838+ MTK_FLOW_TYPE_L2_SUBFLOW,
1839+};
1840+
1841+struct mtk_flow_entry {
1842+ union {
1843+ /* regular flows + L2 subflows */
1844+ struct {
1845+ struct hlist_node list;
1846+ struct hlist_node l2_list;
1847+ };
1848+ /* L2 flows */
1849+ struct {
1850+ struct rhash_head l2_node;
1851+ struct hlist_head l2_flows;
1852+ };
1853+ };
1854+ u8 type;
1855+ s8 wed_index;
1856+ u8 ppe_index;
1857+ u16 hash;
1858+ struct mtk_foe_entry data;
1859+ struct rhash_head node;
1860+ unsigned long cookie;
1861+ u64 prev_packets, prev_bytes;
1862+ u64 packets, bytes;
1863+};
1864+
1865+struct mtk_mib_entry {
1866+ u32 byt_cnt_l;
1867+ u16 byt_cnt_h;
1868+ u32 pkt_cnt_l;
1869+ u8 pkt_cnt_h;
1870+ u8 _rsv0;
1871+ u32 _rsv1;
1872+} __packed;
1873+
1874+struct mtk_foe_accounting {
1875+ u64 bytes;
1876+ u64 packets;
1877+};
1878+
developer8cb3ac72022-07-04 10:55:14 +08001879+struct mtk_ppe {
developer7eb15dc2023-06-14 17:44:03 +08001880+ struct mtk_eth *eth;
developer8cb3ac72022-07-04 10:55:14 +08001881+ struct device *dev;
1882+ void __iomem *base;
1883+ int version;
developer7eb15dc2023-06-14 17:44:03 +08001884+ char dirname[5];
1885+ bool accounting;
developer8cb3ac72022-07-04 10:55:14 +08001886+
developer7eb15dc2023-06-14 17:44:03 +08001887+ void *foe_table;
developer8cb3ac72022-07-04 10:55:14 +08001888+ dma_addr_t foe_phys;
1889+
developer7eb15dc2023-06-14 17:44:03 +08001890+ struct mtk_mib_entry *mib_table;
1891+ dma_addr_t mib_phys;
1892+
1893+ u16 foe_check_time[MTK_PPE_ENTRIES];
1894+ struct hlist_head *foe_flow;
1895+
1896+ struct rhashtable l2_flows;
1897+
developer8cb3ac72022-07-04 10:55:14 +08001898+ void *acct_table;
1899+};
1900+
developer7eb15dc2023-06-14 17:44:03 +08001901+struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, int index);
1902+void mtk_ppe_start(struct mtk_ppe *ppe);
developer8cb3ac72022-07-04 10:55:14 +08001903+int mtk_ppe_stop(struct mtk_ppe *ppe);
developer7eb15dc2023-06-14 17:44:03 +08001904+int mtk_ppe_prepare_reset(struct mtk_ppe *ppe);
1905+struct mtk_foe_accounting *mtk_ppe_mib_entry_read(struct mtk_ppe *ppe, u16 index);
1906+
1907+void __mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash);
developer8cb3ac72022-07-04 10:55:14 +08001908+
1909+static inline void
developer7eb15dc2023-06-14 17:44:03 +08001910+mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash)
developer8cb3ac72022-07-04 10:55:14 +08001911+{
developer7eb15dc2023-06-14 17:44:03 +08001912+ u16 now, diff;
developer8cb3ac72022-07-04 10:55:14 +08001913+
developer7eb15dc2023-06-14 17:44:03 +08001914+ if (!ppe)
1915+ return;
developer8cb3ac72022-07-04 10:55:14 +08001916+
developer7eb15dc2023-06-14 17:44:03 +08001917+ if (hash > MTK_PPE_HASH_MASK)
1918+ return;
developer8cb3ac72022-07-04 10:55:14 +08001919+
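+	/* rate-limit the slow-path check: at most one per hash bucket every HZ / 10 */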
developer7eb15dc2023-06-14 17:44:03 +08001920+ now = (u16)jiffies;
1921+ diff = now - ppe->foe_check_time[hash];
1922+ if (diff < HZ / 10)
1923+ return;
1924+
1925+ ppe->foe_check_time[hash] = now;
1926+ __mtk_ppe_check_skb(ppe, skb, hash);
developer8cb3ac72022-07-04 10:55:14 +08001927+}
1928+
developer7eb15dc2023-06-14 17:44:03 +08001929+int mtk_foe_entry_prepare(struct mtk_eth *eth, struct mtk_foe_entry *entry,
1930+ int type, int l4proto, u8 pse_port, u8 *src_mac,
1931+ u8 *dest_mac);
1932+int mtk_foe_entry_set_pse_port(struct mtk_eth *eth,
1933+ struct mtk_foe_entry *entry, u8 port);
1934+int mtk_foe_entry_set_ipv4_tuple(struct mtk_eth *eth,
1935+ struct mtk_foe_entry *entry, bool orig,
developer8cb3ac72022-07-04 10:55:14 +08001936+ __be32 src_addr, __be16 src_port,
1937+ __be32 dest_addr, __be16 dest_port);
developer7eb15dc2023-06-14 17:44:03 +08001938+int mtk_foe_entry_set_ipv6_tuple(struct mtk_eth *eth,
1939+ struct mtk_foe_entry *entry,
developer8cb3ac72022-07-04 10:55:14 +08001940+ __be32 *src_addr, __be16 src_port,
1941+ __be32 *dest_addr, __be16 dest_port);
developer7eb15dc2023-06-14 17:44:03 +08001942+int mtk_foe_entry_set_dsa(struct mtk_eth *eth, struct mtk_foe_entry *entry,
1943+ int port);
1944+int mtk_foe_entry_set_vlan(struct mtk_eth *eth, struct mtk_foe_entry *entry,
1945+ int vid);
1946+int mtk_foe_entry_set_pppoe(struct mtk_eth *eth, struct mtk_foe_entry *entry,
1947+ int sid);
1948+int mtk_foe_entry_set_wdma(struct mtk_eth *eth, struct mtk_foe_entry *entry,
1949+ int wdma_idx, int txq, int bss, int wcid);
1950+int mtk_foe_entry_set_queue(struct mtk_eth *eth, struct mtk_foe_entry *entry,
1951+ unsigned int queue);
1952+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
1953+void mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
1954+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe, int index);
1955+void mtk_foe_entry_get_stats(struct mtk_ppe *ppe, struct mtk_flow_entry *entry,
1956+ int *idle);
developer8cb3ac72022-07-04 10:55:14 +08001957+
1958+#endif
1959diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
1960new file mode 100644
developer7eb15dc2023-06-14 17:44:03 +08001961index 0000000..322b8f4
developer8cb3ac72022-07-04 10:55:14 +08001962--- /dev/null
1963+++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
developer7eb15dc2023-06-14 17:44:03 +08001964@@ -0,0 +1,221 @@
developer8cb3ac72022-07-04 10:55:14 +08001965+// SPDX-License-Identifier: GPL-2.0-only
1966+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
1967+
1968+#include <linux/kernel.h>
1969+#include <linux/debugfs.h>
1970+#include "mtk_eth_soc.h"
1971+
1972+struct mtk_flow_addr_info
1973+{
1974+ void *src, *dest;
1975+ u16 *src_port, *dest_port;
1976+ bool ipv6;
1977+};
1978+
1979+static const char *mtk_foe_entry_state_str(int state)
1980+{
1981+ static const char * const state_str[] = {
1982+ [MTK_FOE_STATE_INVALID] = "INV",
1983+ [MTK_FOE_STATE_UNBIND] = "UNB",
1984+ [MTK_FOE_STATE_BIND] = "BND",
1985+ [MTK_FOE_STATE_FIN] = "FIN",
1986+ };
1987+
1988+ if (state >= ARRAY_SIZE(state_str) || !state_str[state])
1989+ return "UNK";
1990+
1991+ return state_str[state];
1992+}
1993+
1994+static const char *mtk_foe_pkt_type_str(int type)
1995+{
1996+ static const char * const type_str[] = {
1997+ [MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
1998+ [MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
developer8cb3ac72022-07-04 10:55:14 +08001999+ [MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
2000+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
2001+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
2002+ [MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD",
2003+ };
2004+
2005+ if (type >= ARRAY_SIZE(type_str) || !type_str[type])
2006+ return "UNKNOWN";
2007+
2008+ return type_str[type];
2009+}
2010+
2011+static void
2012+mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6)
2013+{
2014+ u32 n_addr[4];
2015+ int i;
2016+
2017+ if (!ipv6) {
2018+ seq_printf(m, "%pI4h", addr);
2019+ return;
2020+ }
2021+
2022+ for (i = 0; i < ARRAY_SIZE(n_addr); i++)
2023+ n_addr[i] = htonl(addr[i]);
2024+ seq_printf(m, "%pI6", n_addr);
2025+}
2026+
2027+static void
2028+mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai)
2029+{
2030+ mtk_print_addr(m, ai->src, ai->ipv6);
2031+ if (ai->src_port)
2032+ seq_printf(m, ":%d", *ai->src_port);
2033+ seq_printf(m, "->");
2034+ mtk_print_addr(m, ai->dest, ai->ipv6);
2035+ if (ai->dest_port)
2036+ seq_printf(m, ":%d", *ai->dest_port);
2037+}
2038+
2039+static int
2040+mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
2041+{
2042+ struct mtk_ppe *ppe = m->private;
2043+ int i;
2044+
2045+ for (i = 0; i < MTK_PPE_ENTRIES; i++) {
developer7eb15dc2023-06-14 17:44:03 +08002046+ struct mtk_foe_entry *entry = mtk_foe_get_entry(ppe, i);
developer8cb3ac72022-07-04 10:55:14 +08002047+ struct mtk_foe_mac_info *l2;
2048+ struct mtk_flow_addr_info ai = {};
developer7eb15dc2023-06-14 17:44:03 +08002049+ struct mtk_foe_accounting *acct;
developer8cb3ac72022-07-04 10:55:14 +08002050+ unsigned char h_source[ETH_ALEN];
2051+ unsigned char h_dest[ETH_ALEN];
2052+ int type, state;
2053+ u32 ib2;
2054+
2055+
2056+ state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1);
2057+ if (!state)
2058+ continue;
2059+
2060+ if (bind && state != MTK_FOE_STATE_BIND)
2061+ continue;
2062+
developer7eb15dc2023-06-14 17:44:03 +08002063+ acct = mtk_ppe_mib_entry_read(ppe, i);
2064+
developer8cb3ac72022-07-04 10:55:14 +08002065+ type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
2066+ seq_printf(m, "%05x %s %7s", i,
2067+ mtk_foe_entry_state_str(state),
2068+ mtk_foe_pkt_type_str(type));
2069+
2070+ switch (type) {
2071+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
2072+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
2073+ ai.src_port = &entry->ipv4.orig.src_port;
2074+ ai.dest_port = &entry->ipv4.orig.dest_port;
2075+ fallthrough;
2076+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
2077+ ai.src = &entry->ipv4.orig.src_ip;
2078+ ai.dest = &entry->ipv4.orig.dest_ip;
2079+ break;
2080+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
2081+ ai.src_port = &entry->ipv6.src_port;
2082+ ai.dest_port = &entry->ipv6.dest_port;
2083+ fallthrough;
2084+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
2085+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
2086+ ai.src = &entry->ipv6.src_ip;
2087+ ai.dest = &entry->ipv6.dest_ip;
2088+ ai.ipv6 = true;
2089+ break;
2090+ }
2091+
2092+ seq_printf(m, " orig=");
2093+ mtk_print_addr_info(m, &ai);
2094+
2095+ switch (type) {
2096+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
2097+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
2098+ ai.src_port = &entry->ipv4.new.src_port;
2099+ ai.dest_port = &entry->ipv4.new.dest_port;
2100+ fallthrough;
2101+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
2102+ ai.src = &entry->ipv4.new.src_ip;
2103+ ai.dest = &entry->ipv4.new.dest_ip;
2104+ seq_printf(m, " new=");
2105+ mtk_print_addr_info(m, &ai);
2106+ break;
2107+ }
2108+
2109+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
2110+ l2 = &entry->ipv6.l2;
2111+ ib2 = entry->ipv6.ib2;
2112+ } else {
2113+ l2 = &entry->ipv4.l2;
2114+ ib2 = entry->ipv4.ib2;
2115+ }
2116+
2117+ *((__be32 *)h_source) = htonl(l2->src_mac_hi);
2118+ *((__be16 *)&h_source[4]) = htons(l2->src_mac_lo);
2119+ *((__be32 *)h_dest) = htonl(l2->dest_mac_hi);
2120+ *((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo);
2121+
2122+ seq_printf(m, " eth=%pM->%pM etype=%04x"
developer7eb15dc2023-06-14 17:44:03 +08002123+ " vlan=%d,%d ib1=%08x ib2=%08x"
2124+ " packets=%llu bytes=%llu\n",
developer8cb3ac72022-07-04 10:55:14 +08002125+ h_source, h_dest, ntohs(l2->etype),
developer7eb15dc2023-06-14 17:44:03 +08002126+ l2->vlan1, l2->vlan2, entry->ib1, ib2,
2127+ acct ? acct->packets : 0, acct ? acct->bytes : 0);
developer8cb3ac72022-07-04 10:55:14 +08002128+ }
2129+
2130+ return 0;
2131+}
2132+
2133+static int
2134+mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private)
2135+{
2136+ return mtk_ppe_debugfs_foe_show(m, private, false);
2137+}
2138+
2139+static int
2140+mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private)
2141+{
2142+ return mtk_ppe_debugfs_foe_show(m, private, true);
2143+}
2144+
2145+static int
2146+mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file)
2147+{
2148+ return single_open(file, mtk_ppe_debugfs_foe_show_all,
2149+ inode->i_private);
2150+}
2151+
2152+static int
2153+mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file)
2154+{
2155+ return single_open(file, mtk_ppe_debugfs_foe_show_bind,
2156+ inode->i_private);
2157+}
2158+
developer7eb15dc2023-06-14 17:44:03 +08002159+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe, int index)
developer8cb3ac72022-07-04 10:55:14 +08002160+{
2161+ static const struct file_operations fops_all = {
2162+ .open = mtk_ppe_debugfs_foe_open_all,
2163+ .read = seq_read,
2164+ .llseek = seq_lseek,
2165+ .release = single_release,
2166+ };
developer8cb3ac72022-07-04 10:55:14 +08002167+ static const struct file_operations fops_bind = {
2168+ .open = mtk_ppe_debugfs_foe_open_bind,
2169+ .read = seq_read,
2170+ .llseek = seq_lseek,
2171+ .release = single_release,
2172+ };
developer8cb3ac72022-07-04 10:55:14 +08002173+ struct dentry *root;
2174+
developer7eb15dc2023-06-14 17:44:03 +08002175+ snprintf(ppe->dirname, sizeof(ppe->dirname), "ppe%d", index);
2176+
2177+ root = debugfs_create_dir(ppe->dirname, NULL);
2178+ if (!root)
2179+ return -ENOMEM;
2180+
developer8cb3ac72022-07-04 10:55:14 +08002181+ debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
2182+ debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
2183+
2184+ return 0;
2185+}
2186diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
2187new file mode 100644
developer7eb15dc2023-06-14 17:44:03 +08002188index 0000000..afe3780
developer8cb3ac72022-07-04 10:55:14 +08002189--- /dev/null
2190+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
developer7eb15dc2023-06-14 17:44:03 +08002191@@ -0,0 +1,566 @@
developer8cb3ac72022-07-04 10:55:14 +08002192+// SPDX-License-Identifier: GPL-2.0-only
2193+/*
2194+ * Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
2195+ */
2196+
2197+#include <linux/if_ether.h>
2198+#include <linux/rhashtable.h>
2199+#include <linux/ip.h>
2200+#include <linux/ipv6.h>
2201+#include <net/flow_offload.h>
2202+#include <net/pkt_cls.h>
2203+#include <net/dsa.h>
2204+#include "mtk_eth_soc.h"
2205+
2206+struct mtk_flow_data {
2207+ struct ethhdr eth;
2208+
2209+ union {
2210+ struct {
2211+ __be32 src_addr;
2212+ __be32 dst_addr;
2213+ } v4;
2214+
2215+ struct {
2216+ struct in6_addr src_addr;
2217+ struct in6_addr dst_addr;
2218+ } v6;
2219+ };
2220+
2221+ __be16 src_port;
2222+ __be16 dst_port;
2223+
developer7eb15dc2023-06-14 17:44:03 +08002224+ u16 vlan_in;
2225+
developer8cb3ac72022-07-04 10:55:14 +08002226+ struct {
2227+ u16 id;
2228+ __be16 proto;
2229+ u8 num;
2230+ } vlan;
2231+ struct {
2232+ u16 sid;
2233+ u8 num;
2234+ } pppoe;
2235+};
2236+
developer8cb3ac72022-07-04 10:55:14 +08002237+static const struct rhashtable_params mtk_flow_ht_params = {
2238+ .head_offset = offsetof(struct mtk_flow_entry, node),
2239+ .key_offset = offsetof(struct mtk_flow_entry, cookie),
2240+ .key_len = sizeof(unsigned long),
2241+ .automatic_shrinking = true,
2242+};
2243+
developer8cb3ac72022-07-04 10:55:14 +08002244+static int
developer7eb15dc2023-06-14 17:44:03 +08002245+mtk_flow_set_ipv4_addr(struct mtk_eth *eth, struct mtk_foe_entry *foe,
2246+ struct mtk_flow_data *data, bool egress)
developer8cb3ac72022-07-04 10:55:14 +08002247+{
developer7eb15dc2023-06-14 17:44:03 +08002248+ return mtk_foe_entry_set_ipv4_tuple(eth, foe, egress,
developer8cb3ac72022-07-04 10:55:14 +08002249+ data->v4.src_addr, data->src_port,
2250+ data->v4.dst_addr, data->dst_port);
2251+}
2252+
2253+static int
developer7eb15dc2023-06-14 17:44:03 +08002254+mtk_flow_set_ipv6_addr(struct mtk_eth *eth, struct mtk_foe_entry *foe,
2255+ struct mtk_flow_data *data)
developer8cb3ac72022-07-04 10:55:14 +08002256+{
developer7eb15dc2023-06-14 17:44:03 +08002257+ return mtk_foe_entry_set_ipv6_tuple(eth, foe,
developer8cb3ac72022-07-04 10:55:14 +08002258+ data->v6.src_addr.s6_addr32, data->src_port,
2259+ data->v6.dst_addr.s6_addr32, data->dst_port);
2260+}
2261+
2262+static void
2263+mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
2264+{
2265+ void *dest = eth + act->mangle.offset;
2266+ const void *src = &act->mangle.val;
2267+
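+	/* only the 12 MAC-address bytes of the Ethernet header are mangled here (32-bit word offsets 0, 4 and 8) */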
2268+ if (act->mangle.offset > 8)
2269+ return;
2270+
2271+ if (act->mangle.mask == 0xffff) {
2272+ src += 2;
2273+ dest += 2;
2274+ }
2275+
2276+ memcpy(dest, src, act->mangle.mask ? 2 : 4);
2277+}
2278+
developer8cb3ac72022-07-04 10:55:14 +08002279+static int
2280+mtk_flow_mangle_ports(const struct flow_action_entry *act,
2281+ struct mtk_flow_data *data)
2282+{
2283+ u32 val = ntohl(act->mangle.val);
2284+
2285+ switch (act->mangle.offset) {
2286+ case 0:
2287+ if (act->mangle.mask == ~htonl(0xffff))
2288+ data->dst_port = cpu_to_be16(val);
2289+ else
2290+ data->src_port = cpu_to_be16(val >> 16);
2291+ break;
2292+ case 2:
2293+ data->dst_port = cpu_to_be16(val);
2294+ break;
2295+ default:
2296+ return -EINVAL;
2297+ }
2298+
2299+ return 0;
2300+}
2301+
2302+static int
2303+mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
2304+ struct mtk_flow_data *data)
2305+{
2306+ __be32 *dest;
2307+
2308+ switch (act->mangle.offset) {
2309+ case offsetof(struct iphdr, saddr):
2310+ dest = &data->v4.src_addr;
2311+ break;
2312+ case offsetof(struct iphdr, daddr):
2313+ dest = &data->v4.dst_addr;
2314+ break;
2315+ default:
2316+ return -EINVAL;
2317+ }
2318+
2319+ memcpy(dest, &act->mangle.val, sizeof(u32));
2320+
2321+ return 0;
2322+}
2323+
2324+static int
2325+mtk_flow_get_dsa_port(struct net_device **dev)
2326+{
2327+#if IS_ENABLED(CONFIG_NET_DSA)
2328+ struct dsa_port *dp;
2329+
2330+ dp = dsa_port_from_netdev(*dev);
2331+ if (IS_ERR(dp))
2332+ return -ENODEV;
2333+
2334+ if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
2335+ return -ENODEV;
2336+
2337+ *dev = dp->cpu_dp->master;
2338+
2339+ return dp->index;
2340+#else
2341+ return -ENODEV;
2342+#endif
2343+}
2344+
2345+static int
2346+mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
developer7eb15dc2023-06-14 17:44:03 +08002347+ struct net_device *dev, const u8 *dest_mac,
2348+ int *wed_index)
developer8cb3ac72022-07-04 10:55:14 +08002349+{
developer7eb15dc2023-06-14 17:44:03 +08002350+ int pse_port, dsa_port, queue;
developer8cb3ac72022-07-04 10:55:14 +08002351+
2352+ dsa_port = mtk_flow_get_dsa_port(&dev);
developer8cb3ac72022-07-04 10:55:14 +08002353+
2354+ if (dev == eth->netdev[0])
developer7eb15dc2023-06-14 17:44:03 +08002355+ pse_port = 1;
developer8cb3ac72022-07-04 10:55:14 +08002356+ else if (dev == eth->netdev[1])
developer7eb15dc2023-06-14 17:44:03 +08002357+ pse_port = 2;
developer8cb3ac72022-07-04 10:55:14 +08002358+ else
2359+ return -EOPNOTSUPP;
2360+
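+	/* pick the hardware Tx queue: 3 + DSA port for switch-bound flows, pse_port - 1 otherwise */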
developer7eb15dc2023-06-14 17:44:03 +08002361+ if (dsa_port >= 0) {
2362+ mtk_foe_entry_set_dsa(eth, foe, dsa_port);
2363+ queue = 3 + dsa_port;
2364+ } else {
2365+ queue = pse_port - 1;
2366+ }
2367+ mtk_foe_entry_set_queue(eth, foe, queue);
2368+
2369+out:
2370+ mtk_foe_entry_set_pse_port(eth, foe, pse_port);
developer8cb3ac72022-07-04 10:55:14 +08002371+
2372+ return 0;
2373+}
2374+
2375+static int
developer7eb15dc2023-06-14 17:44:03 +08002376+mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f,
2377+ int ppe_index)
developer8cb3ac72022-07-04 10:55:14 +08002378+{
2379+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2380+ struct flow_action_entry *act;
2381+ struct mtk_flow_data data = {};
2382+ struct mtk_foe_entry foe;
2383+ struct net_device *odev = NULL;
2384+ struct mtk_flow_entry *entry;
2385+ int offload_type = 0;
developer7eb15dc2023-06-14 17:44:03 +08002386+ int wed_index = -1;
developer8cb3ac72022-07-04 10:55:14 +08002387+ u16 addr_type = 0;
developer8cb3ac72022-07-04 10:55:14 +08002388+ u8 l4proto = 0;
2389+ int err = 0;
developer8cb3ac72022-07-04 10:55:14 +08002390+ int i;
2391+
2392+ if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params))
2393+ return -EEXIST;
2394+
2395+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
2396+ struct flow_match_meta match;
2397+
2398+ flow_rule_match_meta(rule, &match);
2399+ } else {
2400+ return -EOPNOTSUPP;
2401+ }
2402+
2403+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2404+ struct flow_match_control match;
2405+
2406+ flow_rule_match_control(rule, &match);
2407+ addr_type = match.key->addr_type;
2408+ } else {
2409+ return -EOPNOTSUPP;
2410+ }
2411+
2412+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2413+ struct flow_match_basic match;
2414+
2415+ flow_rule_match_basic(rule, &match);
2416+ l4proto = match.key->ip_proto;
2417+ } else {
2418+ return -EOPNOTSUPP;
2419+ }
2420+
developer7eb15dc2023-06-14 17:44:03 +08002421+ switch (addr_type) {
2422+ case 0:
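+		/* no L3 address type was matched: treat this as an L2 bridged flow */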
2423+ offload_type = MTK_PPE_PKT_TYPE_BRIDGE;
2424+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2425+ struct flow_match_eth_addrs match;
2426+
2427+ flow_rule_match_eth_addrs(rule, &match);
2428+ memcpy(data.eth.h_dest, match.key->dst, ETH_ALEN);
2429+ memcpy(data.eth.h_source, match.key->src, ETH_ALEN);
2430+ } else {
2431+ return -EOPNOTSUPP;
2432+ }
2433+
2434+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
2435+ struct flow_match_vlan match;
2436+
2437+ flow_rule_match_vlan(rule, &match);
2438+
2439+ if (match.key->vlan_tpid != cpu_to_be16(ETH_P_8021Q))
2440+ return -EOPNOTSUPP;
2441+
2442+ data.vlan_in = match.key->vlan_id;
2443+ }
2444+ break;
2445+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
2446+ offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
2447+ break;
2448+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
2449+ offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T;
2450+ break;
2451+ default:
2452+ return -EOPNOTSUPP;
2453+ }
2454+
developer8cb3ac72022-07-04 10:55:14 +08002455+ flow_action_for_each(i, act, &rule->action) {
2456+ switch (act->id) {
2457+ case FLOW_ACTION_MANGLE:
developer7eb15dc2023-06-14 17:44:03 +08002458+ if (offload_type == MTK_PPE_PKT_TYPE_BRIDGE)
2459+ return -EOPNOTSUPP;
developer8cb3ac72022-07-04 10:55:14 +08002460+ if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
2461+ mtk_flow_offload_mangle_eth(act, &data.eth);
2462+ break;
2463+ case FLOW_ACTION_REDIRECT:
2464+ odev = act->dev;
2465+ break;
2466+ case FLOW_ACTION_CSUM:
2467+ break;
2468+ case FLOW_ACTION_VLAN_PUSH:
2469+ if (data.vlan.num == 1 ||
2470+ act->vlan.proto != htons(ETH_P_8021Q))
2471+ return -EOPNOTSUPP;
2472+
2473+ data.vlan.id = act->vlan.vid;
2474+ data.vlan.proto = act->vlan.proto;
2475+ data.vlan.num++;
2476+ break;
2477+ case FLOW_ACTION_VLAN_POP:
2478+ break;
2479+ case FLOW_ACTION_PPPOE_PUSH:
2480+ if (data.pppoe.num == 1)
2481+ return -EOPNOTSUPP;
2482+
2483+ data.pppoe.sid = act->pppoe.sid;
2484+ data.pppoe.num++;
2485+ break;
2486+ default:
2487+ return -EOPNOTSUPP;
2488+ }
2489+ }
2490+
developer8cb3ac72022-07-04 10:55:14 +08002491+ if (!is_valid_ether_addr(data.eth.h_source) ||
2492+ !is_valid_ether_addr(data.eth.h_dest))
2493+ return -EINVAL;
2494+
developer7eb15dc2023-06-14 17:44:03 +08002495+ err = mtk_foe_entry_prepare(eth, &foe, offload_type, l4proto, 0,
2496+ data.eth.h_source, data.eth.h_dest);
developer8cb3ac72022-07-04 10:55:14 +08002497+ if (err)
2498+ return err;
2499+
2500+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2501+ struct flow_match_ports ports;
2502+
developer7eb15dc2023-06-14 17:44:03 +08002503+ if (offload_type == MTK_PPE_PKT_TYPE_BRIDGE)
2504+ return -EOPNOTSUPP;
2505+
developer8cb3ac72022-07-04 10:55:14 +08002506+ flow_rule_match_ports(rule, &ports);
2507+ data.src_port = ports.key->src;
2508+ data.dst_port = ports.key->dst;
developer7eb15dc2023-06-14 17:44:03 +08002509+ } else if (offload_type != MTK_PPE_PKT_TYPE_BRIDGE) {
developer8cb3ac72022-07-04 10:55:14 +08002510+ return -EOPNOTSUPP;
2511+ }
2512+
2513+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2514+ struct flow_match_ipv4_addrs addrs;
2515+
2516+ flow_rule_match_ipv4_addrs(rule, &addrs);
2517+
2518+ data.v4.src_addr = addrs.key->src;
2519+ data.v4.dst_addr = addrs.key->dst;
2520+
developer7eb15dc2023-06-14 17:44:03 +08002521+ mtk_flow_set_ipv4_addr(eth, &foe, &data, false);
developer8cb3ac72022-07-04 10:55:14 +08002522+ }
2523+
2524+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2525+ struct flow_match_ipv6_addrs addrs;
2526+
2527+ flow_rule_match_ipv6_addrs(rule, &addrs);
2528+
2529+ data.v6.src_addr = addrs.key->src;
2530+ data.v6.dst_addr = addrs.key->dst;
2531+
developer7eb15dc2023-06-14 17:44:03 +08002532+ mtk_flow_set_ipv6_addr(eth, &foe, &data);
developer8cb3ac72022-07-04 10:55:14 +08002533+ }
2534+
2535+ flow_action_for_each(i, act, &rule->action) {
2536+ if (act->id != FLOW_ACTION_MANGLE)
2537+ continue;
2538+
developer7eb15dc2023-06-14 17:44:03 +08002539+ if (offload_type == MTK_PPE_PKT_TYPE_BRIDGE)
2540+ return -EOPNOTSUPP;
2541+
developer8cb3ac72022-07-04 10:55:14 +08002542+ switch (act->mangle.htype) {
2543+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
2544+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
2545+ err = mtk_flow_mangle_ports(act, &data);
2546+ break;
2547+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
2548+ err = mtk_flow_mangle_ipv4(act, &data);
2549+ break;
2550+ case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
2551+ /* handled earlier */
2552+ break;
2553+ default:
2554+ return -EOPNOTSUPP;
2555+ }
2556+
2557+ if (err)
2558+ return err;
2559+ }
2560+
2561+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
developer7eb15dc2023-06-14 17:44:03 +08002562+ err = mtk_flow_set_ipv4_addr(eth, &foe, &data, true);
developer8cb3ac72022-07-04 10:55:14 +08002563+ if (err)
2564+ return err;
2565+ }
2566+
developer7eb15dc2023-06-14 17:44:03 +08002567+ if (offload_type == MTK_PPE_PKT_TYPE_BRIDGE)
2568+ foe.bridge.vlan = data.vlan_in;
2569+
developer8cb3ac72022-07-04 10:55:14 +08002570+ if (data.vlan.num == 1) {
2571+ if (data.vlan.proto != htons(ETH_P_8021Q))
2572+ return -EOPNOTSUPP;
2573+
developer7eb15dc2023-06-14 17:44:03 +08002574+ mtk_foe_entry_set_vlan(eth, &foe, data.vlan.id);
developer8cb3ac72022-07-04 10:55:14 +08002575+ }
2576+ if (data.pppoe.num == 1)
developer7eb15dc2023-06-14 17:44:03 +08002577+ mtk_foe_entry_set_pppoe(eth, &foe, data.pppoe.sid);
developer8cb3ac72022-07-04 10:55:14 +08002578+
developer7eb15dc2023-06-14 17:44:03 +08002579+ err = mtk_flow_set_output_device(eth, &foe, odev, data.eth.h_dest,
2580+ &wed_index);
developer8cb3ac72022-07-04 10:55:14 +08002581+ if (err)
2582+ return err;
2583+
2584+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
2585+ if (!entry)
2586+ return -ENOMEM;
2587+
2588+ entry->cookie = f->cookie;
developer7eb15dc2023-06-14 17:44:03 +08002589+ memcpy(&entry->data, &foe, sizeof(entry->data));
2590+ entry->ppe_index = ppe_index;
2591+
2592+ err = mtk_foe_entry_commit(eth->ppe[entry->ppe_index], entry);
2593+ if (err < 0)
developer8cb3ac72022-07-04 10:55:14 +08002594+ goto free;
developer8cb3ac72022-07-04 10:55:14 +08002595+
developer8cb3ac72022-07-04 10:55:14 +08002596+ err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
2597+ mtk_flow_ht_params);
2598+ if (err < 0)
developer7eb15dc2023-06-14 17:44:03 +08002599+ goto clear;
developer8cb3ac72022-07-04 10:55:14 +08002600+
2601+ return 0;
developer7eb15dc2023-06-14 17:44:03 +08002602+
2603+clear:
2604+ mtk_foe_entry_clear(eth->ppe[entry->ppe_index], entry);
developer8cb3ac72022-07-04 10:55:14 +08002605+free:
2606+ kfree(entry);
2607+ return err;
2608+}
2609+
2610+static int
2611+mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
2612+{
2613+ struct mtk_flow_entry *entry;
2614+
2615+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
2616+ mtk_flow_ht_params);
2617+ if (!entry)
2618+ return -ENOENT;
2619+
developer7eb15dc2023-06-14 17:44:03 +08002620+ mtk_foe_entry_clear(eth->ppe[entry->ppe_index], entry);
developer8cb3ac72022-07-04 10:55:14 +08002621+ rhashtable_remove_fast(&eth->flow_table, &entry->node,
2622+ mtk_flow_ht_params);
2623+ kfree(entry);
2624+
2625+ return 0;
2626+}
2627+
2628+static int
2629+mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
2630+{
2631+ struct mtk_flow_entry *entry;
developer7eb15dc2023-06-14 17:44:03 +08002632+ u64 packets, bytes;
2633+ int idle;
developer8cb3ac72022-07-04 10:55:14 +08002634+
2635+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
2636+ mtk_flow_ht_params);
2637+ if (!entry)
2638+ return -ENOENT;
2639+
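+	/* mtk_foe_entry_get_stats() refreshes entry->packets/bytes; report only the delta since the last query */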
developer7eb15dc2023-06-14 17:44:03 +08002640+ packets = entry->packets;
2641+ bytes = entry->bytes;
2642+ mtk_foe_entry_get_stats(eth->ppe[entry->ppe_index], entry, &idle);
2643+ f->stats.pkts += entry->packets - packets;
2644+ f->stats.bytes += entry->bytes - bytes;
developer8cb3ac72022-07-04 10:55:14 +08002645+ f->stats.lastused = jiffies - idle * HZ;
2646+
2647+ return 0;
2648+}
2649+
2650+static DEFINE_MUTEX(mtk_flow_offload_mutex);
2651+
developer7eb15dc2023-06-14 17:44:03 +08002652+int mtk_flow_offload_cmd(struct mtk_eth *eth, struct flow_cls_offload *cls,
2653+ int ppe_index)
developer8cb3ac72022-07-04 10:55:14 +08002654+{
developer8cb3ac72022-07-04 10:55:14 +08002655+ int err;
2656+
developer8cb3ac72022-07-04 10:55:14 +08002657+ mutex_lock(&mtk_flow_offload_mutex);
2658+ switch (cls->command) {
2659+ case FLOW_CLS_REPLACE:
developer7eb15dc2023-06-14 17:44:03 +08002660+ err = mtk_flow_offload_replace(eth, cls, ppe_index);
developer8cb3ac72022-07-04 10:55:14 +08002661+ break;
2662+ case FLOW_CLS_DESTROY:
2663+ err = mtk_flow_offload_destroy(eth, cls);
2664+ break;
2665+ case FLOW_CLS_STATS:
2666+ err = mtk_flow_offload_stats(eth, cls);
2667+ break;
2668+ default:
2669+ err = -EOPNOTSUPP;
2670+ break;
2671+ }
2672+ mutex_unlock(&mtk_flow_offload_mutex);
2673+
2674+ return err;
2675+}
2676+
2677+static int
developer7eb15dc2023-06-14 17:44:03 +08002678+mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
2679+{
2680+ struct flow_cls_offload *cls = type_data;
2681+ struct net_device *dev = cb_priv;
2682+ struct mtk_mac *mac = netdev_priv(dev);
2683+ struct mtk_eth *eth = mac->hw;
2684+
2685+ if (!tc_can_offload(dev))
2686+ return -EOPNOTSUPP;
2687+
2688+ if (type != TC_SETUP_CLSFLOWER)
2689+ return -EOPNOTSUPP;
2690+
2691+ return mtk_flow_offload_cmd(eth, cls, 0);
2692+}
2693+
2694+static int
developer8cb3ac72022-07-04 10:55:14 +08002695+mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
2696+{
2697+ struct mtk_mac *mac = netdev_priv(dev);
2698+ struct mtk_eth *eth = mac->hw;
2699+ static LIST_HEAD(block_cb_list);
2700+ struct flow_block_cb *block_cb;
2701+ flow_setup_cb_t *cb;
developer207b39d2022-10-07 15:57:16 +08002702+
developer7eb15dc2023-06-14 17:44:03 +08002703+ if (!eth->soc->offload_version)
developer8cb3ac72022-07-04 10:55:14 +08002704+ return -EOPNOTSUPP;
2705+
2706+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
2707+ return -EOPNOTSUPP;
2708+
2709+ cb = mtk_eth_setup_tc_block_cb;
2710+ f->driver_block_list = &block_cb_list;
2711+
2712+ switch (f->command) {
2713+ case FLOW_BLOCK_BIND:
2714+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
2715+ if (block_cb) {
2716+ flow_block_cb_incref(block_cb);
developer7eb15dc2023-06-14 17:44:03 +08002717+ return 0;
developer8cb3ac72022-07-04 10:55:14 +08002718+ }
2719+ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
developer7eb15dc2023-06-14 17:44:03 +08002720+ if (IS_ERR(block_cb))
2721+ return PTR_ERR(block_cb);
developer8cb3ac72022-07-04 10:55:14 +08002722+
developer7eb15dc2023-06-14 17:44:03 +08002723+ flow_block_cb_incref(block_cb);
developer8cb3ac72022-07-04 10:55:14 +08002724+ flow_block_cb_add(block_cb, f);
2725+ list_add_tail(&block_cb->driver_list, &block_cb_list);
developer7eb15dc2023-06-14 17:44:03 +08002726+ return 0;
developer8cb3ac72022-07-04 10:55:14 +08002727+ case FLOW_BLOCK_UNBIND:
2728+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
developer7eb15dc2023-06-14 17:44:03 +08002729+ if (!block_cb)
2730+ return -ENOENT;
developer8cb3ac72022-07-04 10:55:14 +08002731+
developer7eb15dc2023-06-14 17:44:03 +08002732+ if (!flow_block_cb_decref(block_cb)) {
developer8cb3ac72022-07-04 10:55:14 +08002733+ flow_block_cb_remove(block_cb, f);
2734+ list_del(&block_cb->driver_list);
2735+ }
developer7eb15dc2023-06-14 17:44:03 +08002736+ return 0;
developer8cb3ac72022-07-04 10:55:14 +08002737+ default:
developer7eb15dc2023-06-14 17:44:03 +08002738+ return -EOPNOTSUPP;
developer8cb3ac72022-07-04 10:55:14 +08002739+ }
2740+}
2741+
2742+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
2743+ void *type_data)
2744+{
developer7eb15dc2023-06-14 17:44:03 +08002745+ switch (type) {
2746+ case TC_SETUP_BLOCK:
2747+ case TC_SETUP_FT:
developer8cb3ac72022-07-04 10:55:14 +08002748+ return mtk_eth_setup_tc_block(dev, type_data);
developer7eb15dc2023-06-14 17:44:03 +08002749+ default:
2750+ return -EOPNOTSUPP;
2751+ }
developer8cb3ac72022-07-04 10:55:14 +08002752+}
2753+
2754+int mtk_eth_offload_init(struct mtk_eth *eth)
2755+{
developer8cb3ac72022-07-04 10:55:14 +08002756+ return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
2757+}
2758diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
2759new file mode 100644
developer7eb15dc2023-06-14 17:44:03 +08002760index 0000000..a2e61b3
developer8cb3ac72022-07-04 10:55:14 +08002761--- /dev/null
2762+++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
developer7eb15dc2023-06-14 17:44:03 +08002763@@ -0,0 +1,172 @@
developer8cb3ac72022-07-04 10:55:14 +08002764+// SPDX-License-Identifier: GPL-2.0-only
2765+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
2766+
2767+#ifndef __MTK_PPE_REGS_H
2768+#define __MTK_PPE_REGS_H
2769+
2770+#define MTK_PPE_GLO_CFG 0x200
2771+#define MTK_PPE_GLO_CFG_EN BIT(0)
2772+#define MTK_PPE_GLO_CFG_TSID_EN BIT(1)
2773+#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP BIT(2)
2774+#define MTK_PPE_GLO_CFG_IP4_CS_DROP BIT(3)
2775+#define MTK_PPE_GLO_CFG_TTL0_DROP BIT(4)
2776+#define MTK_PPE_GLO_CFG_PPE_BSWAP BIT(5)
2777+#define MTK_PPE_GLO_CFG_PSE_HASH_OFS BIT(6)
2778+#define MTK_PPE_GLO_CFG_MCAST_TB_EN BIT(7)
2779+#define MTK_PPE_GLO_CFG_FLOW_DROP_KA BIT(8)
2780+#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE BIT(9)
2781+#define MTK_PPE_GLO_CFG_UDP_LITE_EN BIT(10)
2782+#define MTK_PPE_GLO_CFG_UDP_LEN_DROP BIT(11)
2783+#define MTK_PPE_GLO_CFG_MCAST_ENTRIES	GENMASK(13, 12)
2784+#define MTK_PPE_GLO_CFG_BUSY BIT(31)
2785+
2786+#define MTK_PPE_FLOW_CFG 0x204
developer7eb15dc2023-06-14 17:44:03 +08002787+#define MTK_PPE_MD_TOAP_BYP_CRSN0 BIT(1)
2788+#define MTK_PPE_MD_TOAP_BYP_CRSN1 BIT(2)
2789+#define MTK_PPE_MD_TOAP_BYP_CRSN2 BIT(3)
developer8cb3ac72022-07-04 10:55:14 +08002790+#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG BIT(6)
2791+#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG BIT(7)
2792+#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE BIT(8)
2793+#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE BIT(9)
2794+#define MTK_PPE_FLOW_CFG_IP6_6RD BIT(10)
2795+#define MTK_PPE_FLOW_CFG_IP4_NAT BIT(12)
2796+#define MTK_PPE_FLOW_CFG_IP4_NAPT BIT(13)
2797+#define MTK_PPE_FLOW_CFG_IP4_DSLITE BIT(14)
2798+#define MTK_PPE_FLOW_CFG_L2_BRIDGE BIT(15)
2799+#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST BIT(16)
2800+#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG BIT(17)
2801+#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL BIT(18)
2802+#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY BIT(19)
2803+#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY BIT(20)
2804+
2805+#define MTK_PPE_IP_PROTO_CHK 0x208
2806+#define MTK_PPE_IP_PROTO_CHK_IPV4 GENMASK(15, 0)
2807+#define MTK_PPE_IP_PROTO_CHK_IPV6 GENMASK(31, 16)
2808+
2809+#define MTK_PPE_TB_CFG 0x21c
2810+#define MTK_PPE_TB_CFG_ENTRY_NUM GENMASK(2, 0)
2811+#define MTK_PPE_TB_CFG_ENTRY_80B BIT(3)
2812+#define MTK_PPE_TB_CFG_SEARCH_MISS GENMASK(5, 4)
2813+#define MTK_PPE_TB_CFG_AGE_PREBIND BIT(6)
2814+#define MTK_PPE_TB_CFG_AGE_NON_L4 BIT(7)
2815+#define MTK_PPE_TB_CFG_AGE_UNBIND BIT(8)
2816+#define MTK_PPE_TB_CFG_AGE_TCP BIT(9)
2817+#define MTK_PPE_TB_CFG_AGE_UDP BIT(10)
2818+#define MTK_PPE_TB_CFG_AGE_TCP_FIN BIT(11)
2819+#define MTK_PPE_TB_CFG_KEEPALIVE GENMASK(13, 12)
2820+#define MTK_PPE_TB_CFG_HASH_MODE GENMASK(15, 14)
2821+#define MTK_PPE_TB_CFG_SCAN_MODE GENMASK(17, 16)
2822+#define MTK_PPE_TB_CFG_HASH_DEBUG GENMASK(19, 18)
developer7eb15dc2023-06-14 17:44:03 +08002823+#define MTK_PPE_TB_CFG_INFO_SEL BIT(20)
2824+#define MTK_PPE_TB_TICK_SEL BIT(24)
2825+
2826+#define MTK_PPE_BIND_LMT1 0x230
2827+#define MTK_PPE_NTU_KEEPALIVE GENMASK(23, 16)
2828+
2829+#define MTK_PPE_KEEPALIVE 0x234
developer8cb3ac72022-07-04 10:55:14 +08002830+
2831+enum {
2832+ MTK_PPE_SCAN_MODE_DISABLED,
2833+ MTK_PPE_SCAN_MODE_CHECK_AGE,
2834+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE,
2835+};
2836+
2837+enum {
2838+ MTK_PPE_KEEPALIVE_DISABLE,
2839+ MTK_PPE_KEEPALIVE_UNICAST_CPU,
2840+ MTK_PPE_KEEPALIVE_DUP_CPU = 3,
2841+};
2842+
2843+enum {
2844+ MTK_PPE_SEARCH_MISS_ACTION_DROP,
2845+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2,
2846+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3,
2847+};
2848+
2849+#define MTK_PPE_TB_BASE 0x220
2850+
2851+#define MTK_PPE_TB_USED 0x224
2852+#define MTK_PPE_TB_USED_NUM GENMASK(13, 0)
2853+
2854+#define MTK_PPE_BIND_RATE 0x228
2855+#define MTK_PPE_BIND_RATE_BIND GENMASK(15, 0)
2856+#define MTK_PPE_BIND_RATE_PREBIND GENMASK(31, 16)
2857+
2858+#define MTK_PPE_BIND_LIMIT0 0x22c
2859+#define MTK_PPE_BIND_LIMIT0_QUARTER GENMASK(13, 0)
2860+#define MTK_PPE_BIND_LIMIT0_HALF GENMASK(29, 16)
2861+
2862+#define MTK_PPE_BIND_LIMIT1 0x230
2863+#define MTK_PPE_BIND_LIMIT1_FULL GENMASK(13, 0)
2864+#define MTK_PPE_BIND_LIMIT1_NON_L4 GENMASK(23, 16)
2865+
2866+#define MTK_PPE_KEEPALIVE 0x234
2867+#define MTK_PPE_KEEPALIVE_TIME GENMASK(15, 0)
2868+#define MTK_PPE_KEEPALIVE_TIME_TCP GENMASK(23, 16)
2869+#define MTK_PPE_KEEPALIVE_TIME_UDP GENMASK(31, 24)
2870+
2871+#define MTK_PPE_UNBIND_AGE 0x238
2872+#define MTK_PPE_UNBIND_AGE_MIN_PACKETS GENMASK(31, 16)
2873+#define MTK_PPE_UNBIND_AGE_DELTA GENMASK(7, 0)
2874+
2875+#define MTK_PPE_BIND_AGE0 0x23c
2876+#define MTK_PPE_BIND_AGE0_DELTA_NON_L4 GENMASK(30, 16)
2877+#define MTK_PPE_BIND_AGE0_DELTA_UDP GENMASK(14, 0)
2878+
2879+#define MTK_PPE_BIND_AGE1 0x240
2880+#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN GENMASK(30, 16)
2881+#define MTK_PPE_BIND_AGE1_DELTA_TCP GENMASK(14, 0)
2882+
2883+#define MTK_PPE_HASH_SEED 0x244
2884+
2885+#define MTK_PPE_DEFAULT_CPU_PORT 0x248
2886+#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n) (GENMASK(2, 0) << ((_n) * 4))
2887+
developer7eb15dc2023-06-14 17:44:03 +08002888+#define MTK_PPE_DEFAULT_CPU_PORT1 0x24c
2889+
developer8cb3ac72022-07-04 10:55:14 +08002890+#define MTK_PPE_MTU_DROP 0x308
2891+
2892+#define MTK_PPE_VLAN_MTU0 0x30c
2893+#define MTK_PPE_VLAN_MTU0_NONE GENMASK(13, 0)
2894+#define MTK_PPE_VLAN_MTU0_1TAG GENMASK(29, 16)
2895+
2896+#define MTK_PPE_VLAN_MTU1 0x310
2897+#define MTK_PPE_VLAN_MTU1_2TAG GENMASK(13, 0)
2898+#define MTK_PPE_VLAN_MTU1_3TAG GENMASK(29, 16)
2899+
2900+#define MTK_PPE_VPM_TPID 0x318
2901+
2902+#define MTK_PPE_CACHE_CTL 0x320
2903+#define MTK_PPE_CACHE_CTL_EN BIT(0)
2904+#define MTK_PPE_CACHE_CTL_LOCK_CLR BIT(4)
2905+#define MTK_PPE_CACHE_CTL_REQ BIT(8)
2906+#define MTK_PPE_CACHE_CTL_CLEAR BIT(9)
2907+#define MTK_PPE_CACHE_CTL_CMD GENMASK(13, 12)
2908+
2909+#define MTK_PPE_MIB_CFG 0x334
2910+#define MTK_PPE_MIB_CFG_EN BIT(0)
2911+#define MTK_PPE_MIB_CFG_RD_CLR BIT(1)
2912+
2913+#define MTK_PPE_MIB_TB_BASE 0x338
2914+
developer7eb15dc2023-06-14 17:44:03 +08002915+#define MTK_PPE_MIB_SER_CR 0x33C
2916+#define MTK_PPE_MIB_SER_CR_ST BIT(16)
2917+#define MTK_PPE_MIB_SER_CR_ADDR GENMASK(13, 0)
2918+
2919+#define MTK_PPE_MIB_SER_R0 0x340
2920+#define MTK_PPE_MIB_SER_R0_BYTE_CNT_LOW GENMASK(31, 0)
2921+
2922+#define MTK_PPE_MIB_SER_R1 0x344
2923+#define MTK_PPE_MIB_SER_R1_PKT_CNT_LOW GENMASK(31, 16)
2924+#define MTK_PPE_MIB_SER_R1_BYTE_CNT_HIGH GENMASK(15, 0)
2925+
2926+#define MTK_PPE_MIB_SER_R2 0x348
2927+#define MTK_PPE_MIB_SER_R2_PKT_CNT_HIGH GENMASK(23, 0)
2928+
developer8cb3ac72022-07-04 10:55:14 +08002929+#define MTK_PPE_MIB_CACHE_CTL 0x350
2930+#define MTK_PPE_MIB_CACHE_CTL_EN BIT(0)
2931+#define MTK_PPE_MIB_CACHE_CTL_FLUSH BIT(2)
2932+
developer7eb15dc2023-06-14 17:44:03 +08002933+#define MTK_PPE_SBW_CTRL 0x374
2934+
developer8cb3ac72022-07-04 10:55:14 +08002935+#endif
2936diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
developer7eb15dc2023-06-14 17:44:03 +08002937index 078c0f4..f8a98d8 100644
developer8cb3ac72022-07-04 10:55:14 +08002938--- a/drivers/net/ppp/ppp_generic.c
2939+++ b/drivers/net/ppp/ppp_generic.c
2940@@ -1378,12 +1378,34 @@ static void ppp_dev_priv_destructor(struct net_device *dev)
2941 ppp_destroy_interface(ppp);
2942 }
2943
2944+static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
2945+ struct net_device_path *path)
2946+{
2947+ struct ppp *ppp = netdev_priv(ctx->dev);
2948+ struct ppp_channel *chan;
2949+ struct channel *pch;
2950+
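+	/* forwarding-path lookup only makes sense for a single-channel, non-multilink PPP unit */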
2951+ if (ppp->flags & SC_MULTILINK)
2952+ return -EOPNOTSUPP;
2953+
2954+ if (list_empty(&ppp->channels))
2955+ return -ENODEV;
2956+
2957+ pch = list_first_entry(&ppp->channels, struct channel, clist);
2958+ chan = pch->chan;
2959+ if (!chan->ops->fill_forward_path)
2960+ return -EOPNOTSUPP;
2961+
2962+ return chan->ops->fill_forward_path(ctx, path, chan);
2963+}
2964+
2965 static const struct net_device_ops ppp_netdev_ops = {
2966 .ndo_init = ppp_dev_init,
2967 .ndo_uninit = ppp_dev_uninit,
2968 .ndo_start_xmit = ppp_start_xmit,
2969 .ndo_do_ioctl = ppp_net_ioctl,
2970 .ndo_get_stats64 = ppp_get_stats64,
2971+ .ndo_fill_forward_path = ppp_fill_forward_path,
2972 };
2973
2974 static struct device_type ppp_type = {
2975diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
developer7eb15dc2023-06-14 17:44:03 +08002976index 087b016..7a8c246 100644
developer8cb3ac72022-07-04 10:55:14 +08002977--- a/drivers/net/ppp/pppoe.c
2978+++ b/drivers/net/ppp/pppoe.c
2979@@ -974,8 +974,32 @@ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
2980 return __pppoe_xmit(sk, skb);
2981 }
2982
2983+static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
2984+ struct net_device_path *path,
2985+ const struct ppp_channel *chan)
2986+{
2987+ struct sock *sk = (struct sock *)chan->private;
2988+ struct pppox_sock *po = pppox_sk(sk);
2989+ struct net_device *dev = po->pppoe_dev;
2990+
2991+ if (sock_flag(sk, SOCK_DEAD) ||
2992+ !(sk->sk_state & PPPOX_CONNECTED) || !dev)
2993+ return -1;
2994+
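+	/* record the PPPoE session id and peer MAC, then continue path resolution on the underlying Ethernet device */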
2995+ path->type = DEV_PATH_PPPOE;
2996+ path->encap.proto = htons(ETH_P_PPP_SES);
2997+ path->encap.id = be16_to_cpu(po->num);
2998+ memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
2999+ memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
3000+ path->dev = ctx->dev;
3001+ ctx->dev = dev;
3002+
3003+ return 0;
3004+}
3005+
3006 static const struct ppp_channel_ops pppoe_chan_ops = {
3007 .start_xmit = pppoe_xmit,
3008+ .fill_forward_path = pppoe_fill_forward_path,
3009 };
3010
3011 static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
3012diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
developer7eb15dc2023-06-14 17:44:03 +08003013index c2953e8..a921fa5 100644
developer8cb3ac72022-07-04 10:55:14 +08003014--- a/include/linux/netdevice.h
3015+++ b/include/linux/netdevice.h
developer7eb15dc2023-06-14 17:44:03 +08003016@@ -838,6 +838,66 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08003017 struct sk_buff *skb,
3018 struct net_device *sb_dev);
3019
3020+enum net_device_path_type {
3021+ DEV_PATH_ETHERNET = 0,
3022+ DEV_PATH_VLAN,
3023+ DEV_PATH_BRIDGE,
3024+ DEV_PATH_PPPOE,
3025+ DEV_PATH_DSA,
developer7eb15dc2023-06-14 17:44:03 +08003026+ DEV_PATH_MTK_WDMA,
developer8cb3ac72022-07-04 10:55:14 +08003027+};
3028+
3029+struct net_device_path {
3030+ enum net_device_path_type type;
3031+ const struct net_device *dev;
3032+ union {
3033+ struct {
3034+ u16 id;
3035+ __be16 proto;
3036+ u8 h_dest[ETH_ALEN];
3037+ } encap;
3038+ struct {
3039+ enum {
3040+ DEV_PATH_BR_VLAN_KEEP,
3041+ DEV_PATH_BR_VLAN_TAG,
3042+ DEV_PATH_BR_VLAN_UNTAG,
3043+ DEV_PATH_BR_VLAN_UNTAG_HW,
3044+ } vlan_mode;
3045+ u16 vlan_id;
3046+ __be16 vlan_proto;
3047+ } bridge;
3048+ struct {
3049+ int port;
3050+ u16 proto;
3051+ } dsa;
developer7eb15dc2023-06-14 17:44:03 +08003052+ struct {
3053+ u8 wdma_idx;
3054+ u8 queue;
3055+ u16 wcid;
3056+ u8 bss;
3057+ } mtk_wdma;
developer8cb3ac72022-07-04 10:55:14 +08003058+ };
3059+};
3060+
3061+#define NET_DEVICE_PATH_STACK_MAX 5
3062+#define NET_DEVICE_PATH_VLAN_MAX 2
3063+
3064+struct net_device_path_stack {
3065+ int num_paths;
3066+ struct net_device_path path[NET_DEVICE_PATH_STACK_MAX];
3067+};
3068+
3069+struct net_device_path_ctx {
3070+ const struct net_device *dev;
3071+ u8 daddr[ETH_ALEN];
3072+
3073+ int num_vlans;
3074+ struct {
3075+ u16 id;
3076+ __be16 proto;
3077+ } vlan[NET_DEVICE_PATH_VLAN_MAX];
3078+};
3079+
3080 enum tc_setup_type {
3081 TC_SETUP_QDISC_MQPRIO,
3082 TC_SETUP_CLSU32,
developer7eb15dc2023-06-14 17:44:03 +08003083@@ -853,6 +913,7 @@ enum tc_setup_type {
developer8cb3ac72022-07-04 10:55:14 +08003084 TC_SETUP_ROOT_QDISC,
3085 TC_SETUP_QDISC_GRED,
3086 TC_SETUP_QDISC_TAPRIO,
3087+ TC_SETUP_FT,
3088 };
3089
3090 /* These structures hold the attributes of bpf state that are being passed
developer7eb15dc2023-06-14 17:44:03 +08003091@@ -1248,6 +1309,10 @@ struct tlsdev_ops;
developer8cb3ac72022-07-04 10:55:14 +08003092 * Get devlink port instance associated with a given netdev.
3093 * Called with a reference on the netdevice and devlink locks only,
3094 * rtnl_lock is not held.
3095+ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
3096+ * Get the forwarding path to reach the real device from the HW destination address
developer7eb15dc2023-06-14 17:44:03 +08003097+ * int (*ndo_fill_receive_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
3098+ * Get the receiving path to reach the real device from the HW source address
developer8cb3ac72022-07-04 10:55:14 +08003099 */
3100 struct net_device_ops {
3101 int (*ndo_init)(struct net_device *dev);
developer7eb15dc2023-06-14 17:44:03 +08003102@@ -1445,6 +1510,10 @@ struct net_device_ops {
developer8cb3ac72022-07-04 10:55:14 +08003103 int (*ndo_xsk_wakeup)(struct net_device *dev,
3104 u32 queue_id, u32 flags);
3105 struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
3106+ int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
3107+ struct net_device_path *path);
developer7eb15dc2023-06-14 17:44:03 +08003108+ int (*ndo_fill_receive_path)(struct net_device_path_ctx *ctx,
3109+ struct net_device_path *path);
developer8cb3ac72022-07-04 10:55:14 +08003110 };
3111
3112 /**
developer7eb15dc2023-06-14 17:44:03 +08003113@@ -2670,6 +2739,8 @@ void dev_remove_offload(struct packet_offload *po);
developer8cb3ac72022-07-04 10:55:14 +08003114
3115 int dev_get_iflink(const struct net_device *dev);
3116 int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
3117+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
3118+ struct net_device_path_stack *stack);
3119 struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
3120 unsigned short mask);
3121 struct net_device *dev_get_by_name(struct net *net, const char *name);
3122diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
developer7eb15dc2023-06-14 17:44:03 +08003123index 9896606..91f9a92 100644
developer8cb3ac72022-07-04 10:55:14 +08003124--- a/include/linux/ppp_channel.h
3125+++ b/include/linux/ppp_channel.h
3126@@ -28,6 +28,9 @@ struct ppp_channel_ops {
3127 int (*start_xmit)(struct ppp_channel *, struct sk_buff *);
3128 /* Handle an ioctl call that has come in via /dev/ppp. */
3129 int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
3130+ int (*fill_forward_path)(struct net_device_path_ctx *,
3131+ struct net_device_path *,
3132+ const struct ppp_channel *);
3133 };
3134
3135 struct ppp_channel {
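The new fill_forward_path channel op mirrors ndo_fill_forward_path for PPP: it lets the channel backend (PPPoE in this series) report its encapsulation and hand resolution over to the ethernet device it runs on. The sketch below only illustrates the calling convention; the backend fields (session id, peer MAC, lower device) and the DEV_PATH_PPPOE/encap members are assumptions based on the upstream series this is backported from, not copied from this patch:

static int foo_chan_fill_forward_path(struct net_device_path_ctx *ctx,
                                      struct net_device_path *path,
                                      const struct ppp_channel *chan)
{
        struct foo_chan *fc = chan->private;    /* hypothetical backend state */

        path->type = DEV_PATH_PPPOE;
        path->encap.proto = htons(ETH_P_PPP_SES);
        path->encap.id = fc->session_id;        /* PPPoE session id */
        memcpy(path->encap.h_dest, fc->peer_mac, ETH_ALEN);
        ctx->dev = fc->lower_dev;               /* the ethernet device underneath */

        return 0;
}

The op is then wired into the channel's struct ppp_channel_ops next to start_xmit and ioctl.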
3136diff --git a/include/net/dsa.h b/include/net/dsa.h
developer7eb15dc2023-06-14 17:44:03 +08003137index d29ee9e..43f65cb 100644
developer8cb3ac72022-07-04 10:55:14 +08003138--- a/include/net/dsa.h
3139+++ b/include/net/dsa.h
developer7eb15dc2023-06-14 17:44:03 +08003140@@ -562,6 +562,8 @@ struct dsa_switch_ops {
developer8cb3ac72022-07-04 10:55:14 +08003141 struct sk_buff *skb);
3142 };
3143
3144+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev);
3145+
3146 struct dsa_switch_driver {
3147 struct list_head list;
3148 const struct dsa_switch_ops *ops;
developer7eb15dc2023-06-14 17:44:03 +08003149@@ -654,6 +656,14 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08003150 #define BRCM_TAG_GET_PORT(v) ((v) >> 8)
3151 #define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff)
3152
3153+#if IS_ENABLED(CONFIG_NET_DSA)
3154+bool dsa_slave_dev_check(const struct net_device *dev);
3155+#else
3156+static inline bool dsa_slave_dev_check(const struct net_device *dev)
3157+{
3158+ return false;
3159+}
3160+#endif
3161
3162 netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
3163 int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
3164diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
developer7eb15dc2023-06-14 17:44:03 +08003165index c6f7bd2..59b8736 100644
developer8cb3ac72022-07-04 10:55:14 +08003166--- a/include/net/flow_offload.h
3167+++ b/include/net/flow_offload.h
3168@@ -138,6 +138,7 @@ enum flow_action_id {
3169 FLOW_ACTION_MPLS_PUSH,
3170 FLOW_ACTION_MPLS_POP,
3171 FLOW_ACTION_MPLS_MANGLE,
3172+ FLOW_ACTION_PPPOE_PUSH,
3173 NUM_FLOW_ACTIONS,
3174 };
3175
3176@@ -213,6 +214,9 @@ struct flow_action_entry {
3177 u8 bos;
3178 u8 ttl;
3179 } mpls_mangle;
3180+ struct { /* FLOW_ACTION_PPPOE_PUSH */
3181+ u16 sid;
3182+ } pppoe;
3183 };
3184 };
3185
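FLOW_ACTION_PPPOE_PUSH carries only the 16-bit session id; a driver that translates flow rules into hardware entries picks it up while walking the rule's action list, roughly as below (the foo_* names and struct are placeholders invented for this sketch):

struct foo_flow_entry {                 /* hypothetical driver-side state */
        u16 pppoe_sid;
};

static int foo_parse_actions(struct foo_flow_entry *entry,
                             const struct flow_rule *rule)
{
        const struct flow_action_entry *act;
        int i;

        flow_action_for_each(i, act, &rule->action) {
                switch (act->id) {
                case FLOW_ACTION_PPPOE_PUSH:
                        entry->pppoe_sid = act->pppoe.sid;  /* session id to push */
                        break;
                /* ... mangle/redirect/vlan actions handled here ... */
                default:
                        return -EOPNOTSUPP;
                }
        }

        return 0;
}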
3186diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
developer7eb15dc2023-06-14 17:44:03 +08003187index 2c739fc..89ab8f1 100644
developer8cb3ac72022-07-04 10:55:14 +08003188--- a/include/net/ip6_route.h
3189+++ b/include/net/ip6_route.h
3190@@ -314,12 +314,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
3191 !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
3192 }
3193
3194-static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
3195+static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst,
3196+ bool forwarding)
3197 {
3198 struct inet6_dev *idev;
3199 unsigned int mtu;
3200
3201- if (dst_metric_locked(dst, RTAX_MTU)) {
3202+ if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) {
3203 mtu = dst_metric_raw(dst, RTAX_MTU);
3204 if (mtu)
3205 goto out;
3206diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
developer7eb15dc2023-06-14 17:44:03 +08003207index 7b3c873..e954831 100644
developer8cb3ac72022-07-04 10:55:14 +08003208--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
3209+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
3210@@ -4,7 +4,4 @@
3211
3212 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
3213
3214-#include <linux/sysctl.h>
3215-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
3216-
3217 #endif /* _NF_CONNTRACK_IPV6_H*/
3218diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
developer7eb15dc2023-06-14 17:44:03 +08003219index 90690e3..ce0bc3e 100644
developer8cb3ac72022-07-04 10:55:14 +08003220--- a/include/net/netfilter/nf_conntrack.h
3221+++ b/include/net/netfilter/nf_conntrack.h
3222@@ -279,6 +279,18 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
3223 !nf_ct_is_dying(ct);
3224 }
3225
3226+#define NF_CT_DAY (86400 * HZ)
3227+
3228+/* Set an arbitrary timeout large enough not to ever expire, this saves
3229+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
3230+ * nf_ct_is_expired().
3231+ */
3232+static inline void nf_ct_offload_timeout(struct nf_conn *ct)
3233+{
3234+ if (nf_ct_expires(ct) < NF_CT_DAY / 2)
3235+ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
3236+}
3237+
3238 struct kernel_param;
3239
3240 int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
3241diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
developer7eb15dc2023-06-14 17:44:03 +08003242index f7a060c..7f44a77 100644
developer8cb3ac72022-07-04 10:55:14 +08003243--- a/include/net/netfilter/nf_conntrack_acct.h
3244+++ b/include/net/netfilter/nf_conntrack_acct.h
3245@@ -65,6 +65,17 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
3246 #endif
3247 }
3248
3249+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
3250+ unsigned int bytes);
3251+
3252+static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
3253+ unsigned int bytes)
3254+{
3255+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
3256+ nf_ct_acct_add(ct, dir, 1, bytes);
3257+#endif
3258+}
3259+
3260 void nf_conntrack_acct_pernet_init(struct net *net);
3261
3262 int nf_conntrack_acct_init(void);
3263diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
developer7eb15dc2023-06-14 17:44:03 +08003264index 68d7fc9..7cf8976 100644
developer8cb3ac72022-07-04 10:55:14 +08003265--- a/include/net/netfilter/nf_flow_table.h
3266+++ b/include/net/netfilter/nf_flow_table.h
3267@@ -8,31 +8,99 @@
3268 #include <linux/rcupdate.h>
3269 #include <linux/netfilter.h>
3270 #include <linux/netfilter/nf_conntrack_tuple_common.h>
3271+#include <net/flow_offload.h>
3272 #include <net/dst.h>
3273+#include <linux/if_pppox.h>
3274+#include <linux/ppp_defs.h>
3275
3276 struct nf_flowtable;
3277+struct nf_flow_rule;
3278+struct flow_offload;
3279+enum flow_offload_tuple_dir;
3280+
3281+struct nf_flow_key {
3282+ struct flow_dissector_key_meta meta;
3283+ struct flow_dissector_key_control control;
3284+ struct flow_dissector_key_control enc_control;
3285+ struct flow_dissector_key_basic basic;
3286+ struct flow_dissector_key_vlan vlan;
3287+ struct flow_dissector_key_vlan cvlan;
3288+ union {
3289+ struct flow_dissector_key_ipv4_addrs ipv4;
3290+ struct flow_dissector_key_ipv6_addrs ipv6;
3291+ };
3292+ struct flow_dissector_key_keyid enc_key_id;
3293+ union {
3294+ struct flow_dissector_key_ipv4_addrs enc_ipv4;
3295+ struct flow_dissector_key_ipv6_addrs enc_ipv6;
3296+ };
3297+ struct flow_dissector_key_tcp tcp;
3298+ struct flow_dissector_key_ports tp;
3299+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
3300+
3301+struct nf_flow_match {
3302+ struct flow_dissector dissector;
3303+ struct nf_flow_key key;
3304+ struct nf_flow_key mask;
3305+};
3306+
3307+struct nf_flow_rule {
3308+ struct nf_flow_match match;
3309+ struct flow_rule *rule;
3310+};
3311
3312 struct nf_flowtable_type {
3313 struct list_head list;
3314 int family;
3315 int (*init)(struct nf_flowtable *ft);
3316+ int (*setup)(struct nf_flowtable *ft,
3317+ struct net_device *dev,
3318+ enum flow_block_command cmd);
3319+ int (*action)(struct net *net,
3320+ const struct flow_offload *flow,
3321+ enum flow_offload_tuple_dir dir,
3322+ struct nf_flow_rule *flow_rule);
3323 void (*free)(struct nf_flowtable *ft);
3324 nf_hookfn *hook;
3325 struct module *owner;
3326 };
3327
3328+enum nf_flowtable_flags {
3329+ NF_FLOWTABLE_HW_OFFLOAD = 0x1, /* NFT_FLOWTABLE_HW_OFFLOAD */
3330+ NF_FLOWTABLE_COUNTER = 0x2, /* NFT_FLOWTABLE_COUNTER */
3331+};
3332+
3333 struct nf_flowtable {
3334 struct list_head list;
3335 struct rhashtable rhashtable;
3336+ int priority;
3337 const struct nf_flowtable_type *type;
3338 struct delayed_work gc_work;
3339+ unsigned int flags;
3340+ struct flow_block flow_block;
3341+ struct rw_semaphore flow_block_lock; /* Guards flow_block */
3342+ possible_net_t net;
3343 };
3344
3345+static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
3346+{
3347+ return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD;
3348+}
3349+
3350 enum flow_offload_tuple_dir {
3351 FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
3352 FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
3353- FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
3354 };
3355+#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
3356+
3357+enum flow_offload_xmit_type {
3358+ FLOW_OFFLOAD_XMIT_UNSPEC = 0,
3359+ FLOW_OFFLOAD_XMIT_NEIGH,
3360+ FLOW_OFFLOAD_XMIT_XFRM,
3361+ FLOW_OFFLOAD_XMIT_DIRECT,
3362+};
3363+
3364+#define NF_FLOW_TABLE_ENCAP_MAX 2
3365
3366 struct flow_offload_tuple {
3367 union {
developerb7c46752022-07-04 19:51:38 +08003368@@ -52,13 +120,30 @@ struct flow_offload_tuple {
developer8cb3ac72022-07-04 10:55:14 +08003369
3370 u8 l3proto;
3371 u8 l4proto;
3372- u8 dir;
3373+ struct {
3374+ u16 id;
3375+ __be16 proto;
3376+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
3377
3378- u16 mtu;
3379+ /* All members above are keys for lookups, see flow_offload_hash(). */
3380+ struct { } __hash;
3381
developerb7c46752022-07-04 19:51:38 +08003382- struct {
3383- struct dst_entry *dst_cache;
3384- u32 dst_cookie;
developer8cb3ac72022-07-04 10:55:14 +08003385+ u8 dir:2,
3386+ xmit_type:2,
3387+ encap_num:2,
3388+ in_vlan_ingress:2;
3389+ u16 mtu;
3390+ union {
3391+ struct {
3392+ struct dst_entry *dst_cache;
3393+ u32 dst_cookie;
3394+ };
3395+ struct {
3396+ u32 ifidx;
3397+ u32 hw_ifidx;
3398+ u8 h_source[ETH_ALEN];
3399+ u8 h_dest[ETH_ALEN];
3400+ } out;
developerb7c46752022-07-04 19:51:38 +08003401 };
developer8cb3ac72022-07-04 10:55:14 +08003402 };
3403
developer7eb15dc2023-06-14 17:44:03 +08003404@@ -67,52 +152,139 @@ struct flow_offload_tuple_rhash {
developer8cb3ac72022-07-04 10:55:14 +08003405 struct flow_offload_tuple tuple;
3406 };
3407
3408-#define FLOW_OFFLOAD_SNAT 0x1
3409-#define FLOW_OFFLOAD_DNAT 0x2
3410-#define FLOW_OFFLOAD_DYING 0x4
3411-#define FLOW_OFFLOAD_TEARDOWN 0x8
3412+enum nf_flow_flags {
3413+ NF_FLOW_SNAT,
3414+ NF_FLOW_DNAT,
3415+ NF_FLOW_TEARDOWN,
3416+ NF_FLOW_HW,
3417+ NF_FLOW_HW_DYING,
3418+ NF_FLOW_HW_DEAD,
3419+ NF_FLOW_HW_PENDING,
3420+};
3421+
3422+enum flow_offload_type {
3423+ NF_FLOW_OFFLOAD_UNSPEC = 0,
3424+ NF_FLOW_OFFLOAD_ROUTE,
3425+};
3426
3427 struct flow_offload {
3428 struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
3429- u32 flags;
3430- union {
3431- /* Your private driver data here. */
3432- u32 timeout;
3433- };
3434+ struct nf_conn *ct;
3435+ unsigned long flags;
3436+ u16 type;
3437+ u32 timeout;
3438+ struct rcu_head rcu_head;
3439 };
3440
3441 #define NF_FLOW_TIMEOUT (30 * HZ)
3442+#define nf_flowtable_time_stamp (u32)jiffies
3443+
3444+unsigned long flow_offload_get_timeout(struct flow_offload *flow);
3445+
3446+static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
3447+{
3448+ return (__s32)(timeout - nf_flowtable_time_stamp);
3449+}
3450
3451 struct nf_flow_route {
3452 struct {
3453- struct dst_entry *dst;
3454+ struct dst_entry *dst;
3455+ struct {
3456+ u32 ifindex;
3457+ struct {
3458+ u16 id;
3459+ __be16 proto;
3460+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
3461+ u8 num_encaps:2,
3462+ ingress_vlans:2;
3463+ } in;
3464+ struct {
3465+ u32 ifindex;
3466+ u32 hw_ifindex;
3467+ u8 h_source[ETH_ALEN];
3468+ u8 h_dest[ETH_ALEN];
3469+ } out;
3470+ enum flow_offload_xmit_type xmit_type;
3471 } tuple[FLOW_OFFLOAD_DIR_MAX];
3472 };
3473
3474-struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
3475- struct nf_flow_route *route);
3476+struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
3477 void flow_offload_free(struct flow_offload *flow);
3478
3479+static inline int
3480+nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
3481+ flow_setup_cb_t *cb, void *cb_priv)
3482+{
3483+ struct flow_block *block = &flow_table->flow_block;
3484+ struct flow_block_cb *block_cb;
3485+ int err = 0;
3486+
3487+ down_write(&flow_table->flow_block_lock);
3488+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
3489+ if (block_cb) {
3490+ err = -EEXIST;
3491+ goto unlock;
3492+ }
3493+
3494+ block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
3495+ if (IS_ERR(block_cb)) {
3496+ err = PTR_ERR(block_cb);
3497+ goto unlock;
3498+ }
3499+
3500+ list_add_tail(&block_cb->list, &block->cb_list);
3501+
3502+unlock:
3503+ up_write(&flow_table->flow_block_lock);
3504+ return err;
3505+}
3506+
3507+static inline void
3508+nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
3509+ flow_setup_cb_t *cb, void *cb_priv)
3510+{
3511+ struct flow_block *block = &flow_table->flow_block;
3512+ struct flow_block_cb *block_cb;
3513+
3514+ down_write(&flow_table->flow_block_lock);
3515+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
3516+ if (block_cb) {
3517+ list_del(&block_cb->list);
3518+ flow_block_cb_free(block_cb);
3519+ } else {
3520+ WARN_ON(true);
3521+ }
3522+ up_write(&flow_table->flow_block_lock);
3523+}
3524+
3525+int flow_offload_route_init(struct flow_offload *flow,
3526+ const struct nf_flow_route *route);
3527+
3528 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
3529+void flow_offload_refresh(struct nf_flowtable *flow_table,
3530+ struct flow_offload *flow);
3531+
3532 struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
3533 struct flow_offload_tuple *tuple);
3534+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
3535+ struct net_device *dev);
3536 void nf_flow_table_cleanup(struct net_device *dev);
3537
3538 int nf_flow_table_init(struct nf_flowtable *flow_table);
3539 void nf_flow_table_free(struct nf_flowtable *flow_table);
3540
3541 void flow_offload_teardown(struct flow_offload *flow);
3542-static inline void flow_offload_dead(struct flow_offload *flow)
3543-{
3544- flow->flags |= FLOW_OFFLOAD_DYING;
3545-}
3546
3547-int nf_flow_snat_port(const struct flow_offload *flow,
3548- struct sk_buff *skb, unsigned int thoff,
3549- u8 protocol, enum flow_offload_tuple_dir dir);
3550-int nf_flow_dnat_port(const struct flow_offload *flow,
3551- struct sk_buff *skb, unsigned int thoff,
3552- u8 protocol, enum flow_offload_tuple_dir dir);
3553+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
3554+ void (*iter)(struct flow_offload *flow, void *data),
3555+ void *data);
3556+
3557+void nf_flow_snat_port(const struct flow_offload *flow,
3558+ struct sk_buff *skb, unsigned int thoff,
3559+ u8 protocol, enum flow_offload_tuple_dir dir);
3560+void nf_flow_dnat_port(const struct flow_offload *flow,
3561+ struct sk_buff *skb, unsigned int thoff,
3562+ u8 protocol, enum flow_offload_tuple_dir dir);
3563
3564 struct flow_ports {
3565 __be16 source, dest;
developerb7c46752022-07-04 19:51:38 +08003566@@ -126,4 +298,41 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08003567 #define MODULE_ALIAS_NF_FLOWTABLE(family) \
3568 MODULE_ALIAS("nf-flowtable-" __stringify(family))
3569
3570+void nf_flow_offload_add(struct nf_flowtable *flowtable,
3571+ struct flow_offload *flow);
3572+void nf_flow_offload_del(struct nf_flowtable *flowtable,
3573+ struct flow_offload *flow);
3574+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
developer7eb15dc2023-06-14 17:44:03 +08003575+ struct flow_offload *flow);
developer8cb3ac72022-07-04 10:55:14 +08003576+
3577+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
3578+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
3579+ struct net_device *dev,
3580+ enum flow_block_command cmd);
3581+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
3582+ enum flow_offload_tuple_dir dir,
3583+ struct nf_flow_rule *flow_rule);
3584+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
3585+ enum flow_offload_tuple_dir dir,
3586+ struct nf_flow_rule *flow_rule);
3587+
3588+int nf_flow_table_offload_init(void);
3589+void nf_flow_table_offload_exit(void);
3590+
3591+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
3592+{
3593+ __be16 proto;
3594+
3595+ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
3596+ sizeof(struct pppoe_hdr)));
3597+ switch (proto) {
3598+ case htons(PPP_IP):
3599+ return htons(ETH_P_IP);
3600+ case htons(PPP_IPV6):
3601+ return htons(ETH_P_IPV6);
3602+ }
3603+
3604+ return 0;
3605+}
3606+
3607 #endif /* _NF_FLOW_TABLE_H */
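The nf_flow_table_offload_add_cb()/nf_flow_table_offload_del_cb() helpers above are the registration point for a driver's flow-block callback; once registered, the offload code in nf_flow_table_offload.c invokes that callback with TC_SETUP_CLSFLOWER and a struct flow_cls_offload for every flow it wants installed, removed, or polled for stats. A condensed, hypothetical driver-side sketch (all foo_* names invented):

static int foo_flow_block_cb(enum tc_setup_type type, void *type_data,
                             void *cb_priv)
{
        struct flow_cls_offload *cls = type_data;
        struct foo_priv *priv = cb_priv;

        if (type != TC_SETUP_CLSFLOWER)
                return -EOPNOTSUPP;

        switch (cls->command) {
        case FLOW_CLS_REPLACE:
                return foo_flow_install(priv, cls);   /* program a HW entry */
        case FLOW_CLS_DESTROY:
                return foo_flow_remove(priv, cls);
        case FLOW_CLS_STATS:
                return foo_flow_stats(priv, cls);     /* fill cls->stats */
        default:
                return -EOPNOTSUPP;
        }
}

static int foo_bind_flowtable(struct foo_priv *priv,
                              struct nf_flowtable *flowtable)
{
        return nf_flow_table_offload_add_cb(flowtable, foo_flow_block_cb, priv);
}

static void foo_unbind_flowtable(struct foo_priv *priv,
                                 struct nf_flowtable *flowtable)
{
        nf_flow_table_offload_del_cb(flowtable, foo_flow_block_cb, priv);
}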
3608diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
developer7eb15dc2023-06-14 17:44:03 +08003609index 806454e..9e3963c 100644
developer8cb3ac72022-07-04 10:55:14 +08003610--- a/include/net/netns/conntrack.h
3611+++ b/include/net/netns/conntrack.h
3612@@ -27,6 +27,9 @@ struct nf_tcp_net {
3613 int tcp_loose;
3614 int tcp_be_liberal;
3615 int tcp_max_retrans;
3616+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3617+ unsigned int offload_timeout;
3618+#endif
3619 };
3620
3621 enum udp_conntrack {
3622@@ -37,6 +40,9 @@ enum udp_conntrack {
3623
3624 struct nf_udp_net {
3625 unsigned int timeouts[UDP_CT_MAX];
3626+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3627+ unsigned int offload_timeout;
3628+#endif
3629 };
3630
3631 struct nf_icmp_net {
3632diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
developer7eb15dc2023-06-14 17:44:03 +08003633index 336014b..ae698d1 100644
developer8cb3ac72022-07-04 10:55:14 +08003634--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
3635+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
3636@@ -105,14 +105,19 @@ enum ip_conntrack_status {
3637 IPS_OFFLOAD_BIT = 14,
3638 IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
3639
3640+ /* Conntrack has been offloaded to hardware. */
3641+ IPS_HW_OFFLOAD_BIT = 15,
3642+ IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT),
3643+
3644 /* Be careful here, modifying these bits can make things messy,
3645 * so don't let users modify them directly.
3646 */
3647 IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
3648 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
3649- IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
3650+ IPS_SEQ_ADJUST | IPS_TEMPLATE |
3651+ IPS_OFFLOAD | IPS_HW_OFFLOAD),
3652
3653- __IPS_MAX_BIT = 15,
3654+ __IPS_MAX_BIT = 16,
3655 };
3656
3657 /* Connection tracking event types */
3658diff --git a/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
3659new file mode 100644
developer7eb15dc2023-06-14 17:44:03 +08003660index 0000000..5841bbe
developer8cb3ac72022-07-04 10:55:14 +08003661--- /dev/null
3662+++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
3663@@ -0,0 +1,17 @@
3664+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
3665+#ifndef _XT_FLOWOFFLOAD_H
3666+#define _XT_FLOWOFFLOAD_H
3667+
3668+#include <linux/types.h>
3669+
3670+enum {
3671+ XT_FLOWOFFLOAD_HW = 1 << 0,
3672+
3673+ XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW
3674+};
3675+
3676+struct xt_flowoffload_target_info {
3677+ __u32 flags;
3678+};
3679+
3680+#endif /* _XT_FLOWOFFLOAD_H */
3681diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
developer7eb15dc2023-06-14 17:44:03 +08003682index 589615e..444ab5f 100644
developer8cb3ac72022-07-04 10:55:14 +08003683--- a/net/8021q/vlan_dev.c
3684+++ b/net/8021q/vlan_dev.c
3685@@ -747,6 +747,26 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
3686 return real_dev->ifindex;
3687 }
3688
3689+static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
3690+ struct net_device_path *path)
3691+{
3692+ struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
3693+
3694+ path->type = DEV_PATH_VLAN;
3695+ path->encap.id = vlan->vlan_id;
3696+ path->encap.proto = vlan->vlan_proto;
3697+ path->dev = ctx->dev;
3698+ ctx->dev = vlan->real_dev;
3699+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
3700+ return -ENOSPC;
3701+
3702+ ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
3703+ ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
3704+ ctx->num_vlans++;
3705+
3706+ return 0;
3707+}
3708+
3709 static const struct ethtool_ops vlan_ethtool_ops = {
3710 .get_link_ksettings = vlan_ethtool_get_link_ksettings,
3711 .get_drvinfo = vlan_ethtool_get_drvinfo,
3712@@ -785,6 +805,7 @@ static const struct net_device_ops vlan_netdev_ops = {
3713 #endif
3714 .ndo_fix_features = vlan_dev_fix_features,
3715 .ndo_get_iflink = vlan_dev_get_iflink,
3716+ .ndo_fill_forward_path = vlan_dev_fill_forward_path,
3717 };
3718
3719 static void vlan_dev_free(struct net_device *dev)
3720diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
developer7eb15dc2023-06-14 17:44:03 +08003721index 501f77f..0940b44 100644
developer8cb3ac72022-07-04 10:55:14 +08003722--- a/net/bridge/br_device.c
3723+++ b/net/bridge/br_device.c
3724@@ -377,6 +377,54 @@ static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
3725 return br_del_if(br, slave_dev);
3726 }
3727
3728+static int br_fill_forward_path(struct net_device_path_ctx *ctx,
3729+ struct net_device_path *path)
3730+{
3731+ struct net_bridge_fdb_entry *f;
3732+ struct net_bridge_port *dst;
3733+ struct net_bridge *br;
3734+
3735+ if (netif_is_bridge_port(ctx->dev))
3736+ return -1;
3737+
3738+ br = netdev_priv(ctx->dev);
3739+
3740+ br_vlan_fill_forward_path_pvid(br, ctx, path);
3741+
3742+ f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
3743+ if (!f || !f->dst)
3744+ return -1;
3745+
3746+ dst = READ_ONCE(f->dst);
3747+ if (!dst)
3748+ return -1;
3749+
3750+ if (br_vlan_fill_forward_path_mode(br, dst, path))
3751+ return -1;
3752+
3753+ path->type = DEV_PATH_BRIDGE;
3754+ path->dev = dst->br->dev;
3755+ ctx->dev = dst->dev;
3756+
3757+ switch (path->bridge.vlan_mode) {
3758+ case DEV_PATH_BR_VLAN_TAG:
3759+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
3760+ return -ENOSPC;
3761+ ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
3762+ ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
3763+ ctx->num_vlans++;
3764+ break;
3765+ case DEV_PATH_BR_VLAN_UNTAG_HW:
3766+ case DEV_PATH_BR_VLAN_UNTAG:
3767+ ctx->num_vlans--;
3768+ break;
3769+ case DEV_PATH_BR_VLAN_KEEP:
3770+ break;
3771+ }
3772+
3773+ return 0;
3774+}
3775+
3776 static const struct ethtool_ops br_ethtool_ops = {
3777 .get_drvinfo = br_getinfo,
3778 .get_link = ethtool_op_get_link,
3779@@ -410,6 +458,7 @@ static const struct net_device_ops br_netdev_ops = {
3780 .ndo_bridge_setlink = br_setlink,
3781 .ndo_bridge_dellink = br_dellink,
3782 .ndo_features_check = passthru_features_check,
3783+ .ndo_fill_forward_path = br_fill_forward_path,
3784 };
3785
3786 static struct device_type br_type = {
3787diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
developer7eb15dc2023-06-14 17:44:03 +08003788index a736be8..4bd9e9b 100644
developer8cb3ac72022-07-04 10:55:14 +08003789--- a/net/bridge/br_private.h
3790+++ b/net/bridge/br_private.h
3791@@ -912,6 +912,13 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
3792 int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
3793 void *ptr);
3794
3795+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
3796+ struct net_device_path_ctx *ctx,
3797+ struct net_device_path *path);
3798+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
3799+ struct net_bridge_port *dst,
3800+ struct net_device_path *path);
3801+
3802 static inline struct net_bridge_vlan_group *br_vlan_group(
3803 const struct net_bridge *br)
3804 {
3805@@ -1066,6 +1073,19 @@ static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
3806 return 0;
3807 }
3808
3809+static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
3810+ struct net_device_path_ctx *ctx,
3811+ struct net_device_path *path)
3812+{
3813+}
3814+
3815+static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
3816+ struct net_bridge_port *dst,
3817+ struct net_device_path *path)
3818+{
3819+ return 0;
3820+}
3821+
3822 static inline struct net_bridge_vlan_group *br_vlan_group(
3823 const struct net_bridge *br)
3824 {
3825diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
developer7eb15dc2023-06-14 17:44:03 +08003826index 9257292..bcfd169 100644
developer8cb3ac72022-07-04 10:55:14 +08003827--- a/net/bridge/br_vlan.c
3828+++ b/net/bridge/br_vlan.c
3829@@ -1268,6 +1268,61 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
3830 }
3831 EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
3832
3833+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
3834+ struct net_device_path_ctx *ctx,
3835+ struct net_device_path *path)
3836+{
3837+ struct net_bridge_vlan_group *vg;
3838+ int idx = ctx->num_vlans - 1;
3839+ u16 vid;
3840+
3841+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
3842+
3843+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
3844+ return;
3845+
3846+ vg = br_vlan_group(br);
3847+
3848+ if (idx >= 0 &&
3849+ ctx->vlan[idx].proto == br->vlan_proto) {
3850+ vid = ctx->vlan[idx].id;
3851+ } else {
3852+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
3853+ vid = br_get_pvid(vg);
3854+ }
3855+
3856+ path->bridge.vlan_id = vid;
3857+ path->bridge.vlan_proto = br->vlan_proto;
3858+}
3859+
3860+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
3861+ struct net_bridge_port *dst,
3862+ struct net_device_path *path)
3863+{
3864+ struct net_bridge_vlan_group *vg;
3865+ struct net_bridge_vlan *v;
3866+
3867+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
3868+ return 0;
3869+
3870+ vg = nbp_vlan_group_rcu(dst);
3871+ v = br_vlan_find(vg, path->bridge.vlan_id);
3872+ if (!v || !br_vlan_should_use(v))
3873+ return -EINVAL;
3874+
3875+ if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
3876+ return 0;
3877+
3878+ if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
3879+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
3880+ else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
3881+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
3882+ else
3883+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
3884+
3885+ return 0;
3886+}
3887+
3888 int br_vlan_get_info(const struct net_device *dev, u16 vid,
3889 struct bridge_vlan_info *p_vinfo)
3890 {
3891diff --git a/net/core/dev.c b/net/core/dev.c
developer7eb15dc2023-06-14 17:44:03 +08003892index 794c768..a1f046c 100644
developer8cb3ac72022-07-04 10:55:14 +08003893--- a/net/core/dev.c
3894+++ b/net/core/dev.c
3895@@ -639,6 +639,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
3896 }
3897 EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
3898
3899+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
3900+{
3901+ int k = stack->num_paths++;
3902+
3903+ if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
3904+ return NULL;
3905+
3906+ return &stack->path[k];
3907+}
3908+
3909+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
3910+ struct net_device_path_stack *stack)
3911+{
3912+ const struct net_device *last_dev;
3913+ struct net_device_path_ctx ctx = {
3914+ .dev = dev,
3915+ };
3916+ struct net_device_path *path;
3917+ int ret = 0;
3918+
3919+ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
3920+ stack->num_paths = 0;
3921+ while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
3922+ last_dev = ctx.dev;
3923+ path = dev_fwd_path(stack);
3924+ if (!path)
3925+ return -1;
3926+
3927+ memset(path, 0, sizeof(struct net_device_path));
3928+ ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
3929+ if (ret < 0)
3930+ return -1;
3931+
3932+ if (WARN_ON_ONCE(last_dev == ctx.dev))
3933+ return -1;
3934+ }
3935+ path = dev_fwd_path(stack);
3936+ if (!path)
3937+ return -1;
3938+ path->type = DEV_PATH_ETHERNET;
3939+ path->dev = ctx.dev;
3940+
3941+ return ret;
3942+}
3943+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
3944+
3945 /**
3946 * __dev_get_by_name - find a device by its name
3947 * @net: the applicable net namespace
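dev_fill_forward_path() is what turns "this flow leaves through device X towards MAC Y" into the full stack of virtual devices (bridge, VLAN, DSA, PPPoE, ...) ending at the real port; the flow-offload setup code elsewhere in this patch performs essentially this walk when it fills in struct nf_flow_route. A rough caller-side sketch with invented names:

static const struct net_device *
foo_resolve_real_dev(const struct net_device *dev, const u8 *nexthop_mac)
{
        struct net_device_path_stack stack;
        int i;

        if (dev_fill_forward_path(dev, nexthop_mac, &stack) < 0)
                return NULL;

        for (i = 0; i < stack.num_paths; i++)
                pr_debug("hop %d: path type %d via %s\n", i,
                         stack.path[i].type,
                         stack.path[i].dev ? stack.path[i].dev->name : "none");

        /* the final entry is always DEV_PATH_ETHERNET: the real transmit port */
        return stack.path[stack.num_paths - 1].dev;
}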
3948diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
developer7eb15dc2023-06-14 17:44:03 +08003949index ca80f86..35a1249 100644
developer8cb3ac72022-07-04 10:55:14 +08003950--- a/net/dsa/dsa.c
3951+++ b/net/dsa/dsa.c
3952@@ -329,6 +329,15 @@ int call_dsa_notifiers(unsigned long val, struct net_device *dev,
3953 }
3954 EXPORT_SYMBOL_GPL(call_dsa_notifiers);
3955
3956+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
3957+{
3958+ if (!netdev || !dsa_slave_dev_check(netdev))
3959+ return ERR_PTR(-ENODEV);
3960+
3961+ return dsa_slave_to_port(netdev);
3962+}
3963+EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
3964+
3965 static int __init dsa_init_module(void)
3966 {
3967 int rc;
3968diff --git a/net/dsa/slave.c b/net/dsa/slave.c
developer7eb15dc2023-06-14 17:44:03 +08003969index e2b91b3..2dfaa1e 100644
developer8cb3ac72022-07-04 10:55:14 +08003970--- a/net/dsa/slave.c
3971+++ b/net/dsa/slave.c
developer7eb15dc2023-06-14 17:44:03 +08003972@@ -1031,14 +1031,32 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08003973 }
3974 }
3975
3976+static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
3977+ void *type_data)
3978+{
3979+ struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
3980+ struct net_device *master = cpu_dp->master;
3981+
3982+ if (!master->netdev_ops->ndo_setup_tc)
3983+ return -EOPNOTSUPP;
3984+
3985+ return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
3986+}
3987+
3988 static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
3989 void *type_data)
3990 {
3991 struct dsa_port *dp = dsa_slave_to_port(dev);
3992 struct dsa_switch *ds = dp->ds;
3993
3994- if (type == TC_SETUP_BLOCK)
3995+ switch (type) {
3996+ case TC_SETUP_BLOCK:
3997 return dsa_slave_setup_tc_block(dev, type_data);
3998+ case TC_SETUP_FT:
3999+ return dsa_slave_setup_ft_block(ds, dp->index, type_data);
4000+ default:
4001+ break;
4002+ }
4003
4004 if (!ds->ops->port_setup_tc)
4005 return -EOPNOTSUPP;
developer7eb15dc2023-06-14 17:44:03 +08004006@@ -1224,6 +1242,21 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08004007 return dp->ds->devlink ? &dp->devlink_port : NULL;
4008 }
4009
4010+static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
4011+ struct net_device_path *path)
4012+{
4013+ struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
4014+ struct dsa_port *cpu_dp = dp->cpu_dp;
4015+
4016+ path->dev = ctx->dev;
4017+ path->type = DEV_PATH_DSA;
4018+ path->dsa.proto = cpu_dp->tag_ops->proto;
4019+ path->dsa.port = dp->index;
4020+ ctx->dev = cpu_dp->master;
4021+
4022+ return 0;
4023+}
4024+
4025 static const struct net_device_ops dsa_slave_netdev_ops = {
4026 .ndo_open = dsa_slave_open,
4027 .ndo_stop = dsa_slave_close,
developer7eb15dc2023-06-14 17:44:03 +08004028@@ -1248,6 +1281,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
developer8cb3ac72022-07-04 10:55:14 +08004029 .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid,
4030 .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid,
4031 .ndo_get_devlink_port = dsa_slave_get_devlink_port,
4032+ .ndo_fill_forward_path = dsa_slave_fill_forward_path,
4033 };
4034
4035 static struct device_type dsa_type = {
developer7eb15dc2023-06-14 17:44:03 +08004036@@ -1499,6 +1533,7 @@ bool dsa_slave_dev_check(const struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08004037 {
4038 return dev->netdev_ops == &dsa_slave_netdev_ops;
4039 }
4040+EXPORT_SYMBOL_GPL(dsa_slave_dev_check);
4041
4042 static int dsa_slave_changeupper(struct net_device *dev,
4043 struct netdev_notifier_changeupper_info *info)
4044diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
developer7eb15dc2023-06-14 17:44:03 +08004045index f17b402..803b92e 100644
developer8cb3ac72022-07-04 10:55:14 +08004046--- a/net/ipv4/netfilter/Kconfig
4047+++ b/net/ipv4/netfilter/Kconfig
4048@@ -56,8 +56,6 @@ config NF_TABLES_ARP
4049 help
4050 This option enables the ARP support for nf_tables.
4051
4052-endif # NF_TABLES
4053-
4054 config NF_FLOW_TABLE_IPV4
4055 tristate "Netfilter flow table IPv4 module"
4056 depends on NF_FLOW_TABLE
4057@@ -66,6 +64,8 @@ config NF_FLOW_TABLE_IPV4
4058
4059 To compile it as a module, choose M here.
4060
4061+endif # NF_TABLES
4062+
4063 config NF_DUP_IPV4
4064 tristate "Netfilter IPv4 packet duplication to alternate destination"
4065 depends on !NF_CONNTRACK || NF_CONNTRACK
4066diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
developer7eb15dc2023-06-14 17:44:03 +08004067index 457eb07..330f8e3 100644
developer8cb3ac72022-07-04 10:55:14 +08004068--- a/net/ipv6/ip6_output.c
4069+++ b/net/ipv6/ip6_output.c
4070@@ -607,7 +607,7 @@ int ip6_forward(struct sk_buff *skb)
4071 }
4072 }
4073
4074- mtu = ip6_dst_mtu_forward(dst);
4075+ mtu = ip6_dst_mtu_maybe_forward(dst, true);
4076 if (mtu < IPV6_MIN_MTU)
4077 mtu = IPV6_MIN_MTU;
4078
4079diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
developer7eb15dc2023-06-14 17:44:03 +08004080index 69443e9..0b481d2 100644
developer8cb3ac72022-07-04 10:55:14 +08004081--- a/net/ipv6/netfilter/Kconfig
4082+++ b/net/ipv6/netfilter/Kconfig
4083@@ -45,7 +45,6 @@ config NFT_FIB_IPV6
4084 multicast or blackhole.
4085
4086 endif # NF_TABLES_IPV6
4087-endif # NF_TABLES
4088
4089 config NF_FLOW_TABLE_IPV6
4090 tristate "Netfilter flow table IPv6 module"
4091@@ -55,6 +54,8 @@ config NF_FLOW_TABLE_IPV6
4092
4093 To compile it as a module, choose M here.
4094
4095+endif # NF_TABLES
4096+
4097 config NF_DUP_IPV6
4098 tristate "Netfilter IPv6 packet duplication to alternate destination"
4099 depends on !NF_CONNTRACK || NF_CONNTRACK
4100diff --git a/net/ipv6/route.c b/net/ipv6/route.c
developer7eb15dc2023-06-14 17:44:03 +08004101index 43d185c..82a752c 100644
developer8cb3ac72022-07-04 10:55:14 +08004102--- a/net/ipv6/route.c
4103+++ b/net/ipv6/route.c
4104@@ -83,7 +83,7 @@ enum rt6_nud_state {
4105
4106 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
4107 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
4108-static unsigned int ip6_mtu(const struct dst_entry *dst);
4109+static unsigned int ip6_mtu(const struct dst_entry *dst);
4110 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
4111 static void ip6_dst_destroy(struct dst_entry *);
4112 static void ip6_dst_ifdown(struct dst_entry *,
4113@@ -3125,25 +3125,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
4114
4115 static unsigned int ip6_mtu(const struct dst_entry *dst)
4116 {
4117- struct inet6_dev *idev;
4118- unsigned int mtu;
4119-
4120- mtu = dst_metric_raw(dst, RTAX_MTU);
4121- if (mtu)
4122- goto out;
4123-
4124- mtu = IPV6_MIN_MTU;
4125-
4126- rcu_read_lock();
4127- idev = __in6_dev_get(dst->dev);
4128- if (idev)
4129- mtu = idev->cnf.mtu6;
4130- rcu_read_unlock();
4131-
4132-out:
4133- mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
4134-
4135- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
4136+ return ip6_dst_mtu_maybe_forward(dst, false);
4137 }
4138
4139 /* MTU selection:
4140diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
developer7eb15dc2023-06-14 17:44:03 +08004141index b6e0a62..5d690ab 100644
developer8cb3ac72022-07-04 10:55:14 +08004142--- a/net/netfilter/Kconfig
4143+++ b/net/netfilter/Kconfig
developer7eb15dc2023-06-14 17:44:03 +08004144@@ -689,8 +689,6 @@ config NFT_FIB_NETDEV
developer8cb3ac72022-07-04 10:55:14 +08004145
4146 endif # NF_TABLES_NETDEV
4147
4148-endif # NF_TABLES
4149-
4150 config NF_FLOW_TABLE_INET
4151 tristate "Netfilter flow table mixed IPv4/IPv6 module"
4152 depends on NF_FLOW_TABLE
developer7eb15dc2023-06-14 17:44:03 +08004153@@ -699,11 +697,12 @@ config NF_FLOW_TABLE_INET
developer8cb3ac72022-07-04 10:55:14 +08004154
4155 To compile it as a module, choose M here.
4156
4157+endif # NF_TABLES
4158+
4159 config NF_FLOW_TABLE
4160 tristate "Netfilter flow table module"
4161 depends on NETFILTER_INGRESS
4162 depends on NF_CONNTRACK
4163- depends on NF_TABLES
4164 help
4165 This option adds the flow table core infrastructure.
4166
developer7eb15dc2023-06-14 17:44:03 +08004167@@ -983,6 +982,15 @@ config NETFILTER_XT_TARGET_NOTRACK
developer8cb3ac72022-07-04 10:55:14 +08004168 depends on NETFILTER_ADVANCED
4169 select NETFILTER_XT_TARGET_CT
4170
4171+config NETFILTER_XT_TARGET_FLOWOFFLOAD
4172+ tristate '"FLOWOFFLOAD" target support'
4173+ depends on NF_FLOW_TABLE
4174+ depends on NETFILTER_INGRESS
4175+ help
4176+ This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
4177+ module to speed up processing of packets by bypassing the usual
4178+ netfilter chains
4179+
4180 config NETFILTER_XT_TARGET_RATEEST
4181 tristate '"RATEEST" target support'
4182 depends on NETFILTER_ADVANCED
4183diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
developer7eb15dc2023-06-14 17:44:03 +08004184index 4fc075b..d93a121 100644
developer8cb3ac72022-07-04 10:55:14 +08004185--- a/net/netfilter/Makefile
4186+++ b/net/netfilter/Makefile
4187@@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
4188
4189 # flow table infrastructure
4190 obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
4191-nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
4192+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
4193+ nf_flow_table_offload.o
4194
4195 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
4196
4197@@ -140,6 +141,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
4198 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
4199 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
4200 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
4201+obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
4202 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
4203 obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
4204 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
4205diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
developer7eb15dc2023-06-14 17:44:03 +08004206index f6ab6f4..f689e19 100644
developer8cb3ac72022-07-04 10:55:14 +08004207--- a/net/netfilter/nf_conntrack_core.c
4208+++ b/net/netfilter/nf_conntrack_core.c
developer7eb15dc2023-06-14 17:44:03 +08004209@@ -864,9 +864,8 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
developer8cb3ac72022-07-04 10:55:14 +08004210 }
4211 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
4212
4213-static inline void nf_ct_acct_update(struct nf_conn *ct,
4214- enum ip_conntrack_info ctinfo,
4215- unsigned int len)
4216+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
4217+ unsigned int bytes)
4218 {
4219 struct nf_conn_acct *acct;
4220
4221@@ -874,10 +873,11 @@ static inline void nf_ct_acct_update(struct nf_conn *ct,
4222 if (acct) {
4223 struct nf_conn_counter *counter = acct->counter;
4224
4225- atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
4226- atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
4227+ atomic64_add(packets, &counter[dir].packets);
4228+ atomic64_add(bytes, &counter[dir].bytes);
4229 }
4230 }
4231+EXPORT_SYMBOL_GPL(nf_ct_acct_add);
4232
4233 static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
4234 const struct nf_conn *loser_ct)
4235@@ -891,7 +891,7 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
4236
4237 /* u32 should be fine since we must have seen one packet. */
4238 bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
4239- nf_ct_acct_update(ct, ctinfo, bytes);
4240+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
4241 }
4242 }
4243
4244@@ -1238,8 +1238,10 @@ static void gc_worker(struct work_struct *work)
4245
4246 tmp = nf_ct_tuplehash_to_ctrack(h);
4247
4248- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
4249+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
4250+ nf_ct_offload_timeout(tmp);
4251 continue;
4252+ }
4253
4254 if (nf_ct_is_expired(tmp)) {
4255 nf_ct_gc_expired(tmp);
4256@@ -1763,7 +1765,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
4257 WRITE_ONCE(ct->timeout, extra_jiffies);
4258 acct:
4259 if (do_acct)
4260- nf_ct_acct_update(ct, ctinfo, skb->len);
4261+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
4262 }
4263 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
4264
4265@@ -1771,7 +1773,7 @@ bool nf_ct_kill_acct(struct nf_conn *ct,
4266 enum ip_conntrack_info ctinfo,
4267 const struct sk_buff *skb)
4268 {
4269- nf_ct_acct_update(ct, ctinfo, skb->len);
4270+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
4271
4272 return nf_ct_delete(ct, 0, 0);
4273 }
4274diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
developer7eb15dc2023-06-14 17:44:03 +08004275index e219b6f..5cdc627 100644
developer8cb3ac72022-07-04 10:55:14 +08004276--- a/net/netfilter/nf_conntrack_proto_tcp.c
4277+++ b/net/netfilter/nf_conntrack_proto_tcp.c
developer7eb15dc2023-06-14 17:44:03 +08004278@@ -1463,6 +1463,10 @@ void nf_conntrack_tcp_init_net(struct net *net)
developer8cb3ac72022-07-04 10:55:14 +08004279 tn->tcp_loose = nf_ct_tcp_loose;
4280 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
4281 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
4282+
4283+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
4284+ tn->offload_timeout = 30 * HZ;
4285+#endif
4286 }
4287
4288 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
4289diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
developer7eb15dc2023-06-14 17:44:03 +08004290index e3a2d01..a1579d6 100644
developer8cb3ac72022-07-04 10:55:14 +08004291--- a/net/netfilter/nf_conntrack_proto_udp.c
4292+++ b/net/netfilter/nf_conntrack_proto_udp.c
4293@@ -267,6 +267,10 @@ void nf_conntrack_udp_init_net(struct net *net)
4294
4295 for (i = 0; i < UDP_CT_MAX; i++)
4296 un->timeouts[i] = udp_timeouts[i];
4297+
4298+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
4299+ un->offload_timeout = 30 * HZ;
4300+#endif
4301 }
4302
4303 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
4304diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
developer7eb15dc2023-06-14 17:44:03 +08004305index 236954e..be7513a 100644
developer8cb3ac72022-07-04 10:55:14 +08004306--- a/net/netfilter/nf_conntrack_standalone.c
4307+++ b/net/netfilter/nf_conntrack_standalone.c
4308@@ -353,7 +353,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
4309 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
4310 goto release;
4311
4312- if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
4313+ if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
4314+ seq_puts(s, "[HW_OFFLOAD] ");
4315+ else if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
4316 seq_puts(s, "[OFFLOAD] ");
4317 else if (test_bit(IPS_ASSURED_BIT, &ct->status))
4318 seq_puts(s, "[ASSURED] ");
4319@@ -620,11 +622,17 @@ enum nf_ct_sysctl_index {
4320 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
4321 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
4322 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
4323+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
4324+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
4325+#endif
4326 NF_SYSCTL_CT_PROTO_TCP_LOOSE,
4327 NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
4328 NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
4329 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
4330 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
4331+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
4332+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
4333+#endif
4334 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
4335 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
4336 #ifdef CONFIG_NF_CT_PROTO_SCTP
developer7eb15dc2023-06-14 17:44:03 +08004337@@ -811,6 +819,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
developer8cb3ac72022-07-04 10:55:14 +08004338 .mode = 0644,
4339 .proc_handler = proc_dointvec_jiffies,
4340 },
4341+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
4342+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
4343+ .procname = "nf_flowtable_tcp_timeout",
4344+ .maxlen = sizeof(unsigned int),
4345+ .mode = 0644,
4346+ .proc_handler = proc_dointvec_jiffies,
4347+ },
4348+#endif
4349 [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
4350 .procname = "nf_conntrack_tcp_loose",
4351 .maxlen = sizeof(int),
developer7eb15dc2023-06-14 17:44:03 +08004352@@ -845,6 +861,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
developer8cb3ac72022-07-04 10:55:14 +08004353 .mode = 0644,
4354 .proc_handler = proc_dointvec_jiffies,
4355 },
4356+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
4357+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
4358+ .procname = "nf_flowtable_udp_timeout",
4359+ .maxlen = sizeof(unsigned int),
4360+ .mode = 0644,
4361+ .proc_handler = proc_dointvec_jiffies,
4362+ },
4363+#endif
4364 [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
4365 .procname = "nf_conntrack_icmp_timeout",
4366 .maxlen = sizeof(unsigned int),
developer7eb15dc2023-06-14 17:44:03 +08004367@@ -1021,6 +1045,11 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
developer8cb3ac72022-07-04 10:55:14 +08004368 XASSIGN(LIBERAL, &tn->tcp_be_liberal);
4369 XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
4370 #undef XASSIGN
4371+
4372+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
4373+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
4374+#endif
4375+
4376 }
4377
4378 static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
developer7eb15dc2023-06-14 17:44:03 +08004379@@ -1107,6 +1136,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
developer8cb3ac72022-07-04 10:55:14 +08004380 table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
4381 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
4382 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
4383+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
4384+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
4385+#endif
4386
4387 nf_conntrack_standalone_init_tcp_sysctl(net, table);
4388 nf_conntrack_standalone_init_sctp_sysctl(net, table);
4389diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
developer7eb15dc2023-06-14 17:44:03 +08004390index f212cec..0a701f0 100644
developer8cb3ac72022-07-04 10:55:14 +08004391--- a/net/netfilter/nf_flow_table_core.c
4392+++ b/net/netfilter/nf_flow_table_core.c
developerb7c46752022-07-04 19:51:38 +08004393@@ -7,43 +7,21 @@
developer8cb3ac72022-07-04 10:55:14 +08004394 #include <linux/netdevice.h>
4395 #include <net/ip.h>
4396 #include <net/ip6_route.h>
4397-#include <net/netfilter/nf_tables.h>
4398 #include <net/netfilter/nf_flow_table.h>
4399 #include <net/netfilter/nf_conntrack.h>
4400 #include <net/netfilter/nf_conntrack_core.h>
4401 #include <net/netfilter/nf_conntrack_l4proto.h>
4402 #include <net/netfilter/nf_conntrack_tuple.h>
4403
4404-struct flow_offload_entry {
4405- struct flow_offload flow;
4406- struct nf_conn *ct;
4407- struct rcu_head rcu_head;
4408-};
4409-
4410 static DEFINE_MUTEX(flowtable_lock);
4411 static LIST_HEAD(flowtables);
4412
developerb7c46752022-07-04 19:51:38 +08004413-static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
4414-{
4415- const struct rt6_info *rt;
4416-
4417- if (flow_tuple->l3proto == NFPROTO_IPV6) {
4418- rt = (const struct rt6_info *)flow_tuple->dst_cache;
4419- return rt6_get_cookie(rt);
4420- }
4421-
4422- return 0;
4423-}
4424-
developer8cb3ac72022-07-04 10:55:14 +08004425 static void
4426-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
4427- struct nf_flow_route *route,
4428+flow_offload_fill_dir(struct flow_offload *flow,
4429 enum flow_offload_tuple_dir dir)
4430 {
4431 struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
4432- struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
4433- struct dst_entry *other_dst = route->tuple[!dir].dst;
4434- struct dst_entry *dst = route->tuple[dir].dst;
4435+ struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
4436
4437 ft->dir = dir;
4438
developerb7c46752022-07-04 19:51:38 +08004439@@ -51,12 +29,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
developer8cb3ac72022-07-04 10:55:14 +08004440 case NFPROTO_IPV4:
4441 ft->src_v4 = ctt->src.u3.in;
4442 ft->dst_v4 = ctt->dst.u3.in;
4443- ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
4444 break;
4445 case NFPROTO_IPV6:
4446 ft->src_v6 = ctt->src.u3.in6;
4447 ft->dst_v6 = ctt->dst.u3.in6;
4448- ft->mtu = ip6_dst_mtu_forward(dst);
4449 break;
4450 }
4451
developerb7c46752022-07-04 19:51:38 +08004452@@ -64,50 +40,32 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
developer8cb3ac72022-07-04 10:55:14 +08004453 ft->l4proto = ctt->dst.protonum;
4454 ft->src_port = ctt->src.u.tcp.port;
4455 ft->dst_port = ctt->dst.u.tcp.port;
4456-
4457- ft->iifidx = other_dst->dev->ifindex;
4458- ft->dst_cache = dst;
developerb7c46752022-07-04 19:51:38 +08004459- ft->dst_cookie = flow_offload_dst_cookie(ft);
developer8cb3ac72022-07-04 10:55:14 +08004460 }
4461
4462-struct flow_offload *
4463-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
4464+struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
4465 {
4466- struct flow_offload_entry *entry;
4467 struct flow_offload *flow;
4468
4469 if (unlikely(nf_ct_is_dying(ct) ||
4470 !atomic_inc_not_zero(&ct->ct_general.use)))
4471 return NULL;
4472
4473- entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
4474- if (!entry)
4475+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
4476+ if (!flow)
4477 goto err_ct_refcnt;
4478
4479- flow = &entry->flow;
developerb7c46752022-07-04 19:51:38 +08004480-
developer8cb3ac72022-07-04 10:55:14 +08004481- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
4482- goto err_dst_cache_original;
developerb7c46752022-07-04 19:51:38 +08004483+ flow->ct = ct;
4484
developer7eb15dc2023-06-14 17:44:03 +08004485- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
4486- goto err_dst_cache_reply;
4487-
developer8cb3ac72022-07-04 10:55:14 +08004488- entry->ct = ct;
4489-
4490- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
4491- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
4492+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
4493+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);
4494
4495 if (ct->status & IPS_SRC_NAT)
4496- flow->flags |= FLOW_OFFLOAD_SNAT;
4497+ __set_bit(NF_FLOW_SNAT, &flow->flags);
4498 if (ct->status & IPS_DST_NAT)
4499- flow->flags |= FLOW_OFFLOAD_DNAT;
4500+ __set_bit(NF_FLOW_DNAT, &flow->flags);
4501
4502 return flow;
4503
4504-err_dst_cache_reply:
4505- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
4506-err_dst_cache_original:
4507- kfree(entry);
4508 err_ct_refcnt:
4509 nf_ct_put(ct);
4510
developer7eb15dc2023-06-14 17:44:03 +08004511@@ -115,65 +73,155 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
developer8cb3ac72022-07-04 10:55:14 +08004512 }
4513 EXPORT_SYMBOL_GPL(flow_offload_alloc);
4514
4515-static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
4516+static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
4517 {
4518- tcp->state = TCP_CONNTRACK_ESTABLISHED;
4519- tcp->seen[0].td_maxwin = 0;
4520- tcp->seen[1].td_maxwin = 0;
4521+ const struct rt6_info *rt;
4522+
4523+ if (flow_tuple->l3proto == NFPROTO_IPV6) {
4524+ rt = (const struct rt6_info *)flow_tuple->dst_cache;
4525+ return rt6_get_cookie(rt);
4526+ }
4527+
4528+ return 0;
4529 }
4530
4531-#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
4532-#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
4533+static int flow_offload_fill_route(struct flow_offload *flow,
4534+ const struct nf_flow_route *route,
4535+ enum flow_offload_tuple_dir dir)
4536+{
4537+ struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
4538+ struct dst_entry *dst = route->tuple[dir].dst;
4539+ int i, j = 0;
developer7eb15dc2023-06-14 17:44:03 +08004540
4541-static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
developer8cb3ac72022-07-04 10:55:14 +08004542+ switch (flow_tuple->l3proto) {
4543+ case NFPROTO_IPV4:
4544+ flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
4545+ break;
4546+ case NFPROTO_IPV6:
4547+ flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
4548+ break;
4549+ }
4550+
4551+ flow_tuple->iifidx = route->tuple[dir].in.ifindex;
4552+ for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
4553+ flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
4554+ flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
4555+ if (route->tuple[dir].in.ingress_vlans & BIT(i))
4556+ flow_tuple->in_vlan_ingress |= BIT(j);
4557+ j++;
4558+ }
4559+ flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
4560+
4561+ switch (route->tuple[dir].xmit_type) {
4562+ case FLOW_OFFLOAD_XMIT_DIRECT:
4563+ memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
4564+ ETH_ALEN);
4565+ memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
4566+ ETH_ALEN);
4567+ flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
4568+ flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
4569+ break;
4570+ case FLOW_OFFLOAD_XMIT_XFRM:
4571+ case FLOW_OFFLOAD_XMIT_NEIGH:
4572+ if (!dst_hold_safe(route->tuple[dir].dst))
4573+ return -1;
4574+
4575+ flow_tuple->dst_cache = dst;
4576+ flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
4577+ break;
4578+ default:
4579+ WARN_ON_ONCE(1);
4580+ break;
4581+ }
4582+ flow_tuple->xmit_type = route->tuple[dir].xmit_type;
developerb7c46752022-07-04 19:51:38 +08004583+
developer8cb3ac72022-07-04 10:55:14 +08004584+ return 0;
4585+}
4586+
4587+static void nft_flow_dst_release(struct flow_offload *flow,
4588+ enum flow_offload_tuple_dir dir)
developer7eb15dc2023-06-14 17:44:03 +08004589 {
4590- return (__s32)(timeout - (u32)jiffies);
developer8cb3ac72022-07-04 10:55:14 +08004591+ if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
4592+ flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
4593+ dst_release(flow->tuplehash[dir].tuple.dst_cache);
developer7eb15dc2023-06-14 17:44:03 +08004594 }
4595
4596-static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
developer8cb3ac72022-07-04 10:55:14 +08004597+int flow_offload_route_init(struct flow_offload *flow,
4598+ const struct nf_flow_route *route)
developer7eb15dc2023-06-14 17:44:03 +08004599 {
4600- const struct nf_conntrack_l4proto *l4proto;
4601- int l4num = nf_ct_protonum(ct);
4602- unsigned int timeout;
developer8cb3ac72022-07-04 10:55:14 +08004603+ int err;
developer7eb15dc2023-06-14 17:44:03 +08004604
4605- l4proto = nf_ct_l4proto_find(l4num);
4606- if (!l4proto)
4607- return;
developer8cb3ac72022-07-04 10:55:14 +08004608+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
4609+ if (err < 0)
4610+ return err;
developer7eb15dc2023-06-14 17:44:03 +08004611
4612- if (l4num == IPPROTO_TCP)
4613- timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
4614- else if (l4num == IPPROTO_UDP)
4615- timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
4616- else
4617- return;
developer8cb3ac72022-07-04 10:55:14 +08004618+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
4619+ if (err < 0)
4620+ goto err_route_reply;
4621+
4622+ flow->type = NF_FLOW_OFFLOAD_ROUTE;
developer7eb15dc2023-06-14 17:44:03 +08004623
4624- if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
4625- ct->timeout = nfct_time_stamp + timeout;
developer8cb3ac72022-07-04 10:55:14 +08004626+ return 0;
4627+
4628+err_route_reply:
4629+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
4630+
4631+ return err;
developer7eb15dc2023-06-14 17:44:03 +08004632 }
developer8cb3ac72022-07-04 10:55:14 +08004633+EXPORT_SYMBOL_GPL(flow_offload_route_init);
developerb7c46752022-07-04 19:51:38 +08004634
developer7eb15dc2023-06-14 17:44:03 +08004635-static void flow_offload_fixup_ct_state(struct nf_conn *ct)
developer8cb3ac72022-07-04 10:55:14 +08004636+static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
4637 {
developer7eb15dc2023-06-14 17:44:03 +08004638- if (nf_ct_protonum(ct) == IPPROTO_TCP)
4639- flow_offload_fixup_tcp(&ct->proto.tcp);
developer8cb3ac72022-07-04 10:55:14 +08004640+ tcp->seen[0].td_maxwin = 0;
4641+ tcp->seen[1].td_maxwin = 0;
4642 }
4643
developer7eb15dc2023-06-14 17:44:03 +08004644 static void flow_offload_fixup_ct(struct nf_conn *ct)
developer8cb3ac72022-07-04 10:55:14 +08004645 {
developer7eb15dc2023-06-14 17:44:03 +08004646- flow_offload_fixup_ct_state(ct);
4647- flow_offload_fixup_ct_timeout(ct);
developer8cb3ac72022-07-04 10:55:14 +08004648+ struct net *net = nf_ct_net(ct);
developer7eb15dc2023-06-14 17:44:03 +08004649+ int l4num = nf_ct_protonum(ct);
developer8cb3ac72022-07-04 10:55:14 +08004650+ s32 timeout;
developer7eb15dc2023-06-14 17:44:03 +08004651+
developer8cb3ac72022-07-04 10:55:14 +08004652+ if (l4num == IPPROTO_TCP) {
4653+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
developer7eb15dc2023-06-14 17:44:03 +08004654+
4655+ flow_offload_fixup_tcp(&ct->proto.tcp);
4656+
4657+ timeout = tn->timeouts[ct->proto.tcp.state];
developer8cb3ac72022-07-04 10:55:14 +08004658+ timeout -= tn->offload_timeout;
4659+ } else if (l4num == IPPROTO_UDP) {
4660+ struct nf_udp_net *tn = nf_udp_pernet(net);
4661+
4662+ timeout = tn->timeouts[UDP_CT_REPLIED];
4663+ timeout -= tn->offload_timeout;
4664+ } else {
developer7eb15dc2023-06-14 17:44:03 +08004665+ return;
developer8cb3ac72022-07-04 10:55:14 +08004666+ }
4667+
4668+ if (timeout < 0)
4669+ timeout = 0;
developer7eb15dc2023-06-14 17:44:03 +08004670+
developer8cb3ac72022-07-04 10:55:14 +08004671+ if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
4672+ WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
4673 }
4674
developer8cb3ac72022-07-04 10:55:14 +08004675-void flow_offload_free(struct flow_offload *flow)
4676+static void flow_offload_route_release(struct flow_offload *flow)
4677 {
4678- struct flow_offload_entry *e;
4679+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
4680+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
4681+}
4682
4683- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
4684- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
4685- e = container_of(flow, struct flow_offload_entry, flow);
4686- if (flow->flags & FLOW_OFFLOAD_DYING)
4687- nf_ct_delete(e->ct, 0, 0);
4688- nf_ct_put(e->ct);
4689- kfree_rcu(e, rcu_head);
4690+void flow_offload_free(struct flow_offload *flow)
4691+{
4692+ switch (flow->type) {
4693+ case NF_FLOW_OFFLOAD_ROUTE:
4694+ flow_offload_route_release(flow);
4695+ break;
4696+ default:
4697+ break;
4698+ }
4699+ nf_ct_put(flow->ct);
4700+ kfree_rcu(flow, rcu_head);
4701 }
4702 EXPORT_SYMBOL_GPL(flow_offload_free);
4703
developer7eb15dc2023-06-14 17:44:03 +08004704@@ -181,14 +229,14 @@ static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
developer8cb3ac72022-07-04 10:55:14 +08004705 {
4706 const struct flow_offload_tuple *tuple = data;
4707
4708- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
4709+ return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
4710 }
4711
4712 static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
4713 {
4714 const struct flow_offload_tuple_rhash *tuplehash = data;
4715
4716- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
4717+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
4718 }
4719
4720 static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
developer7eb15dc2023-06-14 17:44:03 +08004721@@ -197,7 +245,7 @@ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
developer8cb3ac72022-07-04 10:55:14 +08004722 const struct flow_offload_tuple *tuple = arg->key;
4723 const struct flow_offload_tuple_rhash *x = ptr;
4724
4725- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
4726+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
4727 return 1;
4728
4729 return 0;
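For reference, the rhashtable hunks above hash and compare only the bytes that precede the __hash marker member, so per-direction state stored after the marker never affects the lookup key. A self-contained userspace sketch of that idiom (toy struct and FNV-1a as stand-ins for flow_offload_tuple and jhash; not part of the patch):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_tuple {
	uint32_t src, dst;
	uint16_t sport, dport;
	uint32_t __hash;	/* marker: fields below are per-direction state */
	uint8_t  dir;
};

static uint32_t toy_hash(const struct toy_tuple *t)
{
	const uint8_t *p = (const uint8_t *)t;
	uint32_t h = 2166136261u;
	size_t i;

	/* Hash only the bytes before the marker, like
	 * jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed). */
	for (i = 0; i < offsetof(struct toy_tuple, __hash); i++)
		h = (h ^ p[i]) * 16777619u;
	return h;
}

int main(void)
{
	struct toy_tuple a = { .src = 1, .dst = 2, .sport = 80, .dport = 443, .dir = 0 };
	struct toy_tuple b = a;

	b.dir = 1;	/* differs only past the marker */
	printf("same key: %d\n",
	       !memcmp(&a, &b, offsetof(struct toy_tuple, __hash)) &&
	       toy_hash(&a) == toy_hash(&b));
	return 0;
}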
developer7eb15dc2023-06-14 17:44:03 +08004730@@ -211,30 +259,30 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
developer8cb3ac72022-07-04 10:55:14 +08004731 .automatic_shrinking = true,
4732 };
4733
4734-#define DAY (86400 * HZ)
4735-
4736-/* Set an arbitrary timeout large enough not to ever expire, this save
4737- * us a check for the IPS_OFFLOAD_BIT from the packet path via
4738- * nf_ct_is_expired().
4739- */
4740-static void nf_ct_offload_timeout(struct flow_offload *flow)
4741+unsigned long flow_offload_get_timeout(struct flow_offload *flow)
4742 {
4743- struct flow_offload_entry *entry;
4744- struct nf_conn *ct;
4745+ unsigned long timeout = NF_FLOW_TIMEOUT;
4746+ struct net *net = nf_ct_net(flow->ct);
4747+ int l4num = nf_ct_protonum(flow->ct);
developer7eb15dc2023-06-14 17:44:03 +08004748+
developerb7c46752022-07-04 19:51:38 +08004749+ if (l4num == IPPROTO_TCP) {
4750+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
developer7eb15dc2023-06-14 17:44:03 +08004751+
developer8cb3ac72022-07-04 10:55:14 +08004752+ timeout = tn->offload_timeout;
4753+ } else if (l4num == IPPROTO_UDP) {
4754+ struct nf_udp_net *tn = nf_udp_pernet(net);
developer7eb15dc2023-06-14 17:44:03 +08004755
4756- entry = container_of(flow, struct flow_offload_entry, flow);
4757- ct = entry->ct;
developer8cb3ac72022-07-04 10:55:14 +08004758+ timeout = tn->offload_timeout;
4759+ }
developer7eb15dc2023-06-14 17:44:03 +08004760
4761- if (nf_ct_expires(ct) < DAY / 2)
4762- ct->timeout = nfct_time_stamp + DAY;
developer8cb3ac72022-07-04 10:55:14 +08004763+ return timeout;
4764 }
4765
4766 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
4767 {
4768 int err;
4769
4770- nf_ct_offload_timeout(flow);
4771- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4772+ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
4773
4774 err = rhashtable_insert_fast(&flow_table->rhashtable,
4775 &flow->tuplehash[0].node,
developer7eb15dc2023-06-14 17:44:03 +08004776@@ -252,10 +300,35 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
developer8cb3ac72022-07-04 10:55:14 +08004777 return err;
4778 }
4779
4780+ nf_ct_offload_timeout(flow->ct);
4781+
4782+ if (nf_flowtable_hw_offload(flow_table)) {
4783+ __set_bit(NF_FLOW_HW, &flow->flags);
4784+ nf_flow_offload_add(flow_table, flow);
4785+ }
4786+
4787 return 0;
4788 }
4789 EXPORT_SYMBOL_GPL(flow_offload_add);
4790
4791+void flow_offload_refresh(struct nf_flowtable *flow_table,
4792+ struct flow_offload *flow)
4793+{
4794+ u32 timeout;
4795+
4796+ timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
4797+ if (timeout - READ_ONCE(flow->timeout) > HZ)
4798+ WRITE_ONCE(flow->timeout, timeout);
4799+ else
4800+ return;
4801+
4802+ if (likely(!nf_flowtable_hw_offload(flow_table)))
4803+ return;
4804+
4805+ nf_flow_offload_add(flow_table, flow);
4806+}
4807+EXPORT_SYMBOL_GPL(flow_offload_refresh);
4808+
4809 static inline bool nf_flow_has_expired(const struct flow_offload *flow)
4810 {
4811 return nf_flow_timeout_delta(flow->timeout) <= 0;
developer7eb15dc2023-06-14 17:44:03 +08004812@@ -264,37 +337,20 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
developer8cb3ac72022-07-04 10:55:14 +08004813 static void flow_offload_del(struct nf_flowtable *flow_table,
4814 struct flow_offload *flow)
4815 {
4816- struct flow_offload_entry *e;
4817-
4818 rhashtable_remove_fast(&flow_table->rhashtable,
4819 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
4820 nf_flow_offload_rhash_params);
developer7eb15dc2023-06-14 17:44:03 +08004821 rhashtable_remove_fast(&flow_table->rhashtable,
developer8cb3ac72022-07-04 10:55:14 +08004822 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
4823 nf_flow_offload_rhash_params);
developer7eb15dc2023-06-14 17:44:03 +08004824-
developer8cb3ac72022-07-04 10:55:14 +08004825- e = container_of(flow, struct flow_offload_entry, flow);
4826- clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
developer7eb15dc2023-06-14 17:44:03 +08004827-
4828- if (nf_flow_has_expired(flow))
developer8cb3ac72022-07-04 10:55:14 +08004829- flow_offload_fixup_ct(e->ct);
4830- else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
4831- flow_offload_fixup_ct_timeout(e->ct);
4832-
4833- if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
4834- flow_offload_fixup_ct_state(e->ct);
developer7eb15dc2023-06-14 17:44:03 +08004835-
developer8cb3ac72022-07-04 10:55:14 +08004836 flow_offload_free(flow);
4837 }
4838
4839 void flow_offload_teardown(struct flow_offload *flow)
4840 {
4841- struct flow_offload_entry *e;
developerb7c46752022-07-04 19:51:38 +08004842-
4843- flow->flags |= FLOW_OFFLOAD_TEARDOWN;
developer7eb15dc2023-06-14 17:44:03 +08004844-
developer8cb3ac72022-07-04 10:55:14 +08004845- e = container_of(flow, struct flow_offload_entry, flow);
4846- flow_offload_fixup_ct_state(e->ct);
developer7eb15dc2023-06-14 17:44:03 +08004847+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
4848+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
4849+ flow_offload_fixup_ct(flow->ct);
developer8cb3ac72022-07-04 10:55:14 +08004850 }
4851 EXPORT_SYMBOL_GPL(flow_offload_teardown);
4852
developer7eb15dc2023-06-14 17:44:03 +08004853@@ -304,7 +360,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08004854 {
4855 struct flow_offload_tuple_rhash *tuplehash;
4856 struct flow_offload *flow;
4857- struct flow_offload_entry *e;
4858 int dir;
4859
4860 tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
developer7eb15dc2023-06-14 17:44:03 +08004861@@ -314,19 +369,17 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08004862
4863 dir = tuplehash->tuple.dir;
4864 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4865- if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
4866+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
4867 return NULL;
4868
4869- e = container_of(flow, struct flow_offload_entry, flow);
4870- if (unlikely(nf_ct_is_dying(e->ct)))
4871+ if (unlikely(nf_ct_is_dying(flow->ct)))
4872 return NULL;
4873
4874 return tuplehash;
4875 }
4876 EXPORT_SYMBOL_GPL(flow_offload_lookup);
4877
4878-static int
4879-nf_flow_table_iterate(struct nf_flowtable *flow_table,
4880+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
4881 void (*iter)(struct flow_offload *flow, void *data),
4882 void *data)
4883 {
developer7eb15dc2023-06-14 17:44:03 +08004884@@ -339,7 +392,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08004885 rhashtable_walk_start(&hti);
4886
4887 while ((tuplehash = rhashtable_walk_next(&hti))) {
4888-
4889 if (IS_ERR(tuplehash)) {
4890 if (PTR_ERR(tuplehash) != -EAGAIN) {
4891 err = PTR_ERR(tuplehash);
developer7eb15dc2023-06-14 17:44:03 +08004892@@ -359,23 +411,28 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08004893
4894 return err;
4895 }
4896+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
4897
developer7eb15dc2023-06-14 17:44:03 +08004898 static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
developer8cb3ac72022-07-04 10:55:14 +08004899 {
developer7eb15dc2023-06-14 17:44:03 +08004900 struct nf_flowtable *flow_table = data;
developer8cb3ac72022-07-04 10:55:14 +08004901- struct flow_offload_entry *e;
4902- bool teardown;
developer8cb3ac72022-07-04 10:55:14 +08004903
4904- e = container_of(flow, struct flow_offload_entry, flow);
developer7eb15dc2023-06-14 17:44:03 +08004905-
developer8cb3ac72022-07-04 10:55:14 +08004906- teardown = flow->flags & (FLOW_OFFLOAD_DYING |
4907- FLOW_OFFLOAD_TEARDOWN);
developer7eb15dc2023-06-14 17:44:03 +08004908-
developer8cb3ac72022-07-04 10:55:14 +08004909- if (!teardown)
4910- nf_ct_offload_timeout(flow);
developer7eb15dc2023-06-14 17:44:03 +08004911+ if (nf_flow_has_expired(flow) ||
4912+ nf_ct_is_dying(flow->ct))
4913+ flow_offload_teardown(flow);
developer8cb3ac72022-07-04 10:55:14 +08004914
4915- if (nf_flow_has_expired(flow) || teardown)
4916- flow_offload_del(flow_table, flow);
developer8cb3ac72022-07-04 10:55:14 +08004917+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
4918+ if (test_bit(NF_FLOW_HW, &flow->flags)) {
4919+ if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
4920+ nf_flow_offload_del(flow_table, flow);
4921+ else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
4922+ flow_offload_del(flow_table, flow);
4923+ } else {
4924+ flow_offload_del(flow_table, flow);
4925+ }
4926+ } else if (test_bit(NF_FLOW_HW, &flow->flags)) {
developer7eb15dc2023-06-14 17:44:03 +08004927+ nf_flow_offload_stats(flow_table, flow);
developer8cb3ac72022-07-04 10:55:14 +08004928+ }
4929 }
4930
4931 static void nf_flow_offload_work_gc(struct work_struct *work)
developer7eb15dc2023-06-14 17:44:03 +08004932@@ -387,30 +444,20 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
developer8cb3ac72022-07-04 10:55:14 +08004933 queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
4934 }
4935
4936-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
4937- __be16 port, __be16 new_port)
4938+static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
4939+ __be16 port, __be16 new_port)
4940 {
4941 struct tcphdr *tcph;
4942
4943- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4944- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4945- return -1;
4946-
4947 tcph = (void *)(skb_network_header(skb) + thoff);
4948 inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
4949-
4950- return 0;
4951 }
4952
4953-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
4954- __be16 port, __be16 new_port)
4955+static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
4956+ __be16 port, __be16 new_port)
4957 {
4958 struct udphdr *udph;
4959
4960- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4961- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4962- return -1;
4963-
4964 udph = (void *)(skb_network_header(skb) + thoff);
4965 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4966 inet_proto_csum_replace2(&udph->check, skb, port,
developer7eb15dc2023-06-14 17:44:03 +08004967@@ -418,38 +465,28 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
developer8cb3ac72022-07-04 10:55:14 +08004968 if (!udph->check)
4969 udph->check = CSUM_MANGLED_0;
4970 }
4971-
4972- return 0;
4973 }
4974
4975-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
4976- u8 protocol, __be16 port, __be16 new_port)
4977+static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
4978+ u8 protocol, __be16 port, __be16 new_port)
4979 {
4980 switch (protocol) {
4981 case IPPROTO_TCP:
4982- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
4983- return NF_DROP;
4984+ nf_flow_nat_port_tcp(skb, thoff, port, new_port);
4985 break;
4986 case IPPROTO_UDP:
4987- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
4988- return NF_DROP;
4989+ nf_flow_nat_port_udp(skb, thoff, port, new_port);
4990 break;
4991 }
4992-
4993- return 0;
4994 }
4995
4996-int nf_flow_snat_port(const struct flow_offload *flow,
4997- struct sk_buff *skb, unsigned int thoff,
4998- u8 protocol, enum flow_offload_tuple_dir dir)
4999+void nf_flow_snat_port(const struct flow_offload *flow,
5000+ struct sk_buff *skb, unsigned int thoff,
5001+ u8 protocol, enum flow_offload_tuple_dir dir)
5002 {
5003 struct flow_ports *hdr;
5004 __be16 port, new_port;
5005
5006- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
5007- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
5008- return -1;
5009-
5010 hdr = (void *)(skb_network_header(skb) + thoff);
5011
5012 switch (dir) {
developer7eb15dc2023-06-14 17:44:03 +08005013@@ -463,25 +500,19 @@ int nf_flow_snat_port(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08005014 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
5015 hdr->dest = new_port;
5016 break;
5017- default:
5018- return -1;
5019 }
5020
5021- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
5022+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
5023 }
5024 EXPORT_SYMBOL_GPL(nf_flow_snat_port);
5025
5026-int nf_flow_dnat_port(const struct flow_offload *flow,
5027- struct sk_buff *skb, unsigned int thoff,
5028- u8 protocol, enum flow_offload_tuple_dir dir)
5029+void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
5030+ unsigned int thoff, u8 protocol,
5031+ enum flow_offload_tuple_dir dir)
5032 {
5033 struct flow_ports *hdr;
5034 __be16 port, new_port;
5035
5036- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
5037- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
5038- return -1;
5039-
5040 hdr = (void *)(skb_network_header(skb) + thoff);
5041
5042 switch (dir) {
developer7eb15dc2023-06-14 17:44:03 +08005043@@ -495,11 +526,9 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08005044 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
5045 hdr->source = new_port;
5046 break;
5047- default:
5048- return -1;
5049 }
5050
5051- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
5052+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
5053 }
5054 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
5055
developer7eb15dc2023-06-14 17:44:03 +08005056@@ -507,7 +536,9 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
developer8cb3ac72022-07-04 10:55:14 +08005057 {
5058 int err;
5059
5060- INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
5061+ INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
5062+ flow_block_init(&flowtable->flow_block);
5063+ init_rwsem(&flowtable->flow_block_lock);
5064
5065 err = rhashtable_init(&flowtable->rhashtable,
5066 &nf_flow_offload_rhash_params);
developer7eb15dc2023-06-14 17:44:03 +08005067@@ -528,25 +559,24 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init);
developer8cb3ac72022-07-04 10:55:14 +08005068 static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
5069 {
5070 struct net_device *dev = data;
5071- struct flow_offload_entry *e;
5072-
5073- e = container_of(flow, struct flow_offload_entry, flow);
5074
5075 if (!dev) {
5076 flow_offload_teardown(flow);
5077 return;
5078 }
5079- if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
5080+
5081+ if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
5082 (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
5083 flow->tuplehash[1].tuple.iifidx == dev->ifindex))
5084- flow_offload_dead(flow);
5085+ flow_offload_teardown(flow);
5086 }
5087
5088-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
5089- struct net_device *dev)
5090+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
5091+ struct net_device *dev)
5092 {
5093 nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
5094 flush_delayed_work(&flowtable->gc_work);
5095+ nf_flow_table_offload_flush(flowtable);
5096 }
5097
5098 void nf_flow_table_cleanup(struct net_device *dev)
developer7eb15dc2023-06-14 17:44:03 +08005099@@ -555,7 +585,7 @@ void nf_flow_table_cleanup(struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08005100
5101 mutex_lock(&flowtable_lock);
5102 list_for_each_entry(flowtable, &flowtables, list)
5103- nf_flow_table_iterate_cleanup(flowtable, dev);
5104+ nf_flow_table_gc_cleanup(flowtable, dev);
5105 mutex_unlock(&flowtable_lock);
5106 }
5107 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
developer7eb15dc2023-06-14 17:44:03 +08005108@@ -565,9 +595,14 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
developer8cb3ac72022-07-04 10:55:14 +08005109 mutex_lock(&flowtable_lock);
5110 list_del(&flow_table->list);
5111 mutex_unlock(&flowtable_lock);
5112+
5113 cancel_delayed_work_sync(&flow_table->gc_work);
5114 nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
5115 nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
5116+ nf_flow_table_offload_flush(flow_table);
5117+ if (nf_flowtable_hw_offload(flow_table))
5118+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
5119+ flow_table);
5120 rhashtable_destroy(&flow_table->rhashtable);
5121 }
5122 EXPORT_SYMBOL_GPL(nf_flow_table_free);
developer7eb15dc2023-06-14 17:44:03 +08005123@@ -591,12 +626,23 @@ static struct notifier_block flow_offload_netdev_notifier = {
developer8cb3ac72022-07-04 10:55:14 +08005124
5125 static int __init nf_flow_table_module_init(void)
5126 {
5127- return register_netdevice_notifier(&flow_offload_netdev_notifier);
5128+ int ret;
5129+
5130+ ret = nf_flow_table_offload_init();
5131+ if (ret)
5132+ return ret;
5133+
5134+ ret = register_netdevice_notifier(&flow_offload_netdev_notifier);
5135+ if (ret)
5136+ nf_flow_table_offload_exit();
5137+
5138+ return ret;
5139 }
5140
5141 static void __exit nf_flow_table_module_exit(void)
5142 {
5143 unregister_netdevice_notifier(&flow_offload_netdev_notifier);
5144+ nf_flow_table_offload_exit();
5145 }
5146
5147 module_init(nf_flow_table_module_init);
developer7eb15dc2023-06-14 17:44:03 +08005148@@ -604,3 +650,4 @@ module_exit(nf_flow_table_module_exit);
developer8cb3ac72022-07-04 10:55:14 +08005149
5150 MODULE_LICENSE("GPL");
5151 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
5152+MODULE_DESCRIPTION("Netfilter flow table module");
5153diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
developer7eb15dc2023-06-14 17:44:03 +08005154index 397129b..2802646 100644
developer8cb3ac72022-07-04 10:55:14 +08005155--- a/net/netfilter/nf_flow_table_ip.c
5156+++ b/net/netfilter/nf_flow_table_ip.c
5157@@ -7,11 +7,13 @@
5158 #include <linux/ip.h>
5159 #include <linux/ipv6.h>
5160 #include <linux/netdevice.h>
5161+#include <linux/if_ether.h>
5162 #include <net/ip.h>
5163 #include <net/ipv6.h>
5164 #include <net/ip6_route.h>
5165 #include <net/neighbour.h>
5166 #include <net/netfilter/nf_flow_table.h>
5167+#include <net/netfilter/nf_conntrack_acct.h>
5168 /* For layer 4 checksum field offset. */
5169 #include <linux/tcp.h>
5170 #include <linux/udp.h>
5171@@ -24,9 +26,6 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
5172 if (proto != IPPROTO_TCP)
5173 return 0;
5174
5175- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
5176- return -1;
5177-
5178 tcph = (void *)(skb_network_header(skb) + thoff);
5179 if (unlikely(tcph->fin || tcph->rst)) {
5180 flow_offload_teardown(flow);
5181@@ -36,30 +35,20 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
5182 return 0;
5183 }
5184
5185-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
5186- __be32 addr, __be32 new_addr)
5187+static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
5188+ __be32 addr, __be32 new_addr)
5189 {
5190 struct tcphdr *tcph;
5191
5192- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
5193- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
5194- return -1;
5195-
5196 tcph = (void *)(skb_network_header(skb) + thoff);
5197 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
5198-
5199- return 0;
5200 }
5201
5202-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
5203- __be32 addr, __be32 new_addr)
5204+static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
5205+ __be32 addr, __be32 new_addr)
5206 {
5207 struct udphdr *udph;
5208
5209- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
5210- skb_try_make_writable(skb, thoff + sizeof(*udph)))
5211- return -1;
5212-
5213 udph = (void *)(skb_network_header(skb) + thoff);
5214 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
5215 inet_proto_csum_replace4(&udph->check, skb, addr,
5216@@ -67,31 +56,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
5217 if (!udph->check)
5218 udph->check = CSUM_MANGLED_0;
5219 }
5220-
5221- return 0;
5222 }
5223
5224-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
5225- unsigned int thoff, __be32 addr,
5226- __be32 new_addr)
5227+static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
5228+ unsigned int thoff, __be32 addr,
5229+ __be32 new_addr)
5230 {
5231 switch (iph->protocol) {
5232 case IPPROTO_TCP:
5233- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
5234- return NF_DROP;
5235+ nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
5236 break;
5237 case IPPROTO_UDP:
5238- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
5239- return NF_DROP;
5240+ nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
5241 break;
5242 }
5243-
5244- return 0;
5245 }
5246
5247-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
5248- struct iphdr *iph, unsigned int thoff,
5249- enum flow_offload_tuple_dir dir)
5250+static void nf_flow_snat_ip(const struct flow_offload *flow,
5251+ struct sk_buff *skb, struct iphdr *iph,
5252+ unsigned int thoff, enum flow_offload_tuple_dir dir)
5253 {
5254 __be32 addr, new_addr;
5255
5256@@ -106,17 +89,15 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
5257 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
5258 iph->daddr = new_addr;
5259 break;
5260- default:
5261- return -1;
5262 }
5263 csum_replace4(&iph->check, addr, new_addr);
5264
5265- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
5266+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
5267 }
5268
5269-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
5270- struct iphdr *iph, unsigned int thoff,
5271- enum flow_offload_tuple_dir dir)
5272+static void nf_flow_dnat_ip(const struct flow_offload *flow,
5273+ struct sk_buff *skb, struct iphdr *iph,
5274+ unsigned int thoff, enum flow_offload_tuple_dir dir)
5275 {
5276 __be32 addr, new_addr;
5277
5278@@ -131,29 +112,24 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
5279 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
5280 iph->saddr = new_addr;
5281 break;
5282- default:
5283- return -1;
5284 }
5285 csum_replace4(&iph->check, addr, new_addr);
5286
5287- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
5288+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
5289 }
5290
5291-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
5292- unsigned int thoff, enum flow_offload_tuple_dir dir)
5293+static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
5294+ unsigned int thoff, enum flow_offload_tuple_dir dir,
5295+ struct iphdr *iph)
5296 {
5297- struct iphdr *iph = ip_hdr(skb);
5298-
5299- if (flow->flags & FLOW_OFFLOAD_SNAT &&
5300- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
5301- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
5302- return -1;
5303- if (flow->flags & FLOW_OFFLOAD_DNAT &&
5304- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
5305- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
5306- return -1;
5307-
5308- return 0;
5309+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5310+ nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
5311+ nf_flow_snat_ip(flow, skb, iph, thoff, dir);
5312+ }
5313+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5314+ nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
5315+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
5316+ }
5317 }
5318
5319 static bool ip_has_options(unsigned int thoff)
5320@@ -161,35 +137,70 @@ static bool ip_has_options(unsigned int thoff)
5321 return thoff != sizeof(struct iphdr);
5322 }
5323
5324+static void nf_flow_tuple_encap(struct sk_buff *skb,
5325+ struct flow_offload_tuple *tuple)
5326+{
5327+ struct vlan_ethhdr *veth;
5328+ struct pppoe_hdr *phdr;
5329+ int i = 0;
5330+
5331+ if (skb_vlan_tag_present(skb)) {
5332+ tuple->encap[i].id = skb_vlan_tag_get(skb);
5333+ tuple->encap[i].proto = skb->vlan_proto;
5334+ i++;
5335+ }
5336+ switch (skb->protocol) {
5337+ case htons(ETH_P_8021Q):
5338+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
5339+ tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
5340+ tuple->encap[i].proto = skb->protocol;
5341+ break;
5342+ case htons(ETH_P_PPP_SES):
5343+ phdr = (struct pppoe_hdr *)skb_mac_header(skb);
5344+ tuple->encap[i].id = ntohs(phdr->sid);
5345+ tuple->encap[i].proto = skb->protocol;
5346+ break;
5347+ }
5348+}
5349+
5350 static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
5351- struct flow_offload_tuple *tuple)
5352+ struct flow_offload_tuple *tuple, u32 *hdrsize,
5353+ u32 offset)
5354 {
5355 struct flow_ports *ports;
5356 unsigned int thoff;
5357 struct iphdr *iph;
5358
5359- if (!pskb_may_pull(skb, sizeof(*iph)))
5360+ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
5361 return -1;
5362
5363- iph = ip_hdr(skb);
5364- thoff = iph->ihl * 4;
5365+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
5366+ thoff = (iph->ihl * 4);
5367
5368 if (ip_is_fragment(iph) ||
5369 unlikely(ip_has_options(thoff)))
5370 return -1;
5371
5372- if (iph->protocol != IPPROTO_TCP &&
5373- iph->protocol != IPPROTO_UDP)
5374+ thoff += offset;
5375+
5376+ switch (iph->protocol) {
5377+ case IPPROTO_TCP:
5378+ *hdrsize = sizeof(struct tcphdr);
5379+ break;
5380+ case IPPROTO_UDP:
5381+ *hdrsize = sizeof(struct udphdr);
5382+ break;
5383+ default:
5384 return -1;
5385+ }
5386
5387 if (iph->ttl <= 1)
5388 return -1;
5389
5390- thoff = iph->ihl * 4;
5391- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
5392+ if (!pskb_may_pull(skb, thoff + *hdrsize))
5393 return -1;
5394
5395- iph = ip_hdr(skb);
5396+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
5397 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
5398
5399 tuple->src_v4.s_addr = iph->saddr;
5400@@ -199,6 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
5401 tuple->l3proto = AF_INET;
5402 tuple->l4proto = iph->protocol;
5403 tuple->iifidx = dev->ifindex;
5404+ nf_flow_tuple_encap(skb, tuple);
5405
5406 return 0;
5407 }
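For reference, nf_flow_tuple_encap() above records the outer VLAN TCI (or PPPoE session id) and the outer protocol into the tuple so encapsulated traffic can still be looked up. A simplified userspace sketch of the 802.1Q case (the struct layout is a stand-in for the kernel's vlan_ethhdr; not part of the patch):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for vlan_ethhdr: same field order, no kernel types. */
struct vlan_eth {
	uint8_t  h_dest[6];
	uint8_t  h_source[6];
	uint16_t h_vlan_proto;			/* 0x8100, network order */
	uint16_t h_vlan_TCI;			/* PCP/DEI/VID, network order */
	uint16_t h_vlan_encapsulated_proto;	/* inner protocol */
};

int main(void)
{
	uint8_t frame[18] = { 0 };
	struct vlan_eth veth;

	frame[12] = 0x81; frame[13] = 0x00;	/* TPID: 802.1Q */
	frame[14] = 0x00; frame[15] = 0x64;	/* TCI: VID 100 */
	frame[16] = 0x08; frame[17] = 0x00;	/* inner: IPv4  */

	memcpy(&veth, frame, sizeof(veth));
	/* nf_flow_tuple_encap() stores ntohs(h_vlan_TCI) as encap[i].id plus
	 * the outer protocol; the VID mask is applied later at match time. */
	printf("encap id 0x%04x, inner proto 0x%04x\n",
	       ntohs(veth.h_vlan_TCI), ntohs(veth.h_vlan_encapsulated_proto));
	return 0;
}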
developer7eb15dc2023-06-14 17:44:03 +08005408@@ -215,6 +227,15 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
5409 return true;
5410 }
5411
5412+static inline bool nf_flow_dst_check(struct flow_offload_tuple *tuple)
5413+{
5414+ if (tuple->xmit_type != FLOW_OFFLOAD_XMIT_NEIGH &&
5415+ tuple->xmit_type != FLOW_OFFLOAD_XMIT_XFRM)
5416+ return true;
5417+
5418+ return dst_check(tuple->dst_cache, tuple->dst_cookie);
5419+}
5420+
5421 static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
5422 const struct nf_hook_state *state,
5423 struct dst_entry *dst)
5424@@ -225,6 +246,75 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08005425 return NF_STOLEN;
5426 }
5427
5428+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
5429+ u32 *offset)
5430+{
5431+ struct vlan_ethhdr *veth;
5432+
5433+ switch (skb->protocol) {
5434+ case htons(ETH_P_8021Q):
5435+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
5436+ if (veth->h_vlan_encapsulated_proto == proto) {
5437+ *offset += VLAN_HLEN;
5438+ return true;
5439+ }
5440+ break;
5441+ case htons(ETH_P_PPP_SES):
5442+ if (nf_flow_pppoe_proto(skb) == proto) {
5443+ *offset += PPPOE_SES_HLEN;
5444+ return true;
5445+ }
5446+ break;
5447+ }
5448+
5449+ return false;
5450+}
5451+
5452+static void nf_flow_encap_pop(struct sk_buff *skb,
5453+ struct flow_offload_tuple_rhash *tuplehash)
5454+{
5455+ struct vlan_hdr *vlan_hdr;
5456+ int i;
5457+
5458+ for (i = 0; i < tuplehash->tuple.encap_num; i++) {
5459+ if (skb_vlan_tag_present(skb)) {
5460+ __vlan_hwaccel_clear_tag(skb);
5461+ continue;
5462+ }
5463+ switch (skb->protocol) {
5464+ case htons(ETH_P_8021Q):
5465+ vlan_hdr = (struct vlan_hdr *)skb->data;
5466+ __skb_pull(skb, VLAN_HLEN);
5467+ vlan_set_encap_proto(skb, vlan_hdr);
5468+ skb_reset_network_header(skb);
5469+ break;
5470+ case htons(ETH_P_PPP_SES):
5471+ skb->protocol = nf_flow_pppoe_proto(skb);
5472+ skb_pull(skb, PPPOE_SES_HLEN);
5473+ skb_reset_network_header(skb);
5474+ break;
5475+ }
5476+ }
5477+}
5478+
5479+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
5480+ const struct flow_offload_tuple_rhash *tuplehash,
5481+ unsigned short type)
5482+{
5483+ struct net_device *outdev;
5484+
5485+ outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
5486+ if (!outdev)
5487+ return NF_DROP;
5488+
5489+ skb->dev = outdev;
5490+ dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
5491+ tuplehash->tuple.out.h_source, skb->len);
5492+ dev_queue_xmit(skb);
5493+
5494+ return NF_STOLEN;
5495+}
5496+
5497 unsigned int
5498 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
5499 const struct nf_hook_state *state)
developer7eb15dc2023-06-14 17:44:03 +08005500@@ -235,15 +325,18 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08005501 enum flow_offload_tuple_dir dir;
5502 struct flow_offload *flow;
5503 struct net_device *outdev;
5504+ u32 hdrsize, offset = 0;
5505+ unsigned int thoff, mtu;
5506 struct rtable *rt;
5507- unsigned int thoff;
5508 struct iphdr *iph;
5509 __be32 nexthop;
5510+ int ret;
5511
5512- if (skb->protocol != htons(ETH_P_IP))
5513+ if (skb->protocol != htons(ETH_P_IP) &&
5514+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
5515 return NF_ACCEPT;
5516
5517- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
5518+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
5519 return NF_ACCEPT;
5520
5521 tuplehash = flow_offload_lookup(flow_table, &tuple);
developer7eb15dc2023-06-14 17:44:03 +08005522@@ -252,75 +345,85 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08005523
5524 dir = tuplehash->tuple.dir;
5525 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
5526- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
5527- outdev = rt->dst.dev;
developer7eb15dc2023-06-14 17:44:03 +08005528
developer8cb3ac72022-07-04 10:55:14 +08005529- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
developer7eb15dc2023-06-14 17:44:03 +08005530+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
5531+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
5532 return NF_ACCEPT;
developerb7c46752022-07-04 19:51:38 +08005533
developer8cb3ac72022-07-04 10:55:14 +08005534- if (skb_try_make_writable(skb, sizeof(*iph)))
5535- return NF_DROP;
developerb7c46752022-07-04 19:51:38 +08005536-
developer8cb3ac72022-07-04 10:55:14 +08005537- thoff = ip_hdr(skb)->ihl * 4;
5538- if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
developer8cb3ac72022-07-04 10:55:14 +08005539+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
5540+ thoff = (iph->ihl * 4) + offset;
5541+ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
5542 return NF_ACCEPT;
developer7eb15dc2023-06-14 17:44:03 +08005543
5544- if (!dst_check(&rt->dst, 0)) {
5545+ if (!nf_flow_dst_check(&tuplehash->tuple)) {
5546 flow_offload_teardown(flow);
5547 return NF_ACCEPT;
5548 }
developer8cb3ac72022-07-04 10:55:14 +08005549
5550- if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
5551+ if (skb_try_make_writable(skb, thoff + hdrsize))
5552 return NF_DROP;
5553
5554- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
5555+ flow_offload_refresh(flow_table, flow);
5556+
5557+ nf_flow_encap_pop(skb, tuplehash);
5558+ thoff -= offset;
5559+
5560 iph = ip_hdr(skb);
5561+ nf_flow_nat_ip(flow, skb, thoff, dir, iph);
5562+
5563 ip_decrease_ttl(iph);
5564 skb->tstamp = 0;
5565
5566- if (unlikely(dst_xfrm(&rt->dst))) {
5567+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
5568+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
5569+
5570+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
5571+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
5572 memset(skb->cb, 0, sizeof(struct inet_skb_parm));
5573 IPCB(skb)->iif = skb->dev->ifindex;
5574 IPCB(skb)->flags = IPSKB_FORWARDED;
5575 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
5576 }
5577
5578- skb->dev = outdev;
5579- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
5580- skb_dst_set_noref(skb, &rt->dst);
5581- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
5582+ switch (tuplehash->tuple.xmit_type) {
5583+ case FLOW_OFFLOAD_XMIT_NEIGH:
5584+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
5585+ outdev = rt->dst.dev;
5586+ skb->dev = outdev;
5587+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
5588+ skb_dst_set_noref(skb, &rt->dst);
5589+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
5590+ ret = NF_STOLEN;
5591+ break;
5592+ case FLOW_OFFLOAD_XMIT_DIRECT:
5593+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
5594+ if (ret == NF_DROP)
5595+ flow_offload_teardown(flow);
5596+ break;
5597+ }
5598
5599- return NF_STOLEN;
5600+ return ret;
5601 }
5602 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
5603
5604-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
5605- struct in6_addr *addr,
5606- struct in6_addr *new_addr)
5607+static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
5608+ struct in6_addr *addr,
5609+ struct in6_addr *new_addr,
5610+ struct ipv6hdr *ip6h)
5611 {
5612 struct tcphdr *tcph;
5613
5614- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
5615- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
5616- return -1;
5617-
5618 tcph = (void *)(skb_network_header(skb) + thoff);
5619 inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
5620 new_addr->s6_addr32, true);
5621-
5622- return 0;
5623 }
5624
5625-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
5626- struct in6_addr *addr,
5627- struct in6_addr *new_addr)
5628+static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
5629+ struct in6_addr *addr,
5630+ struct in6_addr *new_addr)
5631 {
5632 struct udphdr *udph;
5633
5634- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
5635- skb_try_make_writable(skb, thoff + sizeof(*udph)))
5636- return -1;
5637-
5638 udph = (void *)(skb_network_header(skb) + thoff);
5639 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
5640 inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
developer7eb15dc2023-06-14 17:44:03 +08005641@@ -328,32 +431,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
developer8cb3ac72022-07-04 10:55:14 +08005642 if (!udph->check)
5643 udph->check = CSUM_MANGLED_0;
5644 }
5645-
5646- return 0;
5647 }
5648
5649-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
5650- unsigned int thoff, struct in6_addr *addr,
5651- struct in6_addr *new_addr)
5652+static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
5653+ unsigned int thoff, struct in6_addr *addr,
5654+ struct in6_addr *new_addr)
5655 {
5656 switch (ip6h->nexthdr) {
5657 case IPPROTO_TCP:
5658- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
5659- return NF_DROP;
5660+ nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
5661 break;
5662 case IPPROTO_UDP:
5663- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
5664- return NF_DROP;
5665+ nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
5666 break;
5667 }
5668-
5669- return 0;
5670 }
5671
5672-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
5673- struct sk_buff *skb, struct ipv6hdr *ip6h,
5674- unsigned int thoff,
5675- enum flow_offload_tuple_dir dir)
5676+static void nf_flow_snat_ipv6(const struct flow_offload *flow,
5677+ struct sk_buff *skb, struct ipv6hdr *ip6h,
5678+ unsigned int thoff,
5679+ enum flow_offload_tuple_dir dir)
5680 {
5681 struct in6_addr addr, new_addr;
5682
developer7eb15dc2023-06-14 17:44:03 +08005683@@ -368,17 +465,15 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08005684 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
5685 ip6h->daddr = new_addr;
5686 break;
5687- default:
5688- return -1;
5689 }
5690
5691- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
5692+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
5693 }
5694
5695-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
5696- struct sk_buff *skb, struct ipv6hdr *ip6h,
5697- unsigned int thoff,
5698- enum flow_offload_tuple_dir dir)
5699+static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
5700+ struct sk_buff *skb, struct ipv6hdr *ip6h,
5701+ unsigned int thoff,
5702+ enum flow_offload_tuple_dir dir)
5703 {
5704 struct in6_addr addr, new_addr;
5705
developer7eb15dc2023-06-14 17:44:03 +08005706@@ -393,56 +488,60 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08005707 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
5708 ip6h->saddr = new_addr;
5709 break;
5710- default:
5711- return -1;
5712 }
5713
5714- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
5715+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
5716 }
5717
5718-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
5719- struct sk_buff *skb,
5720- enum flow_offload_tuple_dir dir)
5721+static void nf_flow_nat_ipv6(const struct flow_offload *flow,
5722+ struct sk_buff *skb,
5723+ enum flow_offload_tuple_dir dir,
5724+ struct ipv6hdr *ip6h)
5725 {
5726- struct ipv6hdr *ip6h = ipv6_hdr(skb);
5727 unsigned int thoff = sizeof(*ip6h);
5728
5729- if (flow->flags & FLOW_OFFLOAD_SNAT &&
5730- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
5731- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
5732- return -1;
5733- if (flow->flags & FLOW_OFFLOAD_DNAT &&
5734- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
5735- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
5736- return -1;
5737-
5738- return 0;
5739+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5740+ nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
5741+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
5742+ }
5743+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5744+ nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
5745+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
5746+ }
5747 }
5748
5749 static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
5750- struct flow_offload_tuple *tuple)
5751+ struct flow_offload_tuple *tuple, u32 *hdrsize,
5752+ u32 offset)
5753 {
5754 struct flow_ports *ports;
5755 struct ipv6hdr *ip6h;
5756 unsigned int thoff;
5757
5758- if (!pskb_may_pull(skb, sizeof(*ip6h)))
5759+ thoff = sizeof(*ip6h) + offset;
5760+ if (!pskb_may_pull(skb, thoff))
5761 return -1;
5762
5763- ip6h = ipv6_hdr(skb);
5764+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
5765
5766- if (ip6h->nexthdr != IPPROTO_TCP &&
5767- ip6h->nexthdr != IPPROTO_UDP)
5768+ switch (ip6h->nexthdr) {
5769+ case IPPROTO_TCP:
5770+ *hdrsize = sizeof(struct tcphdr);
5771+ break;
5772+ case IPPROTO_UDP:
5773+ *hdrsize = sizeof(struct udphdr);
5774+ break;
5775+ default:
5776 return -1;
5777+ }
5778
5779 if (ip6h->hop_limit <= 1)
5780 return -1;
5781
5782- thoff = sizeof(*ip6h);
5783- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
5784+ if (!pskb_may_pull(skb, thoff + *hdrsize))
5785 return -1;
5786
5787- ip6h = ipv6_hdr(skb);
5788+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
5789 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
5790
5791 tuple->src_v6 = ip6h->saddr;
developer7eb15dc2023-06-14 17:44:03 +08005792@@ -452,6 +551,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08005793 tuple->l3proto = AF_INET6;
5794 tuple->l4proto = ip6h->nexthdr;
5795 tuple->iifidx = dev->ifindex;
5796+ nf_flow_tuple_encap(skb, tuple);
5797
5798 return 0;
5799 }
developer7eb15dc2023-06-14 17:44:03 +08005800@@ -467,13 +567,17 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08005801 const struct in6_addr *nexthop;
5802 struct flow_offload *flow;
5803 struct net_device *outdev;
5804+ unsigned int thoff, mtu;
5805+ u32 hdrsize, offset = 0;
5806 struct ipv6hdr *ip6h;
5807 struct rt6_info *rt;
5808+ int ret;
5809
5810- if (skb->protocol != htons(ETH_P_IPV6))
5811+ if (skb->protocol != htons(ETH_P_IPV6) &&
5812+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
5813 return NF_ACCEPT;
5814
5815- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
5816+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
5817 return NF_ACCEPT;
5818
5819 tuplehash = flow_offload_lookup(flow_table, &tuple);
developer7eb15dc2023-06-14 17:44:03 +08005820@@ -482,44 +586,62 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08005821
5822 dir = tuplehash->tuple.dir;
5823 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
5824- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
5825- outdev = rt->dst.dev;
developer8cb3ac72022-07-04 10:55:14 +08005826
developerb7c46752022-07-04 19:51:38 +08005827- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
developer8cb3ac72022-07-04 10:55:14 +08005828+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
5829+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
5830 return NF_ACCEPT;
5831
developerb7c46752022-07-04 19:51:38 +08005832- if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
5833- sizeof(*ip6h)))
developer8cb3ac72022-07-04 10:55:14 +08005834+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
5835+ thoff = sizeof(*ip6h) + offset;
5836+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
5837 return NF_ACCEPT;
developer8cb3ac72022-07-04 10:55:14 +08005838
developerb7c46752022-07-04 19:51:38 +08005839- if (!dst_check(&rt->dst, tuplehash->tuple.dst_cookie)) {
developer7eb15dc2023-06-14 17:44:03 +08005840+ if (!nf_flow_dst_check(&tuplehash->tuple)) {
5841 flow_offload_teardown(flow);
5842 return NF_ACCEPT;
5843 }
5844
developer8cb3ac72022-07-04 10:55:14 +08005845- if (skb_try_make_writable(skb, sizeof(*ip6h)))
5846+ if (skb_try_make_writable(skb, thoff + hdrsize))
5847 return NF_DROP;
5848
5849- if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
5850- return NF_DROP;
5851+ flow_offload_refresh(flow_table, flow);
5852+
5853+ nf_flow_encap_pop(skb, tuplehash);
5854
5855- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
5856 ip6h = ipv6_hdr(skb);
5857+ nf_flow_nat_ipv6(flow, skb, dir, ip6h);
5858+
5859 ip6h->hop_limit--;
5860 skb->tstamp = 0;
5861
5862- if (unlikely(dst_xfrm(&rt->dst))) {
5863+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
5864+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
5865+
5866+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
5867+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
5868 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
5869 IP6CB(skb)->iif = skb->dev->ifindex;
5870 IP6CB(skb)->flags = IP6SKB_FORWARDED;
5871 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
5872 }
5873
5874- skb->dev = outdev;
5875- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
5876- skb_dst_set_noref(skb, &rt->dst);
5877- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
5878+ switch (tuplehash->tuple.xmit_type) {
5879+ case FLOW_OFFLOAD_XMIT_NEIGH:
5880+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
5881+ outdev = rt->dst.dev;
5882+ skb->dev = outdev;
5883+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
5884+ skb_dst_set_noref(skb, &rt->dst);
5885+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
5886+ ret = NF_STOLEN;
5887+ break;
5888+ case FLOW_OFFLOAD_XMIT_DIRECT:
5889+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
5890+ if (ret == NF_DROP)
5891+ flow_offload_teardown(flow);
5892+ break;
5893+ }
5894
5895- return NF_STOLEN;
5896+ return ret;
5897 }
5898 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
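For reference, the NAT helpers in the file above patch checksums incrementally (csum_replace4(), inet_proto_csum_replace4()) instead of recomputing them over the packet. A small standalone demonstration of that arithmetic (RFC 1624 update; the word values are arbitrary and not taken from the patch):

#include <stdint.h>
#include <stdio.h>

/* One's-complement sum over 16-bit words, RFC 1071 style. */
static uint16_t csum16(const uint16_t *p, int n)
{
	uint32_t sum = 0;

	while (n--)
		sum += *p++;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

/* Incremental update when one 16-bit word changes: HC' = ~(~HC + ~m + m'),
 * the same fix-up the kernel helpers apply per modified word. */
static uint16_t csum_update(uint16_t check, uint16_t oldw, uint16_t neww)
{
	uint32_t sum = (uint16_t)~check + (uint16_t)~oldw + neww;

	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	uint16_t words[4] = { 0x4500, 0x0054, 0xc0a8, 0x0101 };
	uint16_t check = csum16(words, 4);

	words[3] = 0x0202;			/* rewrite one address word */
	printf("full recompute: 0x%04x\n", csum16(words, 4));
	printf("incremental:    0x%04x\n", csum_update(check, 0x0101, 0x0202));
	return 0;
}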
5899diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
5900new file mode 100644
developer7eb15dc2023-06-14 17:44:03 +08005901index 0000000..197f48a
developer8cb3ac72022-07-04 10:55:14 +08005902--- /dev/null
5903+++ b/net/netfilter/nf_flow_table_offload.c
developer7eb15dc2023-06-14 17:44:03 +08005904@@ -0,0 +1,1194 @@
developer8cb3ac72022-07-04 10:55:14 +08005905+#include <linux/kernel.h>
5906+#include <linux/init.h>
5907+#include <linux/module.h>
5908+#include <linux/netfilter.h>
5909+#include <linux/rhashtable.h>
5910+#include <linux/netdevice.h>
5911+#include <linux/tc_act/tc_csum.h>
5912+#include <net/flow_offload.h>
5913+#include <net/netfilter/nf_flow_table.h>
5914+#include <net/netfilter/nf_tables.h>
5915+#include <net/netfilter/nf_conntrack.h>
5916+#include <net/netfilter/nf_conntrack_acct.h>
5917+#include <net/netfilter/nf_conntrack_core.h>
5918+#include <net/netfilter/nf_conntrack_tuple.h>
5919+
5920+static struct workqueue_struct *nf_flow_offload_add_wq;
5921+static struct workqueue_struct *nf_flow_offload_del_wq;
5922+static struct workqueue_struct *nf_flow_offload_stats_wq;
5923+
5924+struct flow_offload_work {
5925+ struct list_head list;
5926+ enum flow_cls_command cmd;
5927+ int priority;
5928+ struct nf_flowtable *flowtable;
5929+ struct flow_offload *flow;
5930+ struct work_struct work;
5931+};
5932+
5933+#define NF_FLOW_DISSECTOR(__match, __type, __field) \
5934+ (__match)->dissector.offset[__type] = \
5935+ offsetof(struct nf_flow_key, __field)
5936+
5937+static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
5938+ struct ip_tunnel_info *tun_info)
5939+{
5940+ struct nf_flow_key *mask = &match->mask;
5941+ struct nf_flow_key *key = &match->key;
5942+ unsigned int enc_keys;
5943+
5944+ if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
5945+ return;
5946+
5947+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
5948+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
5949+ key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
5950+ mask->enc_key_id.keyid = 0xffffffff;
5951+ enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
5952+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
5953+
5954+ if (ip_tunnel_info_af(tun_info) == AF_INET) {
5955+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
5956+ enc_ipv4);
5957+ key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
5958+ key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
5959+ if (key->enc_ipv4.src)
5960+ mask->enc_ipv4.src = 0xffffffff;
5961+ if (key->enc_ipv4.dst)
5962+ mask->enc_ipv4.dst = 0xffffffff;
5963+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
5964+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
5965+ } else {
5966+ memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
5967+ sizeof(struct in6_addr));
5968+ memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
5969+ sizeof(struct in6_addr));
5970+ if (memcmp(&key->enc_ipv6.src, &in6addr_any,
5971+ sizeof(struct in6_addr)))
5972+ memset(&mask->enc_ipv6.src, 0xff,
5973+ sizeof(struct in6_addr));
5974+ if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
5975+ sizeof(struct in6_addr)))
5976+ memset(&mask->enc_ipv6.dst, 0xff,
5977+ sizeof(struct in6_addr));
5978+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
5979+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
5980+ }
5981+
5982+ match->dissector.used_keys |= enc_keys;
5983+}
5984+
5985+static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
5986+ struct flow_dissector_key_vlan *mask,
5987+ u16 vlan_id, __be16 proto)
5988+{
5989+ key->vlan_id = vlan_id;
5990+ mask->vlan_id = VLAN_VID_MASK;
5991+ key->vlan_tpid = proto;
5992+ mask->vlan_tpid = 0xffff;
5993+}
5994+
5995+static int nf_flow_rule_match(struct nf_flow_match *match,
5996+ const struct flow_offload_tuple *tuple,
5997+ struct dst_entry *other_dst)
5998+{
5999+ struct nf_flow_key *mask = &match->mask;
6000+ struct nf_flow_key *key = &match->key;
6001+ struct ip_tunnel_info *tun_info;
6002+ bool vlan_encap = false;
6003+
6004+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
6005+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
6006+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
6007+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
6008+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
6009+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
6010+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
6011+
6012+ if (other_dst && other_dst->lwtstate) {
6013+ tun_info = lwt_tun_info(other_dst->lwtstate);
6014+ nf_flow_rule_lwt_match(match, tun_info);
6015+ }
6016+
6017+ key->meta.ingress_ifindex = tuple->iifidx;
6018+ mask->meta.ingress_ifindex = 0xffffffff;
6019+
6020+ if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
6021+ tuple->encap[0].proto == htons(ETH_P_8021Q)) {
6022+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
6023+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
6024+ tuple->encap[0].id,
6025+ tuple->encap[0].proto);
6026+ vlan_encap = true;
6027+ }
6028+
6029+ if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
6030+ tuple->encap[1].proto == htons(ETH_P_8021Q)) {
6031+ if (vlan_encap) {
6032+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
6033+ cvlan);
6034+ nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
6035+ tuple->encap[1].id,
6036+ tuple->encap[1].proto);
6037+ } else {
6038+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
6039+ vlan);
6040+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
6041+ tuple->encap[1].id,
6042+ tuple->encap[1].proto);
6043+ }
6044+ }
6045+
6046+ switch (tuple->l3proto) {
6047+ case AF_INET:
6048+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
6049+ key->basic.n_proto = htons(ETH_P_IP);
6050+ key->ipv4.src = tuple->src_v4.s_addr;
6051+ mask->ipv4.src = 0xffffffff;
6052+ key->ipv4.dst = tuple->dst_v4.s_addr;
6053+ mask->ipv4.dst = 0xffffffff;
6054+ break;
6055+ case AF_INET6:
6056+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
6057+ key->basic.n_proto = htons(ETH_P_IPV6);
6058+ key->ipv6.src = tuple->src_v6;
6059+ memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
6060+ key->ipv6.dst = tuple->dst_v6;
6061+ memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
6062+ break;
6063+ default:
6064+ return -EOPNOTSUPP;
6065+ }
6066+ mask->control.addr_type = 0xffff;
6067+ match->dissector.used_keys |= BIT(key->control.addr_type);
6068+ mask->basic.n_proto = 0xffff;
6069+
6070+ switch (tuple->l4proto) {
6071+ case IPPROTO_TCP:
6072+ key->tcp.flags = 0;
6073+ mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
6074+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
6075+ break;
6076+ case IPPROTO_UDP:
6077+ break;
6078+ default:
6079+ return -EOPNOTSUPP;
6080+ }
6081+
6082+ key->basic.ip_proto = tuple->l4proto;
6083+ mask->basic.ip_proto = 0xff;
6084+
6085+ key->tp.src = tuple->src_port;
6086+ mask->tp.src = 0xffff;
6087+ key->tp.dst = tuple->dst_port;
6088+ mask->tp.dst = 0xffff;
6089+
6090+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
6091+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
6092+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
6093+ BIT(FLOW_DISSECTOR_KEY_PORTS);
6094+ return 0;
6095+}
6096+
6097+static void flow_offload_mangle(struct flow_action_entry *entry,
6098+ enum flow_action_mangle_base htype, u32 offset,
6099+ const __be32 *value, const __be32 *mask)
6100+{
6101+ entry->id = FLOW_ACTION_MANGLE;
6102+ entry->mangle.htype = htype;
6103+ entry->mangle.offset = offset;
6104+ memcpy(&entry->mangle.mask, mask, sizeof(u32));
6105+ memcpy(&entry->mangle.val, value, sizeof(u32));
6106+}
6107+
6108+static inline struct flow_action_entry *
6109+flow_action_entry_next(struct nf_flow_rule *flow_rule)
6110+{
6111+ int i = flow_rule->rule->action.num_entries++;
6112+
6113+ return &flow_rule->rule->action.entries[i];
6114+}
6115+
6116+static int flow_offload_eth_src(struct net *net,
6117+ const struct flow_offload *flow,
6118+ enum flow_offload_tuple_dir dir,
6119+ struct nf_flow_rule *flow_rule)
6120+{
6121+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
6122+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
6123+ const struct flow_offload_tuple *other_tuple, *this_tuple;
6124+ struct net_device *dev = NULL;
6125+ const unsigned char *addr;
6126+ u32 mask, val;
6127+ u16 val16;
6128+
6129+ this_tuple = &flow->tuplehash[dir].tuple;
6130+
6131+ switch (this_tuple->xmit_type) {
6132+ case FLOW_OFFLOAD_XMIT_DIRECT:
6133+ addr = this_tuple->out.h_source;
6134+ break;
6135+ case FLOW_OFFLOAD_XMIT_NEIGH:
6136+ other_tuple = &flow->tuplehash[!dir].tuple;
6137+ dev = dev_get_by_index(net, other_tuple->iifidx);
6138+ if (!dev)
6139+ return -ENOENT;
6140+
6141+ addr = dev->dev_addr;
6142+ break;
6143+ default:
6144+ return -EOPNOTSUPP;
6145+ }
6146+
6147+ mask = ~0xffff0000;
6148+ memcpy(&val16, addr, 2);
6149+ val = val16 << 16;
6150+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
6151+ &val, &mask);
6152+
6153+ mask = ~0xffffffff;
6154+ memcpy(&val, addr + 2, 4);
6155+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
6156+ &val, &mask);
6157+
developer7eb15dc2023-06-14 17:44:03 +08006158+ dev_put(dev);
developer8cb3ac72022-07-04 10:55:14 +08006159+
6160+ return 0;
6161+}
6162+
6163+static int flow_offload_eth_dst(struct net *net,
6164+ const struct flow_offload *flow,
6165+ enum flow_offload_tuple_dir dir,
6166+ struct nf_flow_rule *flow_rule)
6167+{
6168+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
6169+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
6170+ const struct flow_offload_tuple *other_tuple, *this_tuple;
6171+ const struct dst_entry *dst_cache;
6172+ unsigned char ha[ETH_ALEN];
6173+ struct neighbour *n;
6174+ const void *daddr;
6175+ u32 mask, val;
6176+ u8 nud_state;
6177+ u16 val16;
6178+
6179+ this_tuple = &flow->tuplehash[dir].tuple;
6180+
6181+ switch (this_tuple->xmit_type) {
6182+ case FLOW_OFFLOAD_XMIT_DIRECT:
6183+ ether_addr_copy(ha, this_tuple->out.h_dest);
6184+ break;
6185+ case FLOW_OFFLOAD_XMIT_NEIGH:
6186+ other_tuple = &flow->tuplehash[!dir].tuple;
6187+ daddr = &other_tuple->src_v4;
6188+ dst_cache = this_tuple->dst_cache;
6189+ n = dst_neigh_lookup(dst_cache, daddr);
6190+ if (!n)
6191+ return -ENOENT;
6192+
6193+ read_lock_bh(&n->lock);
6194+ nud_state = n->nud_state;
6195+ ether_addr_copy(ha, n->ha);
6196+ read_unlock_bh(&n->lock);
6197+ neigh_release(n);
6198+
6199+ if (!(nud_state & NUD_VALID))
6200+ return -ENOENT;
6201+ break;
6202+ default:
6203+ return -EOPNOTSUPP;
6204+ }
6205+
6206+ mask = ~0xffffffff;
6207+ memcpy(&val, ha, 4);
6208+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
6209+ &val, &mask);
6210+
6211+ mask = ~0x0000ffff;
6212+ memcpy(&val16, ha + 4, 2);
6213+ val = val16;
6214+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
6215+ &val, &mask);
6216+
6217+ return 0;
6218+}
6219+
6220+static void flow_offload_ipv4_snat(struct net *net,
6221+ const struct flow_offload *flow,
6222+ enum flow_offload_tuple_dir dir,
6223+ struct nf_flow_rule *flow_rule)
6224+{
6225+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
6226+ u32 mask = ~htonl(0xffffffff);
6227+ __be32 addr;
6228+ u32 offset;
6229+
6230+ switch (dir) {
6231+ case FLOW_OFFLOAD_DIR_ORIGINAL:
6232+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
6233+ offset = offsetof(struct iphdr, saddr);
6234+ break;
6235+ case FLOW_OFFLOAD_DIR_REPLY:
6236+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
6237+ offset = offsetof(struct iphdr, daddr);
6238+ break;
6239+ default:
6240+ return;
6241+ }
6242+
6243+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
6244+ &addr, &mask);
6245+}
6246+
6247+static void flow_offload_ipv4_dnat(struct net *net,
6248+ const struct flow_offload *flow,
6249+ enum flow_offload_tuple_dir dir,
6250+ struct nf_flow_rule *flow_rule)
6251+{
6252+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
6253+ u32 mask = ~htonl(0xffffffff);
6254+ __be32 addr;
6255+ u32 offset;
6256+
6257+ switch (dir) {
6258+ case FLOW_OFFLOAD_DIR_ORIGINAL:
6259+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
6260+ offset = offsetof(struct iphdr, daddr);
6261+ break;
6262+ case FLOW_OFFLOAD_DIR_REPLY:
6263+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
6264+ offset = offsetof(struct iphdr, saddr);
6265+ break;
6266+ default:
6267+ return;
6268+ }
6269+
6270+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
6271+ &addr, &mask);
6272+}
6273+
6274+static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
6275+ unsigned int offset,
6276+ const __be32 *addr, const __be32 *mask)
6277+{
6278+ struct flow_action_entry *entry;
developer7eb15dc2023-06-14 17:44:03 +08006279+ int i;
developer8cb3ac72022-07-04 10:55:14 +08006280+
developer7eb15dc2023-06-14 17:44:03 +08006281+ for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) {
developer8cb3ac72022-07-04 10:55:14 +08006282+ entry = flow_action_entry_next(flow_rule);
6283+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
developer7eb15dc2023-06-14 17:44:03 +08006284+ offset + i * sizeof(u32), &addr[i], mask);
developer8cb3ac72022-07-04 10:55:14 +08006285+ }
6286+}
6287+
6288+static void flow_offload_ipv6_snat(struct net *net,
6289+ const struct flow_offload *flow,
6290+ enum flow_offload_tuple_dir dir,
6291+ struct nf_flow_rule *flow_rule)
6292+{
6293+ u32 mask = ~htonl(0xffffffff);
6294+ const __be32 *addr;
6295+ u32 offset;
6296+
6297+ switch (dir) {
6298+ case FLOW_OFFLOAD_DIR_ORIGINAL:
6299+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
6300+ offset = offsetof(struct ipv6hdr, saddr);
6301+ break;
6302+ case FLOW_OFFLOAD_DIR_REPLY:
6303+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
6304+ offset = offsetof(struct ipv6hdr, daddr);
6305+ break;
6306+ default:
6307+ return;
6308+ }
6309+
6310+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
6311+}
6312+
6313+static void flow_offload_ipv6_dnat(struct net *net,
6314+ const struct flow_offload *flow,
6315+ enum flow_offload_tuple_dir dir,
6316+ struct nf_flow_rule *flow_rule)
6317+{
6318+ u32 mask = ~htonl(0xffffffff);
6319+ const __be32 *addr;
6320+ u32 offset;
6321+
6322+ switch (dir) {
6323+ case FLOW_OFFLOAD_DIR_ORIGINAL:
6324+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
6325+ offset = offsetof(struct ipv6hdr, daddr);
6326+ break;
6327+ case FLOW_OFFLOAD_DIR_REPLY:
6328+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
6329+ offset = offsetof(struct ipv6hdr, saddr);
6330+ break;
6331+ default:
6332+ return;
6333+ }
6334+
6335+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
6336+}
6337+
6338+static int flow_offload_l4proto(const struct flow_offload *flow)
6339+{
6340+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
6341+ u8 type = 0;
6342+
6343+ switch (protonum) {
6344+ case IPPROTO_TCP:
6345+ type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
6346+ break;
6347+ case IPPROTO_UDP:
6348+ type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
6349+ break;
6350+ default:
6351+ break;
6352+ }
6353+
6354+ return type;
6355+}
6356+
6357+static void flow_offload_port_snat(struct net *net,
6358+ const struct flow_offload *flow,
6359+ enum flow_offload_tuple_dir dir,
6360+ struct nf_flow_rule *flow_rule)
6361+{
6362+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
6363+ u32 mask, port;
6364+ u32 offset;
6365+
6366+ switch (dir) {
6367+ case FLOW_OFFLOAD_DIR_ORIGINAL:
6368+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
6369+ offset = 0; /* offsetof(struct tcphdr, source); */
6370+ port = htonl(port << 16);
6371+ mask = ~htonl(0xffff0000);
6372+ break;
6373+ case FLOW_OFFLOAD_DIR_REPLY:
6374+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
6375+ offset = 0; /* offsetof(struct tcphdr, dest); */
6376+ port = htonl(port);
6377+ mask = ~htonl(0xffff);
6378+ break;
6379+ default:
6380+ return;
6381+ }
6382+
6383+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
6384+ &port, &mask);
6385+}
6386+
6387+static void flow_offload_port_dnat(struct net *net,
6388+ const struct flow_offload *flow,
6389+ enum flow_offload_tuple_dir dir,
6390+ struct nf_flow_rule *flow_rule)
6391+{
6392+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
6393+ u32 mask, port;
6394+ u32 offset;
6395+
6396+ switch (dir) {
6397+ case FLOW_OFFLOAD_DIR_ORIGINAL:
6398+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
6399+ offset = 0; /* offsetof(struct tcphdr, dest); */
6400+ port = htonl(port);
6401+ mask = ~htonl(0xffff);
6402+ break;
6403+ case FLOW_OFFLOAD_DIR_REPLY:
6404+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
6405+ offset = 0; /* offsetof(struct tcphdr, source); */
6406+ port = htonl(port << 16);
6407+ mask = ~htonl(0xffff0000);
6408+ break;
6409+ default:
6410+ return;
6411+ }
6412+
6413+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
6414+ &port, &mask);
6415+}
6416+
6417+static void flow_offload_ipv4_checksum(struct net *net,
6418+ const struct flow_offload *flow,
6419+ struct nf_flow_rule *flow_rule)
6420+{
6421+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
6422+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
6423+
6424+ entry->id = FLOW_ACTION_CSUM;
6425+ entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
6426+
6427+ switch (protonum) {
6428+ case IPPROTO_TCP:
6429+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
6430+ break;
6431+ case IPPROTO_UDP:
6432+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
6433+ break;
6434+ }
6435+}
6436+
6437+static void flow_offload_redirect(struct net *net,
6438+ const struct flow_offload *flow,
6439+ enum flow_offload_tuple_dir dir,
6440+ struct nf_flow_rule *flow_rule)
6441+{
6442+ const struct flow_offload_tuple *this_tuple, *other_tuple;
6443+ struct flow_action_entry *entry;
6444+ struct net_device *dev;
6445+ int ifindex;
6446+
6447+ this_tuple = &flow->tuplehash[dir].tuple;
6448+ switch (this_tuple->xmit_type) {
6449+ case FLOW_OFFLOAD_XMIT_DIRECT:
6450+ this_tuple = &flow->tuplehash[dir].tuple;
6451+ ifindex = this_tuple->out.hw_ifidx;
6452+ break;
6453+ case FLOW_OFFLOAD_XMIT_NEIGH:
6454+ other_tuple = &flow->tuplehash[!dir].tuple;
6455+ ifindex = other_tuple->iifidx;
6456+ break;
6457+ default:
6458+ return;
6459+ }
6460+
6461+ dev = dev_get_by_index(net, ifindex);
6462+ if (!dev)
6463+ return;
6464+
6465+ entry = flow_action_entry_next(flow_rule);
6466+ entry->id = FLOW_ACTION_REDIRECT;
6467+ entry->dev = dev;
6468+}
6469+
6470+static void flow_offload_encap_tunnel(const struct flow_offload *flow,
6471+ enum flow_offload_tuple_dir dir,
6472+ struct nf_flow_rule *flow_rule)
6473+{
6474+ const struct flow_offload_tuple *this_tuple;
6475+ struct flow_action_entry *entry;
6476+ struct dst_entry *dst;
6477+
6478+ this_tuple = &flow->tuplehash[dir].tuple;
6479+ if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
6480+ return;
6481+
6482+ dst = this_tuple->dst_cache;
6483+ if (dst && dst->lwtstate) {
6484+ struct ip_tunnel_info *tun_info;
6485+
6486+ tun_info = lwt_tun_info(dst->lwtstate);
6487+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
6488+ entry = flow_action_entry_next(flow_rule);
6489+ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
6490+ entry->tunnel = tun_info;
6491+ }
6492+ }
6493+}
6494+
6495+static void flow_offload_decap_tunnel(const struct flow_offload *flow,
6496+ enum flow_offload_tuple_dir dir,
6497+ struct nf_flow_rule *flow_rule)
6498+{
6499+ const struct flow_offload_tuple *other_tuple;
6500+ struct flow_action_entry *entry;
6501+ struct dst_entry *dst;
6502+
6503+ other_tuple = &flow->tuplehash[!dir].tuple;
6504+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
6505+ return;
6506+
6507+ dst = other_tuple->dst_cache;
6508+ if (dst && dst->lwtstate) {
6509+ struct ip_tunnel_info *tun_info;
6510+
6511+ tun_info = lwt_tun_info(dst->lwtstate);
6512+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
6513+ entry = flow_action_entry_next(flow_rule);
6514+ entry->id = FLOW_ACTION_TUNNEL_DECAP;
6515+ }
6516+ }
6517+}
6518+
6519+static int
6520+nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
6521+ enum flow_offload_tuple_dir dir,
6522+ struct nf_flow_rule *flow_rule)
6523+{
6524+ const struct flow_offload_tuple *other_tuple;
6525+ const struct flow_offload_tuple *tuple;
6526+ int i;
6527+
6528+ flow_offload_decap_tunnel(flow, dir, flow_rule);
6529+ flow_offload_encap_tunnel(flow, dir, flow_rule);
6530+
6531+ if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
6532+ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
6533+ return -1;
6534+
6535+ tuple = &flow->tuplehash[dir].tuple;
6536+
6537+ for (i = 0; i < tuple->encap_num; i++) {
6538+ struct flow_action_entry *entry;
6539+
6540+ if (tuple->in_vlan_ingress & BIT(i))
6541+ continue;
6542+
6543+ if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
6544+ entry = flow_action_entry_next(flow_rule);
6545+ entry->id = FLOW_ACTION_VLAN_POP;
6546+ }
6547+ }
6548+
6549+ other_tuple = &flow->tuplehash[!dir].tuple;
6550+
6551+ for (i = 0; i < other_tuple->encap_num; i++) {
6552+ struct flow_action_entry *entry;
6553+
6554+ if (other_tuple->in_vlan_ingress & BIT(i))
6555+ continue;
6556+
6557+ entry = flow_action_entry_next(flow_rule);
6558+
6559+ switch (other_tuple->encap[i].proto) {
6560+ case htons(ETH_P_PPP_SES):
6561+ entry->id = FLOW_ACTION_PPPOE_PUSH;
6562+ entry->pppoe.sid = other_tuple->encap[i].id;
6563+ break;
6564+ case htons(ETH_P_8021Q):
6565+ entry->id = FLOW_ACTION_VLAN_PUSH;
6566+ entry->vlan.vid = other_tuple->encap[i].id;
6567+ entry->vlan.proto = other_tuple->encap[i].proto;
6568+ break;
6569+ }
6570+ }
6571+
6572+ return 0;
6573+}
6574+
6575+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
6576+ enum flow_offload_tuple_dir dir,
6577+ struct nf_flow_rule *flow_rule)
6578+{
6579+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
6580+ return -1;
6581+
6582+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
6583+ flow_offload_ipv4_snat(net, flow, dir, flow_rule);
6584+ flow_offload_port_snat(net, flow, dir, flow_rule);
6585+ }
6586+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
6587+ flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
6588+ flow_offload_port_dnat(net, flow, dir, flow_rule);
6589+ }
6590+ if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
6591+ test_bit(NF_FLOW_DNAT, &flow->flags))
6592+ flow_offload_ipv4_checksum(net, flow, flow_rule);
6593+
6594+ flow_offload_redirect(net, flow, dir, flow_rule);
6595+
6596+ return 0;
6597+}
6598+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
6599+
6600+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
6601+ enum flow_offload_tuple_dir dir,
6602+ struct nf_flow_rule *flow_rule)
6603+{
6604+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
6605+ return -1;
6606+
6607+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
6608+ flow_offload_ipv6_snat(net, flow, dir, flow_rule);
6609+ flow_offload_port_snat(net, flow, dir, flow_rule);
6610+ }
6611+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
6612+ flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
6613+ flow_offload_port_dnat(net, flow, dir, flow_rule);
6614+ }
6615+
6616+ flow_offload_redirect(net, flow, dir, flow_rule);
6617+
6618+ return 0;
6619+}
6620+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
6621+
6622+#define NF_FLOW_RULE_ACTION_MAX 16
6623+
6624+static struct nf_flow_rule *
6625+nf_flow_offload_rule_alloc(struct net *net,
6626+ const struct flow_offload_work *offload,
6627+ enum flow_offload_tuple_dir dir)
6628+{
6629+ const struct nf_flowtable *flowtable = offload->flowtable;
6630+ const struct flow_offload_tuple *tuple, *other_tuple;
6631+ const struct flow_offload *flow = offload->flow;
6632+ struct dst_entry *other_dst = NULL;
6633+ struct nf_flow_rule *flow_rule;
6634+ int err = -ENOMEM;
6635+
6636+ flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
6637+ if (!flow_rule)
6638+ goto err_flow;
6639+
6640+ flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
6641+ if (!flow_rule->rule)
6642+ goto err_flow_rule;
6643+
6644+ flow_rule->rule->match.dissector = &flow_rule->match.dissector;
6645+ flow_rule->rule->match.mask = &flow_rule->match.mask;
6646+ flow_rule->rule->match.key = &flow_rule->match.key;
6647+
6648+ tuple = &flow->tuplehash[dir].tuple;
6649+ other_tuple = &flow->tuplehash[!dir].tuple;
6650+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
6651+ other_dst = other_tuple->dst_cache;
6652+
6653+ err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
6654+ if (err < 0)
6655+ goto err_flow_match;
6656+
6657+ flow_rule->rule->action.num_entries = 0;
6658+ if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
6659+ goto err_flow_match;
6660+
6661+ return flow_rule;
6662+
6663+err_flow_match:
6664+ kfree(flow_rule->rule);
6665+err_flow_rule:
6666+ kfree(flow_rule);
6667+err_flow:
6668+ return NULL;
6669+}
6670+
6671+static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
6672+{
6673+ struct flow_action_entry *entry;
6674+ int i;
6675+
6676+ for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
6677+ entry = &flow_rule->rule->action.entries[i];
6678+ if (entry->id != FLOW_ACTION_REDIRECT)
6679+ continue;
6680+
6681+ dev_put(entry->dev);
6682+ }
6683+ kfree(flow_rule->rule);
6684+ kfree(flow_rule);
6685+}
6686+
6687+static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
6688+{
6689+ int i;
6690+
6691+ for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
6692+ __nf_flow_offload_destroy(flow_rule[i]);
6693+}
6694+
6695+static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
6696+ struct nf_flow_rule *flow_rule[])
6697+{
6698+ struct net *net = read_pnet(&offload->flowtable->net);
6699+
6700+ flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
6701+ FLOW_OFFLOAD_DIR_ORIGINAL);
6702+ if (!flow_rule[0])
6703+ return -ENOMEM;
6704+
6705+ flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
6706+ FLOW_OFFLOAD_DIR_REPLY);
6707+ if (!flow_rule[1]) {
6708+ __nf_flow_offload_destroy(flow_rule[0]);
6709+ return -ENOMEM;
6710+ }
6711+
6712+ return 0;
6713+}
6714+
6715+static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
6716+ __be16 proto, int priority,
6717+ enum flow_cls_command cmd,
6718+ const struct flow_offload_tuple *tuple,
6719+ struct netlink_ext_ack *extack)
6720+{
6721+ cls_flow->common.protocol = proto;
6722+ cls_flow->common.prio = priority;
6723+ cls_flow->common.extack = extack;
6724+ cls_flow->command = cmd;
6725+ cls_flow->cookie = (unsigned long)tuple;
6726+}
6727+
6728+static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
6729+ struct flow_offload *flow,
6730+ struct nf_flow_rule *flow_rule,
6731+ enum flow_offload_tuple_dir dir,
6732+ int priority, int cmd,
6733+ struct flow_stats *stats,
6734+ struct list_head *block_cb_list)
6735+{
6736+ struct flow_cls_offload cls_flow = {};
6737+ struct flow_block_cb *block_cb;
6738+ struct netlink_ext_ack extack;
6739+ __be16 proto = ETH_P_ALL;
6740+ int err, i = 0;
6741+
6742+ nf_flow_offload_init(&cls_flow, proto, priority, cmd,
6743+ &flow->tuplehash[dir].tuple, &extack);
6744+ if (cmd == FLOW_CLS_REPLACE)
6745+ cls_flow.rule = flow_rule->rule;
6746+
developer0cc0d732023-06-07 13:52:41 +08006747+ down_read(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08006748+ list_for_each_entry(block_cb, block_cb_list, list) {
6749+ err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
6750+ block_cb->cb_priv);
6751+ if (err < 0)
6752+ continue;
6753+
6754+ i++;
6755+ }
developer0cc0d732023-06-07 13:52:41 +08006756+ up_read(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08006757+
6758+ if (cmd == FLOW_CLS_STATS)
6759+ memcpy(stats, &cls_flow.stats, sizeof(*stats));
6760+
6761+ return i;
6762+}
6763+
6764+static int flow_offload_tuple_add(struct flow_offload_work *offload,
6765+ struct nf_flow_rule *flow_rule,
6766+ enum flow_offload_tuple_dir dir)
6767+{
6768+ return nf_flow_offload_tuple(offload->flowtable, offload->flow,
6769+ flow_rule, dir, offload->priority,
6770+ FLOW_CLS_REPLACE, NULL,
6771+ &offload->flowtable->flow_block.cb_list);
6772+}
6773+
6774+static void flow_offload_tuple_del(struct flow_offload_work *offload,
6775+ enum flow_offload_tuple_dir dir)
6776+{
6777+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
6778+ offload->priority, FLOW_CLS_DESTROY, NULL,
6779+ &offload->flowtable->flow_block.cb_list);
6780+}
6781+
6782+static int flow_offload_rule_add(struct flow_offload_work *offload,
6783+ struct nf_flow_rule *flow_rule[])
6784+{
6785+ int ok_count = 0;
6786+
6787+ ok_count += flow_offload_tuple_add(offload, flow_rule[0],
6788+ FLOW_OFFLOAD_DIR_ORIGINAL);
6789+ ok_count += flow_offload_tuple_add(offload, flow_rule[1],
6790+ FLOW_OFFLOAD_DIR_REPLY);
6791+ if (ok_count == 0)
6792+ return -ENOENT;
6793+
6794+ return 0;
6795+}
6796+
6797+static void flow_offload_work_add(struct flow_offload_work *offload)
6798+{
6799+ struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
6800+ int err;
6801+
6802+ err = nf_flow_offload_alloc(offload, flow_rule);
6803+ if (err < 0)
6804+ return;
6805+
6806+ err = flow_offload_rule_add(offload, flow_rule);
6807+ if (err < 0)
6808+ goto out;
6809+
6810+ set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
6811+
6812+out:
6813+ nf_flow_offload_destroy(flow_rule);
6814+}
6815+
6816+static void flow_offload_work_del(struct flow_offload_work *offload)
6817+{
6818+ clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
6819+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
6820+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
6821+ set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
6822+}
6823+
6824+static void flow_offload_tuple_stats(struct flow_offload_work *offload,
6825+ enum flow_offload_tuple_dir dir,
6826+ struct flow_stats *stats)
6827+{
6828+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
6829+ offload->priority, FLOW_CLS_STATS, stats,
6830+ &offload->flowtable->flow_block.cb_list);
6831+}
6832+
6833+static void flow_offload_work_stats(struct flow_offload_work *offload)
6834+{
6835+ struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
6836+ u64 lastused;
6837+
6838+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
6839+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
6840+
6841+ lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
6842+ offload->flow->timeout = max_t(u64, offload->flow->timeout,
6843+ lastused + flow_offload_get_timeout(offload->flow));
6844+
6845+ if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
6846+ if (stats[0].pkts)
6847+ nf_ct_acct_add(offload->flow->ct,
6848+ FLOW_OFFLOAD_DIR_ORIGINAL,
6849+ stats[0].pkts, stats[0].bytes);
6850+ if (stats[1].pkts)
6851+ nf_ct_acct_add(offload->flow->ct,
6852+ FLOW_OFFLOAD_DIR_REPLY,
6853+ stats[1].pkts, stats[1].bytes);
6854+ }
6855+}
6856+
6857+static void flow_offload_work_handler(struct work_struct *work)
6858+{
6859+ struct flow_offload_work *offload;
6860+
6861+ offload = container_of(work, struct flow_offload_work, work);
6862+ switch (offload->cmd) {
6863+ case FLOW_CLS_REPLACE:
6864+ flow_offload_work_add(offload);
6865+ break;
6866+ case FLOW_CLS_DESTROY:
6867+ flow_offload_work_del(offload);
6868+ break;
6869+ case FLOW_CLS_STATS:
6870+ flow_offload_work_stats(offload);
6871+ break;
6872+ default:
6873+ WARN_ON_ONCE(1);
6874+ }
6875+
6876+ clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
6877+ kfree(offload);
6878+}
6879+
6880+static void flow_offload_queue_work(struct flow_offload_work *offload)
6881+{
6882+ if (offload->cmd == FLOW_CLS_REPLACE)
6883+ queue_work(nf_flow_offload_add_wq, &offload->work);
6884+ else if (offload->cmd == FLOW_CLS_DESTROY)
6885+ queue_work(nf_flow_offload_del_wq, &offload->work);
6886+ else
6887+ queue_work(nf_flow_offload_stats_wq, &offload->work);
6888+}
6889+
6890+static struct flow_offload_work *
6891+nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
6892+ struct flow_offload *flow, unsigned int cmd)
6893+{
6894+ struct flow_offload_work *offload;
6895+
6896+ if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
6897+ return NULL;
6898+
6899+ offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
6900+ if (!offload) {
6901+ clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
6902+ return NULL;
6903+ }
6904+
6905+ offload->cmd = cmd;
6906+ offload->flow = flow;
6907+ offload->priority = flowtable->priority;
6908+ offload->flowtable = flowtable;
6909+ INIT_WORK(&offload->work, flow_offload_work_handler);
6910+
6911+ return offload;
6912+}
6913+
6914+
6915+void nf_flow_offload_add(struct nf_flowtable *flowtable,
6916+ struct flow_offload *flow)
6917+{
6918+ struct flow_offload_work *offload;
6919+
6920+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
6921+ if (!offload)
6922+ return;
6923+
6924+ flow_offload_queue_work(offload);
6925+}
6926+
6927+void nf_flow_offload_del(struct nf_flowtable *flowtable,
6928+ struct flow_offload *flow)
6929+{
6930+ struct flow_offload_work *offload;
6931+
6932+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
6933+ if (!offload)
6934+ return;
6935+
6936+ set_bit(NF_FLOW_HW_DYING, &flow->flags);
6937+ flow_offload_queue_work(offload);
6938+}
6939+
6940+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
developer7eb15dc2023-06-14 17:44:03 +08006941+ struct flow_offload *flow)
developer8cb3ac72022-07-04 10:55:14 +08006942+{
6943+ struct flow_offload_work *offload;
6944+ __s32 delta;
6945+
developer7eb15dc2023-06-14 17:44:03 +08006946+ delta = nf_flow_timeout_delta(flow->timeout);
6947+ if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
6948+ return;
developer8cb3ac72022-07-04 10:55:14 +08006949+
6950+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
6951+ if (!offload)
6952+ return;
6953+
6954+ flow_offload_queue_work(offload);
6955+}
6956+
6957+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
6958+{
6959+ if (nf_flowtable_hw_offload(flowtable)) {
6960+ flush_workqueue(nf_flow_offload_add_wq);
6961+ flush_workqueue(nf_flow_offload_del_wq);
6962+ flush_workqueue(nf_flow_offload_stats_wq);
6963+ }
6964+}
6965+
6966+static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
6967+ struct flow_block_offload *bo,
6968+ enum flow_block_command cmd)
6969+{
6970+ struct flow_block_cb *block_cb, *next;
6971+ int err = 0;
6972+
developer0cc0d732023-06-07 13:52:41 +08006973+ down_write(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08006974+ switch (cmd) {
6975+ case FLOW_BLOCK_BIND:
6976+ list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
6977+ break;
6978+ case FLOW_BLOCK_UNBIND:
6979+ list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
6980+ list_del(&block_cb->list);
6981+ flow_block_cb_free(block_cb);
6982+ }
6983+ break;
6984+ default:
6985+ WARN_ON_ONCE(1);
6986+ err = -EOPNOTSUPP;
6987+ }
developer0cc0d732023-06-07 13:52:41 +08006988+ up_write(&flowtable->flow_block_lock);
developera54478c2022-10-01 16:41:46 +08006989+
developer8cb3ac72022-07-04 10:55:14 +08006990+ return err;
6991+}
6992+
6993+static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
6994+ struct net *net,
6995+ enum flow_block_command cmd,
6996+ struct nf_flowtable *flowtable,
6997+ struct netlink_ext_ack *extack)
6998+{
6999+ memset(bo, 0, sizeof(*bo));
7000+ bo->net = net;
7001+ bo->block = &flowtable->flow_block;
7002+ bo->command = cmd;
7003+ bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
7004+ bo->extack = extack;
7005+ INIT_LIST_HEAD(&bo->cb_list);
7006+}
7007+
7008+static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
7009+ struct nf_flowtable *flowtable,
7010+ struct net_device *dev,
7011+ enum flow_block_command cmd,
7012+ struct netlink_ext_ack *extack)
7013+{
7014+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
7015+ extack);
7016+ flow_indr_block_call(dev, bo, cmd);
7017+
7018+ if (list_empty(&bo->cb_list))
7019+ return -EOPNOTSUPP;
7020+
7021+ return 0;
7022+}
7023+
7024+static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
7025+ struct nf_flowtable *flowtable,
7026+ struct net_device *dev,
7027+ enum flow_block_command cmd,
7028+ struct netlink_ext_ack *extack)
7029+{
7030+ int err;
7031+
7032+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
7033+ extack);
developer0cc0d732023-06-07 13:52:41 +08007034+ down_write(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08007035+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
developer0cc0d732023-06-07 13:52:41 +08007036+ up_write(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08007037+ if (err < 0)
7038+ return err;
7039+
7040+ return 0;
7041+}
7042+
7043+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
7044+ struct net_device *dev,
7045+ enum flow_block_command cmd)
7046+{
7047+ struct netlink_ext_ack extack = {};
7048+ struct flow_block_offload bo;
7049+ int err;
7050+
7051+ if (!nf_flowtable_hw_offload(flowtable))
7052+ return 0;
7053+
7054+ if (dev->netdev_ops->ndo_setup_tc)
7055+ err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
7056+ &extack);
7057+ else
7058+ err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
7059+ &extack);
7060+ if (err < 0)
7061+ return err;
7062+
7063+ return nf_flow_table_block_setup(flowtable, &bo, cmd);
7064+}
7065+EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
7066+
7067+int nf_flow_table_offload_init(void)
7068+{
7069+ nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
7070+ WQ_UNBOUND | WQ_SYSFS, 0);
7071+ if (!nf_flow_offload_add_wq)
7072+ return -ENOMEM;
7073+
7074+ nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
7075+ WQ_UNBOUND | WQ_SYSFS, 0);
7076+ if (!nf_flow_offload_del_wq)
7077+ goto err_del_wq;
7078+
7079+ nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
7080+ WQ_UNBOUND | WQ_SYSFS, 0);
7081+ if (!nf_flow_offload_stats_wq)
7082+ goto err_stats_wq;
7083+
7084+ return 0;
7085+
7086+err_stats_wq:
7087+ destroy_workqueue(nf_flow_offload_del_wq);
7088+err_del_wq:
7089+ destroy_workqueue(nf_flow_offload_add_wq);
7090+ return -ENOMEM;
7091+}
7092+
7093+void nf_flow_table_offload_exit(void)
7094+{
7095+ destroy_workqueue(nf_flow_offload_add_wq);
7096+ destroy_workqueue(nf_flow_offload_del_wq);
7097+ destroy_workqueue(nf_flow_offload_stats_wq);
7098+}
7099diff --git a/net/netfilter/xt_FLOWOFFLOAD.c b/net/netfilter/xt_FLOWOFFLOAD.c
7100new file mode 100644
developer7eb15dc2023-06-14 17:44:03 +08007101index 0000000..3437d6a
developer8cb3ac72022-07-04 10:55:14 +08007102--- /dev/null
7103+++ b/net/netfilter/xt_FLOWOFFLOAD.c
developer7eb15dc2023-06-14 17:44:03 +08007104@@ -0,0 +1,795 @@
developer8cb3ac72022-07-04 10:55:14 +08007105+/*
7106+ * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
7107+ *
7108+ * This program is free software; you can redistribute it and/or modify
7109+ * it under the terms of the GNU General Public License version 2 as
7110+ * published by the Free Software Foundation.
7111+ */
7112+#include <linux/module.h>
7113+#include <linux/init.h>
7114+#include <linux/netfilter.h>
7115+#include <linux/netfilter/xt_FLOWOFFLOAD.h>
7116+#include <linux/if_vlan.h>
7117+#include <net/ip.h>
7118+#include <net/netfilter/nf_conntrack.h>
7119+#include <net/netfilter/nf_conntrack_extend.h>
7120+#include <net/netfilter/nf_conntrack_helper.h>
7121+#include <net/netfilter/nf_flow_table.h>
7122+
7123+struct xt_flowoffload_hook {
7124+ struct hlist_node list;
7125+ struct nf_hook_ops ops;
7126+ struct net *net;
7127+ bool registered;
7128+ bool used;
7129+};
7130+
7131+struct xt_flowoffload_table {
7132+ struct nf_flowtable ft;
7133+ struct hlist_head hooks;
7134+ struct delayed_work work;
7135+};
7136+
7137+struct nf_forward_info {
7138+ const struct net_device *indev;
7139+ const struct net_device *outdev;
7140+ const struct net_device *hw_outdev;
7141+ struct id {
7142+ __u16 id;
7143+ __be16 proto;
7144+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
7145+ u8 num_encaps;
7146+ u8 ingress_vlans;
7147+ u8 h_source[ETH_ALEN];
7148+ u8 h_dest[ETH_ALEN];
7149+ enum flow_offload_xmit_type xmit_type;
7150+};
7151+
7152+static DEFINE_SPINLOCK(hooks_lock);
7153+
7154+struct xt_flowoffload_table flowtable[2];
7155+
7156+static unsigned int
7157+xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
7158+ const struct nf_hook_state *state)
7159+{
7160+ struct vlan_ethhdr *veth;
7161+ __be16 proto;
7162+
7163+ switch (skb->protocol) {
7164+ case htons(ETH_P_8021Q):
7165+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
7166+ proto = veth->h_vlan_encapsulated_proto;
7167+ break;
7168+ case htons(ETH_P_PPP_SES):
7169+ proto = nf_flow_pppoe_proto(skb);
7170+ break;
7171+ default:
7172+ proto = skb->protocol;
7173+ break;
7174+ }
7175+
7176+ switch (proto) {
7177+ case htons(ETH_P_IP):
7178+ return nf_flow_offload_ip_hook(priv, skb, state);
7179+ case htons(ETH_P_IPV6):
7180+ return nf_flow_offload_ipv6_hook(priv, skb, state);
7181+ }
7182+
7183+ return NF_ACCEPT;
7184+}
7185+
7186+static int
7187+xt_flowoffload_create_hook(struct xt_flowoffload_table *table,
7188+ struct net_device *dev)
7189+{
7190+ struct xt_flowoffload_hook *hook;
7191+ struct nf_hook_ops *ops;
7192+
7193+ hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
7194+ if (!hook)
7195+ return -ENOMEM;
7196+
7197+ ops = &hook->ops;
7198+ ops->pf = NFPROTO_NETDEV;
7199+ ops->hooknum = NF_NETDEV_INGRESS;
7200+ ops->priority = 10;
7201+ ops->priv = &table->ft;
7202+ ops->hook = xt_flowoffload_net_hook;
7203+ ops->dev = dev;
7204+
7205+ hlist_add_head(&hook->list, &table->hooks);
7206+ mod_delayed_work(system_power_efficient_wq, &table->work, 0);
7207+
7208+ return 0;
7209+}
7210+
7211+static struct xt_flowoffload_hook *
7212+flow_offload_lookup_hook(struct xt_flowoffload_table *table,
7213+ struct net_device *dev)
7214+{
7215+ struct xt_flowoffload_hook *hook;
7216+
7217+ hlist_for_each_entry(hook, &table->hooks, list) {
7218+ if (hook->ops.dev == dev)
7219+ return hook;
7220+ }
7221+
7222+ return NULL;
7223+}
7224+
7225+static void
7226+xt_flowoffload_check_device(struct xt_flowoffload_table *table,
7227+ struct net_device *dev)
7228+{
7229+ struct xt_flowoffload_hook *hook;
7230+
7231+ if (!dev)
7232+ return;
7233+
7234+ spin_lock_bh(&hooks_lock);
7235+ hook = flow_offload_lookup_hook(table, dev);
7236+ if (hook)
7237+ hook->used = true;
7238+ else
7239+ xt_flowoffload_create_hook(table, dev);
7240+ spin_unlock_bh(&hooks_lock);
7241+}
7242+
7243+static void
7244+xt_flowoffload_register_hooks(struct xt_flowoffload_table *table)
7245+{
7246+ struct xt_flowoffload_hook *hook;
7247+
7248+restart:
7249+ hlist_for_each_entry(hook, &table->hooks, list) {
7250+ if (hook->registered)
7251+ continue;
7252+
7253+ hook->registered = true;
7254+ hook->net = dev_net(hook->ops.dev);
7255+ spin_unlock_bh(&hooks_lock);
7256+ nf_register_net_hook(hook->net, &hook->ops);
7257+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
7258+ table->ft.type->setup(&table->ft, hook->ops.dev,
7259+ FLOW_BLOCK_BIND);
7260+ spin_lock_bh(&hooks_lock);
7261+ goto restart;
7262+ }
7263+
7264+}
7265+
7266+static bool
7267+xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table)
7268+{
7269+ struct xt_flowoffload_hook *hook;
7270+ bool active = false;
7271+
7272+restart:
7273+ spin_lock_bh(&hooks_lock);
7274+ hlist_for_each_entry(hook, &table->hooks, list) {
7275+ if (hook->used || !hook->registered) {
7276+ active = true;
7277+ continue;
7278+ }
7279+
7280+ hlist_del(&hook->list);
7281+ spin_unlock_bh(&hooks_lock);
7282+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
7283+ table->ft.type->setup(&table->ft, hook->ops.dev,
7284+ FLOW_BLOCK_UNBIND);
7285+ nf_unregister_net_hook(hook->net, &hook->ops);
7286+ kfree(hook);
7287+ goto restart;
7288+ }
7289+ spin_unlock_bh(&hooks_lock);
7290+
7291+ return active;
7292+}
7293+
7294+static void
7295+xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
7296+{
7297+ struct xt_flowoffload_table *table = data;
7298+ struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple;
7299+ struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple;
7300+ struct xt_flowoffload_hook *hook;
7301+
7302+ spin_lock_bh(&hooks_lock);
7303+ hlist_for_each_entry(hook, &table->hooks, list) {
7304+ if (hook->ops.dev->ifindex != tuple0->iifidx &&
7305+ hook->ops.dev->ifindex != tuple1->iifidx)
7306+ continue;
7307+
7308+ hook->used = true;
7309+ }
7310+ spin_unlock_bh(&hooks_lock);
7311+}
7312+
7313+static void
7314+xt_flowoffload_hook_work(struct work_struct *work)
7315+{
7316+ struct xt_flowoffload_table *table;
7317+ struct xt_flowoffload_hook *hook;
7318+ int err;
7319+
7320+ table = container_of(work, struct xt_flowoffload_table, work.work);
7321+
7322+ spin_lock_bh(&hooks_lock);
7323+ xt_flowoffload_register_hooks(table);
7324+ hlist_for_each_entry(hook, &table->hooks, list)
7325+ hook->used = false;
7326+ spin_unlock_bh(&hooks_lock);
7327+
7328+ err = nf_flow_table_iterate(&table->ft, xt_flowoffload_check_hook,
7329+ table);
7330+ if (err && err != -EAGAIN)
7331+ goto out;
7332+
7333+ if (!xt_flowoffload_cleanup_hooks(table))
7334+ return;
7335+
7336+out:
7337+ queue_delayed_work(system_power_efficient_wq, &table->work, HZ);
7338+}
7339+
7340+static bool
7341+xt_flowoffload_skip(struct sk_buff *skb, int family)
7342+{
7343+ if (skb_sec_path(skb))
7344+ return true;
7345+
7346+ if (family == NFPROTO_IPV4) {
7347+ const struct ip_options *opt = &(IPCB(skb)->opt);
7348+
7349+ if (unlikely(opt->optlen))
7350+ return true;
7351+ }
7352+
7353+ return false;
7354+}
7355+
7356+static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst)
7357+{
7358+ if (dst_xfrm(dst))
7359+ return FLOW_OFFLOAD_XMIT_XFRM;
7360+
7361+ return FLOW_OFFLOAD_XMIT_NEIGH;
7362+}
7363+
7364+static void nf_default_forward_path(struct nf_flow_route *route,
7365+ struct dst_entry *dst_cache,
7366+ enum ip_conntrack_dir dir,
7367+ struct net_device **dev)
7368+{
7369+ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
7370+ route->tuple[dir].dst = dst_cache;
7371+ route->tuple[dir].xmit_type = nf_xmit_type(dst_cache);
7372+}
7373+
7374+static bool nf_is_valid_ether_device(const struct net_device *dev)
7375+{
7376+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
7377+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
7378+ return false;
7379+
7380+ return true;
7381+}
7382+
7383+static void nf_dev_path_info(const struct net_device_path_stack *stack,
7384+ struct nf_forward_info *info,
7385+ unsigned char *ha)
7386+{
7387+ const struct net_device_path *path;
7388+ int i;
7389+
7390+ memcpy(info->h_dest, ha, ETH_ALEN);
7391+
7392+ for (i = 0; i < stack->num_paths; i++) {
7393+ path = &stack->path[i];
7394+
7395+ info->indev = path->dev;
7396+
7397+ switch (path->type) {
7398+ case DEV_PATH_ETHERNET:
7399+ case DEV_PATH_DSA:
7400+ case DEV_PATH_VLAN:
7401+ case DEV_PATH_PPPOE:
7402+ if (is_zero_ether_addr(info->h_source))
7403+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
7404+
7405+ if (path->type == DEV_PATH_ETHERNET)
7406+ break;
7407+ if (path->type == DEV_PATH_DSA) {
7408+ i = stack->num_paths;
7409+ break;
7410+ }
7411+
7412+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
7413+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
7414+ info->indev = NULL;
7415+ break;
7416+ }
7417+ if (!info->outdev)
7418+ info->outdev = path->dev;
7419+ info->encap[info->num_encaps].id = path->encap.id;
7420+ info->encap[info->num_encaps].proto = path->encap.proto;
7421+ info->num_encaps++;
7422+ if (path->type == DEV_PATH_PPPOE)
7423+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
7424+ break;
7425+ case DEV_PATH_BRIDGE:
7426+ if (is_zero_ether_addr(info->h_source))
7427+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
7428+
7429+ switch (path->bridge.vlan_mode) {
7430+ case DEV_PATH_BR_VLAN_UNTAG_HW:
7431+ info->ingress_vlans |= BIT(info->num_encaps - 1);
7432+ break;
7433+ case DEV_PATH_BR_VLAN_TAG:
7434+ info->encap[info->num_encaps].id = path->bridge.vlan_id;
7435+ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
7436+ info->num_encaps++;
7437+ break;
7438+ case DEV_PATH_BR_VLAN_UNTAG:
7439+ info->num_encaps--;
7440+ break;
7441+ case DEV_PATH_BR_VLAN_KEEP:
7442+ break;
7443+ }
7444+ break;
7445+ default:
7446+ break;
7447+ }
7448+ }
7449+ if (!info->outdev)
7450+ info->outdev = info->indev;
7451+
7452+ info->hw_outdev = info->indev;
7453+
7454+ if (nf_is_valid_ether_device(info->indev))
7455+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
7456+}
7457+
7458+static int nf_dev_fill_forward_path(const struct nf_flow_route *route,
7459+ const struct dst_entry *dst_cache,
7460+ const struct nf_conn *ct,
7461+ enum ip_conntrack_dir dir, u8 *ha,
7462+ struct net_device_path_stack *stack)
7463+{
7464+ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
7465+ struct net_device *dev = dst_cache->dev;
7466+ struct neighbour *n;
7467+ u8 nud_state;
7468+
7469+ if (!nf_is_valid_ether_device(dev))
7470+ goto out;
7471+
developer9fdc0e82023-05-12 14:21:17 +08007472+ if (ct->status & IPS_NAT_MASK) {
7473+ n = dst_neigh_lookup(dst_cache, daddr);
7474+ if (!n)
7475+ return -1;
developer8cb3ac72022-07-04 10:55:14 +08007476+
developer9fdc0e82023-05-12 14:21:17 +08007477+ read_lock_bh(&n->lock);
7478+ nud_state = n->nud_state;
7479+ ether_addr_copy(ha, n->ha);
7480+ read_unlock_bh(&n->lock);
7481+ neigh_release(n);
developer8cb3ac72022-07-04 10:55:14 +08007482+
developer9fdc0e82023-05-12 14:21:17 +08007483+ if (!(nud_state & NUD_VALID))
7484+ return -1;
7485+ }
developer64db8532023-04-28 13:56:00 +08007486+
developer8cb3ac72022-07-04 10:55:14 +08007487+out:
7488+ return dev_fill_forward_path(dev, ha, stack);
7489+}
7490+
developer9fdc0e82023-05-12 14:21:17 +08007491+static int nf_dev_forward_path(struct sk_buff *skb,
7492+ struct nf_flow_route *route,
developer8cb3ac72022-07-04 10:55:14 +08007493+ const struct nf_conn *ct,
7494+ enum ip_conntrack_dir dir,
7495+ struct net_device **devs)
7496+{
7497+ const struct dst_entry *dst = route->tuple[dir].dst;
developer9fdc0e82023-05-12 14:21:17 +08007498+ struct ethhdr *eth;
7499+ enum ip_conntrack_dir skb_dir;
developer8cb3ac72022-07-04 10:55:14 +08007500+ struct net_device_path_stack stack;
7501+ struct nf_forward_info info = {};
7502+ unsigned char ha[ETH_ALEN];
7503+ int i;
7504+
developer9fdc0e82023-05-12 14:21:17 +08007505+ if (!(ct->status & IPS_NAT_MASK) && skb_mac_header_was_set(skb)) {
7506+ eth = eth_hdr(skb);
7507+ skb_dir = CTINFO2DIR(skb_get_nfct(skb) & NFCT_INFOMASK);
7508+
7509+ if (skb_dir != dir) {
7510+ memcpy(ha, eth->h_source, ETH_ALEN);
7511+ memcpy(info.h_source, eth->h_dest, ETH_ALEN);
7512+ } else {
7513+ memcpy(ha, eth->h_dest, ETH_ALEN);
7514+ memcpy(info.h_source, eth->h_source, ETH_ALEN);
7515+ }
7516+ }
7517+
developer7e533772023-04-27 05:59:30 +08007518+ if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
developer8cb3ac72022-07-04 10:55:14 +08007519+ nf_dev_path_info(&stack, &info, ha);
7520+
7521+ devs[!dir] = (struct net_device *)info.indev;
7522+ if (!info.indev)
7523+ return -1;
7524+
7525+ route->tuple[!dir].in.ifindex = info.indev->ifindex;
7526+ for (i = 0; i < info.num_encaps; i++) {
7527+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
7528+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
7529+ }
7530+ route->tuple[!dir].in.num_encaps = info.num_encaps;
7531+ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
7532+
7533+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
7534+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
7535+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
7536+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
7537+ route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
7538+ route->tuple[dir].xmit_type = info.xmit_type;
7539+ }
7540+
7541+ return 0;
7542+}
7543+
7544+static int
7545+xt_flowoffload_route_dir(struct nf_flow_route *route, const struct nf_conn *ct,
7546+ enum ip_conntrack_dir dir,
7547+ const struct xt_action_param *par, int ifindex,
7548+ struct net_device **devs)
7549+{
7550+ struct dst_entry *dst = NULL;
7551+ struct flowi fl;
7552+
7553+ memset(&fl, 0, sizeof(fl));
7554+ switch (xt_family(par)) {
7555+ case NFPROTO_IPV4:
7556+ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.src.u3.ip;
7557+ fl.u.ip4.flowi4_oif = ifindex;
7558+ break;
7559+ case NFPROTO_IPV6:
7560+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
7561+ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.src.u3.in6;
7562+ fl.u.ip6.flowi6_oif = ifindex;
7563+ break;
7564+ }
7565+
7566+ nf_route(xt_net(par), &dst, &fl, false, xt_family(par));
7567+ if (!dst)
7568+ return -ENOENT;
7569+
7570+ nf_default_forward_path(route, dst, dir, devs);
7571+
7572+ return 0;
7573+}
7574+
7575+static int
developer480c5d52022-12-28 14:48:14 +08007576+xt_flowoffload_route_nat(struct sk_buff *skb, const struct nf_conn *ct,
7577+ const struct xt_action_param *par,
7578+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
7579+ struct net_device **devs)
7580+{
7581+ struct dst_entry *this_dst = skb_dst(skb);
7582+ struct dst_entry *other_dst = NULL;
7583+ struct flowi fl;
7584+
7585+ memset(&fl, 0, sizeof(fl));
7586+ switch (xt_family(par)) {
7587+ case NFPROTO_IPV4:
7588+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
7589+ fl.u.ip4.flowi4_oif = xt_in(par)->ifindex;
7590+ break;
7591+ case NFPROTO_IPV6:
7592+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
7593+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
7594+ fl.u.ip6.flowi6_oif = xt_in(par)->ifindex;
7595+ break;
7596+ }
7597+
7598+ nf_route(xt_net(par), &other_dst, &fl, false, xt_family(par));
7599+ if (!other_dst)
7600+ return -ENOENT;
7601+
7602+ nf_default_forward_path(route, this_dst, dir, devs);
7603+ nf_default_forward_path(route, other_dst, !dir, devs);
7604+
developer7e533772023-04-27 05:59:30 +08007605+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
developer480c5d52022-12-28 14:48:14 +08007606+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
developer9fdc0e82023-05-12 14:21:17 +08007607+ if (nf_dev_forward_path(skb, route, ct, dir, devs))
developer480c5d52022-12-28 14:48:14 +08007608+ return -1;
developer9fdc0e82023-05-12 14:21:17 +08007609+ if (nf_dev_forward_path(skb, route, ct, !dir, devs))
developer480c5d52022-12-28 14:48:14 +08007610+ return -1;
7611+ }
7612+
7613+ return 0;
7614+}
7615+
7616+static int
7617+xt_flowoffload_route_bridge(struct sk_buff *skb, const struct nf_conn *ct,
7618+ const struct xt_action_param *par,
7619+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
7620+ struct net_device **devs)
developer8cb3ac72022-07-04 10:55:14 +08007621+{
7622+ int ret;
7623+
7624+ ret = xt_flowoffload_route_dir(route, ct, dir, par,
7625+ devs[dir]->ifindex,
7626+ devs);
7627+ if (ret)
7628+ return ret;
7629+
7630+ ret = xt_flowoffload_route_dir(route, ct, !dir, par,
7631+ devs[!dir]->ifindex,
7632+ devs);
7633+ if (ret)
developer67bbcc02022-07-08 09:04:01 +08007634+ goto err_route_dir1;
developer8cb3ac72022-07-04 10:55:14 +08007635+
developer7e533772023-04-27 05:59:30 +08007636+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
developer8cb3ac72022-07-04 10:55:14 +08007637+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
developer9fdc0e82023-05-12 14:21:17 +08007638+ if (nf_dev_forward_path(skb, route, ct, dir, devs) ||
7639+ nf_dev_forward_path(skb, route, ct, !dir, devs)) {
developer67bbcc02022-07-08 09:04:01 +08007640+ ret = -1;
7641+ goto err_route_dir2;
7642+ }
developer8cb3ac72022-07-04 10:55:14 +08007643+ }
7644+
7645+ return 0;
developer67bbcc02022-07-08 09:04:01 +08007646+
7647+err_route_dir2:
7648+ dst_release(route->tuple[!dir].dst);
7649+err_route_dir1:
7650+ dst_release(route->tuple[dir].dst);
7651+ return ret;
developer8cb3ac72022-07-04 10:55:14 +08007652+}
7653+
7654+static unsigned int
7655+flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
7656+{
7657+ struct xt_flowoffload_table *table;
7658+ const struct xt_flowoffload_target_info *info = par->targinfo;
7659+ struct tcphdr _tcph, *tcph = NULL;
7660+ enum ip_conntrack_info ctinfo;
7661+ enum ip_conntrack_dir dir;
7662+ struct nf_flow_route route = {};
7663+ struct flow_offload *flow = NULL;
7664+ struct net_device *devs[2] = {};
7665+ struct nf_conn *ct;
7666+ struct net *net;
7667+
7668+ if (xt_flowoffload_skip(skb, xt_family(par)))
7669+ return XT_CONTINUE;
7670+
7671+ ct = nf_ct_get(skb, &ctinfo);
7672+ if (ct == NULL)
7673+ return XT_CONTINUE;
7674+
7675+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
7676+ case IPPROTO_TCP:
7677+ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
7678+ return XT_CONTINUE;
7679+
7680+ tcph = skb_header_pointer(skb, par->thoff,
7681+ sizeof(_tcph), &_tcph);
7682+ if (unlikely(!tcph || tcph->fin || tcph->rst))
7683+ return XT_CONTINUE;
7684+ break;
7685+ case IPPROTO_UDP:
7686+ break;
7687+ default:
7688+ return XT_CONTINUE;
7689+ }
7690+
7691+ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
7692+ ct->status & IPS_SEQ_ADJUST)
7693+ return XT_CONTINUE;
7694+
7695+ if (!nf_ct_is_confirmed(ct))
7696+ return XT_CONTINUE;
7697+
7698+	dir = CTINFO2DIR(ctinfo);
7699+
7700+	devs[dir] = xt_out(par);
7701+	devs[!dir] = xt_in(par);
7702+
7703+	if (!devs[dir] || !devs[!dir])
7704+		return XT_CONTINUE;
7705+
7706+	if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
7707+		return XT_CONTINUE;
7708+
developer480c5d52022-12-28 14:48:14 +08007709+ if (ct->status & IPS_NAT_MASK) {
7710+ if (xt_flowoffload_route_nat(skb, ct, par, &route, dir, devs) < 0)
7711+ goto err_flow_route;
7712+ } else {
7713+ if (xt_flowoffload_route_bridge(skb, ct, par, &route, dir, devs) < 0)
7714+ goto err_flow_route;
7715+ }
developer8cb3ac72022-07-04 10:55:14 +08007716+
7717+ flow = flow_offload_alloc(ct);
7718+ if (!flow)
7719+ goto err_flow_alloc;
7720+
7721+ if (flow_offload_route_init(flow, &route) < 0)
7722+ goto err_flow_add;
7723+
7724+ if (tcph) {
7725+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
7726+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
7727+ }
7728+
7729+ table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)];
7730+
7731+ net = read_pnet(&table->ft.net);
7732+ if (!net)
7733+ write_pnet(&table->ft.net, xt_net(par));
7734+
7735+ if (flow_offload_add(&table->ft, flow) < 0)
7736+ goto err_flow_add;
7737+
7738+ xt_flowoffload_check_device(table, devs[0]);
7739+ xt_flowoffload_check_device(table, devs[1]);
7740+
developer480c5d52022-12-28 14:48:14 +08007741+ if (!(ct->status & IPS_NAT_MASK))
7742+ dst_release(route.tuple[dir].dst);
developer8cb3ac72022-07-04 10:55:14 +08007743+ dst_release(route.tuple[!dir].dst);
7744+
7745+ return XT_CONTINUE;
7746+
7747+err_flow_add:
7748+ flow_offload_free(flow);
7749+err_flow_alloc:
developer480c5d52022-12-28 14:48:14 +08007750+ if (!(ct->status & IPS_NAT_MASK))
7751+ dst_release(route.tuple[dir].dst);
developer8cb3ac72022-07-04 10:55:14 +08007752+ dst_release(route.tuple[!dir].dst);
7753+err_flow_route:
7754+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
7755+
7756+ return XT_CONTINUE;
7757+}
7758+
7759+static int flowoffload_chk(const struct xt_tgchk_param *par)
7760+{
7761+ struct xt_flowoffload_target_info *info = par->targinfo;
7762+
7763+ if (info->flags & ~XT_FLOWOFFLOAD_MASK)
7764+ return -EINVAL;
7765+
7766+ return 0;
7767+}
7768+
7769+static struct xt_target offload_tg_reg __read_mostly = {
7770+ .family = NFPROTO_UNSPEC,
7771+ .name = "FLOWOFFLOAD",
7772+ .revision = 0,
7773+ .targetsize = sizeof(struct xt_flowoffload_target_info),
7774+ .usersize = sizeof(struct xt_flowoffload_target_info),
7775+ .checkentry = flowoffload_chk,
7776+ .target = flowoffload_tg,
7777+ .me = THIS_MODULE,
7778+};
7779+
7780+static int flow_offload_netdev_event(struct notifier_block *this,
7781+ unsigned long event, void *ptr)
7782+{
7783+ struct xt_flowoffload_hook *hook0, *hook1;
7784+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
7785+
7786+ if (event != NETDEV_UNREGISTER)
7787+ return NOTIFY_DONE;
7788+
7789+ spin_lock_bh(&hooks_lock);
7790+ hook0 = flow_offload_lookup_hook(&flowtable[0], dev);
7791+ if (hook0)
7792+ hlist_del(&hook0->list);
7793+
7794+ hook1 = flow_offload_lookup_hook(&flowtable[1], dev);
7795+ if (hook1)
7796+ hlist_del(&hook1->list);
7797+ spin_unlock_bh(&hooks_lock);
7798+
7799+ if (hook0) {
7800+ nf_unregister_net_hook(hook0->net, &hook0->ops);
7801+ kfree(hook0);
7802+ }
7803+
7804+ if (hook1) {
7805+ nf_unregister_net_hook(hook1->net, &hook1->ops);
7806+ kfree(hook1);
7807+ }
7808+
7809+ nf_flow_table_cleanup(dev);
7810+
7811+ return NOTIFY_DONE;
7812+}
7813+
7814+static struct notifier_block flow_offload_netdev_notifier = {
7815+ .notifier_call = flow_offload_netdev_event,
7816+};
7817+
7818+static int nf_flow_rule_route_inet(struct net *net,
7819+ const struct flow_offload *flow,
7820+ enum flow_offload_tuple_dir dir,
7821+ struct nf_flow_rule *flow_rule)
7822+{
7823+ const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
7824+ int err;
7825+
7826+ switch (flow_tuple->l3proto) {
7827+ case NFPROTO_IPV4:
7828+ err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
7829+ break;
7830+ case NFPROTO_IPV6:
7831+ err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
7832+ break;
7833+ default:
7834+ err = -1;
7835+ break;
7836+ }
7837+
7838+ return err;
7839+}
7840+
7841+static struct nf_flowtable_type flowtable_inet = {
7842+ .family = NFPROTO_INET,
7843+ .init = nf_flow_table_init,
7844+ .setup = nf_flow_table_offload_setup,
7845+ .action = nf_flow_rule_route_inet,
7846+ .free = nf_flow_table_free,
7847+ .hook = xt_flowoffload_net_hook,
7848+ .owner = THIS_MODULE,
7849+};
7850+
7851+static int init_flowtable(struct xt_flowoffload_table *tbl)
7852+{
7853+ INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work);
7854+ tbl->ft.type = &flowtable_inet;
developer7eb15dc2023-06-14 17:44:03 +08007855+ tbl->ft.flags = NF_FLOWTABLE_COUNTER;
developer8cb3ac72022-07-04 10:55:14 +08007856+
7857+ return nf_flow_table_init(&tbl->ft);
7858+}
7859+
7860+static int __init xt_flowoffload_tg_init(void)
7861+{
7862+ int ret;
7863+
7864+ register_netdevice_notifier(&flow_offload_netdev_notifier);
7865+
7866+ ret = init_flowtable(&flowtable[0]);
7867+ if (ret)
7868+ return ret;
7869+
7870+ ret = init_flowtable(&flowtable[1]);
7871+ if (ret)
7872+ goto cleanup;
7873+
developer7eb15dc2023-06-14 17:44:03 +08007874+ flowtable[1].ft.flags |= NF_FLOWTABLE_HW_OFFLOAD;
developer8cb3ac72022-07-04 10:55:14 +08007875+
7876+ ret = xt_register_target(&offload_tg_reg);
7877+ if (ret)
7878+ goto cleanup2;
7879+
7880+ return 0;
7881+
7882+cleanup2:
7883+ nf_flow_table_free(&flowtable[1].ft);
7884+cleanup:
7885+ nf_flow_table_free(&flowtable[0].ft);
7886+ return ret;
7887+}
7888+
7889+static void __exit xt_flowoffload_tg_exit(void)
7890+{
7891+ xt_unregister_target(&offload_tg_reg);
7892+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
7893+ nf_flow_table_free(&flowtable[0].ft);
7894+ nf_flow_table_free(&flowtable[1].ft);
7895+}
7896+
7897+MODULE_LICENSE("GPL");
7898+module_init(xt_flowoffload_tg_init);
7899+module_exit(xt_flowoffload_tg_exit);
7900--
79012.18.0
7902