[][MAC80211][WED][Add wed driver to support tx/rx offload]

[Description]
Add wed driver to support tx/rx offload

[Release-log]
N/A

Change-Id: Ic77fdde01ce06ef5638d077d880864dffa8ba821
Reviewed-on: https://gerrit.mediatek.inc/c/openwrt/feeds/mtk_openwrt_feeds/+/6197267
diff --git a/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9990-mt7622-backport-nf-hw-offload-framework-and-ups.patch b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9990-mt7622-backport-nf-hw-offload-framework-and-ups.patch
new file mode 100755
index 0000000..fee4d12
--- /dev/null
+++ b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9990-mt7622-backport-nf-hw-offload-framework-and-ups.patch
@@ -0,0 +1,6840 @@
+From 6ad9bd65769003ab526e504577e0f747eba14287 Mon Sep 17 00:00:00 2001
+From: Bo Jiao <Bo.Jiao@mediatek.com>
+Date: Wed, 22 Jun 2022 09:42:19 +0800
+Subject: [PATCH 1/8] 
+ 9990-mt7622-backport-nf-hw-offload-framework-and-upstream-hnat-plus-xt-FLOWOFFLOAD-update-v2
+
+---
+ drivers/net/ethernet/mediatek/Makefile        |    3 +-
+ drivers/net/ethernet/mediatek/mtk_eth_soc.c   |   28 +-
+ drivers/net/ethernet/mediatek/mtk_eth_soc.h   |   20 +-
+ drivers/net/ethernet/mediatek/mtk_ppe.c       |  509 +++++++
+ drivers/net/ethernet/mediatek/mtk_ppe.h       |  288 ++++
+ .../net/ethernet/mediatek/mtk_ppe_debugfs.c   |  214 +++
+ .../net/ethernet/mediatek/mtk_ppe_offload.c   |  526 ++++++++
+ drivers/net/ethernet/mediatek/mtk_ppe_regs.h  |  144 ++
+ drivers/net/ppp/ppp_generic.c                 |   22 +
+ drivers/net/ppp/pppoe.c                       |   24 +
+ include/linux/netdevice.h                     |   60 +
+ include/linux/ppp_channel.h                   |    3 +
+ include/net/dsa.h                             |   10 +
+ include/net/flow_offload.h                    |    4 +
+ include/net/ip6_route.h                       |    5 +-
+ .../net/netfilter/ipv6/nf_conntrack_ipv6.h    |    3 -
+ include/net/netfilter/nf_conntrack.h          |   12 +
+ include/net/netfilter/nf_conntrack_acct.h     |   11 +
+ include/net/netfilter/nf_flow_table.h         |  264 +++-
+ include/net/netns/conntrack.h                 |    6 +
+ .../linux/netfilter/nf_conntrack_common.h     |    9 +-
+ include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h |   17 +
+ net/8021q/vlan_dev.c                          |   21 +
+ net/bridge/br_device.c                        |   49 +
+ net/bridge/br_private.h                       |   20 +
+ net/bridge/br_vlan.c                          |   55 +
+ net/core/dev.c                                |   46 +
+ net/dsa/dsa.c                                 |    9 +
+ net/dsa/slave.c                               |   41 +-
+ net/ipv4/netfilter/Kconfig                    |    4 +-
+ net/ipv6/ip6_output.c                         |    2 +-
+ net/ipv6/netfilter/Kconfig                    |    3 +-
+ net/ipv6/route.c                              |   22 +-
+ net/netfilter/Kconfig                         |   14 +-
+ net/netfilter/Makefile                        |    4 +-
+ net/netfilter/nf_conntrack_core.c             |   20 +-
+ net/netfilter/nf_conntrack_proto_tcp.c        |    4 +
+ net/netfilter/nf_conntrack_proto_udp.c        |    4 +
+ net/netfilter/nf_conntrack_standalone.c       |   34 +-
+ net/netfilter/nf_flow_table_core.c            |  446 +++---
+ net/netfilter/nf_flow_table_ip.c              |  455 ++++---
+ net/netfilter/nf_flow_table_offload.c         | 1191 +++++++++++++++++
+ net/netfilter/xt_FLOWOFFLOAD.c                |  719 ++++++++++
+ 43 files changed, 4913 insertions(+), 432 deletions(-)
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.h
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_regs.h
+ create mode 100644 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
+ create mode 100644 net/netfilter/nf_flow_table_offload.c
+ create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c
+
+diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
+index 13c5b4e8f..0a6af99f1 100755
+--- a/drivers/net/ethernet/mediatek/Makefile
++++ b/drivers/net/ethernet/mediatek/Makefile
+@@ -4,5 +4,6 @@
+ #
+ 
+ obj-$(CONFIG_NET_MEDIATEK_SOC)			+= mtk_eth.o
+-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o
++mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o	\
++	     mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
+ obj-$(CONFIG_NET_MEDIATEK_HNAT)			+= mtk_hnat/
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+index 2b21f7ed0..819d8a0be 100755
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -2654,12 +2654,17 @@ static int mtk_open(struct net_device *dev)
+ 
+ 	/* we run 2 netdevs on the same dma ring so we only bring it up once */
+ 	if (!refcount_read(&eth->dma_refcnt)) {
+-		int err = mtk_start_dma(eth);
++		u32 gdm_config = MTK_GDMA_TO_PDMA;
++		int err;
+ 
++		err = mtk_start_dma(eth);
+ 		if (err)
+ 			return err;
+ 
+-		mtk_gdm_config(eth, MTK_GDMA_TO_PDMA);
++		if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
++			gdm_config = MTK_GDMA_TO_PPE;
++
++		mtk_gdm_config(eth, gdm_config);
+ 
+ 		/* Indicates CDM to parse the MTK special tag from CPU */
+ 		if (netdev_uses_dsa(dev)) {
+@@ -2772,6 +2777,9 @@ static int mtk_stop(struct net_device *dev)
+ 
+ 	mtk_dma_free(eth);
+ 
++	if (eth->soc->offload_version)
++		mtk_ppe_stop(&eth->ppe);
++
+ 	return 0;
+ }
+ 
+@@ -3391,6 +3399,7 @@ static const struct net_device_ops mtk_netdev_ops = {
+ #ifdef CONFIG_NET_POLL_CONTROLLER
+ 	.ndo_poll_controller	= mtk_poll_controller,
+ #endif
++	.ndo_setup_tc		= mtk_eth_setup_tc,
+ };
+ 
+ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
+@@ -3682,6 +3691,17 @@ static int mtk_probe(struct platform_device *pdev)
+ 			goto err_free_dev;
+ 	}
+ 
++	if (eth->soc->offload_version) {
++		err = mtk_ppe_init(&eth->ppe, eth->dev,
++				   eth->base + MTK_ETH_PPE_BASE, 2);
++		if (err)
++			goto err_free_dev;
++
++		err = mtk_eth_offload_init(eth);
++		if (err)
++			goto err_free_dev;
++	}
++
+ 	for (i = 0; i < MTK_MAX_DEVS; i++) {
+ 		if (!eth->netdev[i])
+ 			continue;
+@@ -3781,6 +3801,7 @@ static const struct mtk_soc_data mt2701_data = {
+ 	.required_clks = MT7623_CLKS_BITMAP,
+ 	.required_pctl = true,
+ 	.has_sram = false,
++	.offload_version = 2,
+ };
+ 
+ static const struct mtk_soc_data mt7621_data = {
+@@ -3789,6 +3810,7 @@ static const struct mtk_soc_data mt7621_data = {
+ 	.required_clks = MT7621_CLKS_BITMAP,
+ 	.required_pctl = false,
+ 	.has_sram = false,
++	.offload_version = 2,
+ };
+ 
+ static const struct mtk_soc_data mt7622_data = {
+@@ -3798,6 +3820,7 @@ static const struct mtk_soc_data mt7622_data = {
+ 	.required_clks = MT7622_CLKS_BITMAP,
+ 	.required_pctl = false,
+ 	.has_sram = false,
++	.offload_version = 2,
+ };
+ 
+ static const struct mtk_soc_data mt7623_data = {
+@@ -3806,6 +3829,7 @@ static const struct mtk_soc_data mt7623_data = {
+ 	.required_clks = MT7623_CLKS_BITMAP,
+ 	.required_pctl = true,
+ 	.has_sram = false,
++	.offload_version = 2,
+ };
+ 
+ static const struct mtk_soc_data mt7629_data = {
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+index b6380ffeb..349f98503 100755
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -15,6 +15,8 @@
+ #include <linux/u64_stats_sync.h>
+ #include <linux/refcount.h>
+ #include <linux/phylink.h>
++#include <linux/rhashtable.h>
++#include "mtk_ppe.h"
+ 
+ #define MTK_QDMA_PAGE_SIZE	2048
+ #define	MTK_MAX_RX_LENGTH	1536
+@@ -37,7 +39,8 @@
+ 				 NETIF_F_HW_VLAN_CTAG_TX | \
+ 				 NETIF_F_SG | NETIF_F_TSO | \
+ 				 NETIF_F_TSO6 | \
+-				 NETIF_F_IPV6_CSUM)
++				 NETIF_F_IPV6_CSUM |\
++				 NETIF_F_HW_TC)
+ #define MTK_SET_FEATURES	(NETIF_F_LRO | \
+ 				 NETIF_F_HW_VLAN_CTAG_RX)
+ #define MTK_HW_FEATURES_MT7628	(NETIF_F_SG | NETIF_F_RXCSUM)
+@@ -107,6 +110,7 @@
+ #define MTK_GDMA_TCS_EN		BIT(21)
+ #define MTK_GDMA_UCS_EN		BIT(20)
+ #define MTK_GDMA_TO_PDMA	0x0
++#define MTK_GDMA_TO_PPE		0x4444
+ #define MTK_GDMA_DROP_ALL	0x7777
+ 
+ /* Unicast Filter MAC Address Register - Low */
+@@ -547,6 +551,12 @@
+ #define RX_DMA_TCI(_x)		((_x) & (VLAN_PRIO_MASK | VLAN_VID_MASK))
+ #define RX_DMA_VPID(_x)		(((_x) >> 16) & 0xffff)
+ 
++/* QDMA descriptor rxd4 */
++#define MTK_RXD4_FOE_ENTRY	GENMASK(13, 0)
++#define MTK_RXD4_PPE_CPU_REASON	GENMASK(18, 14)
++#define MTK_RXD4_SRC_PORT	GENMASK(21, 19)
++#define MTK_RXD4_ALG		GENMASK(31, 22)
++
+ /* QDMA descriptor rxd4 */
+ #define RX_DMA_L4_VALID		BIT(24)
+ #define RX_DMA_L4_VALID_PDMA	BIT(30)		/* when PDMA is used */
+@@ -1158,6 +1168,7 @@ struct mtk_soc_data {
+ 	u32		caps;
+ 	u32		required_clks;
+ 	bool		required_pctl;
++	u8		offload_version;
+ 	netdev_features_t hw_features;
+ 	bool		has_sram;
+ };
+@@ -1271,6 +1282,9 @@ struct mtk_eth {
+ 	int				ip_align;
+ 	spinlock_t			syscfg0_lock;
+ 	struct timer_list		mtk_dma_monitor_timer;
++
++	struct mtk_ppe			ppe;
++	struct rhashtable		flow_table;
+ };
+ 
+ /* struct mtk_mac -	the structure that holds the info about the MACs of the
+@@ -1319,4 +1333,8 @@ int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
+ void mtk_gdm_config(struct mtk_eth *eth, u32 config);
+ void ethsys_reset(struct mtk_eth *eth, u32 reset_bits);
+ 
++int mtk_eth_offload_init(struct mtk_eth *eth);
++int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
++		     void *type_data);
++
+ #endif /* MTK_ETH_H */
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
+new file mode 100644
+index 000000000..66298e223
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
+@@ -0,0 +1,509 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
++
++#include <linux/kernel.h>
++#include <linux/io.h>
++#include <linux/iopoll.h>
++#include <linux/etherdevice.h>
++#include <linux/platform_device.h>
++#include "mtk_ppe.h"
++#include "mtk_ppe_regs.h"
++
++static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
++{
++	writel(val, ppe->base + reg);
++}
++
++static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg)
++{
++	return readl(ppe->base + reg);
++}
++
++static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set)
++{
++	u32 val;
++
++	val = ppe_r32(ppe, reg);
++	val &= ~mask;
++	val |= set;
++	ppe_w32(ppe, reg, val);
++
++	return val;
++}
++
++static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val)
++{
++	return ppe_m32(ppe, reg, 0, val);
++}
++
++static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
++{
++	return ppe_m32(ppe, reg, val, 0);
++}
++
++static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
++{
++	int ret;
++	u32 val;
++
++	ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val,
++				 !(val & MTK_PPE_GLO_CFG_BUSY),
++				 20, MTK_PPE_WAIT_TIMEOUT_US);
++	/* non-zero: BUSY still set after MTK_PPE_WAIT_TIMEOUT_US (20us step) */
++	if (ret)
++		dev_err(ppe->dev, "PPE table busy\n");
++
++	return ret;
++}
++
++static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
++{
++	ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
++	ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
++}
++
++static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable)
++{
++	mtk_ppe_cache_clear(ppe);
++
++	ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN,
++		enable * MTK_PPE_CACHE_CTL_EN);
++}
++
++static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
++{
++	u32 hv1, hv2, hv3;
++	u32 hash;
++
++	switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) {
++		case MTK_PPE_PKT_TYPE_BRIDGE:
++			hv1 = e->bridge.src_mac_lo;
++			hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16);
++			hv2 = e->bridge.src_mac_hi >> 16;
++			hv2 ^= e->bridge.dest_mac_lo;
++			hv3 = e->bridge.dest_mac_hi;
++			break;
++		case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
++		case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
++			hv1 = e->ipv4.orig.ports;
++			hv2 = e->ipv4.orig.dest_ip;
++			hv3 = e->ipv4.orig.src_ip;
++			break;
++		case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
++		case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
++			hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3];
++			hv1 ^= e->ipv6.ports;
++
++			hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2];
++			hv2 ^= e->ipv6.dest_ip[0];
++
++			hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1];
++			hv3 ^= e->ipv6.src_ip[0];
++			break;
++		case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
++		case MTK_PPE_PKT_TYPE_IPV6_6RD:
++		default:
++			WARN_ON_ONCE(1);
++			return MTK_PPE_HASH_MASK;
++	}
++
++	hash = (hv1 & hv2) | ((~hv1) & hv3);
++	hash = (hash >> 24) | ((hash & 0xffffff) << 8);
++	hash ^= hv1 ^ hv2 ^ hv3;
++	hash ^= hash >> 16;
++	hash <<= 1;
++	hash &= MTK_PPE_ENTRIES - 1;
++
++	return hash;
++}
++
++static inline struct mtk_foe_mac_info *
++mtk_foe_entry_l2(struct mtk_foe_entry *entry)
++{
++	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
++
++	if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
++		return &entry->ipv6.l2;
++
++	return &entry->ipv4.l2;
++}
++
++static inline u32 *
++mtk_foe_entry_ib2(struct mtk_foe_entry *entry)
++{
++	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
++
++	if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
++		return &entry->ipv6.ib2;
++
++	return &entry->ipv4.ib2;
++}
++
++int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
++			  u8 pse_port, u8 *src_mac, u8 *dest_mac)
++{
++	struct mtk_foe_mac_info *l2;
++	u32 ports_pad, val;
++
++	memset(entry, 0, sizeof(*entry));
++
++	val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
++	      FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
++	      FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
++	      MTK_FOE_IB1_BIND_TTL |
++	      MTK_FOE_IB1_BIND_CACHE;
++	entry->ib1 = val;
++
++	val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
++	      FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) |
++	      FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port);
++
++	if (is_multicast_ether_addr(dest_mac))
++		val |= MTK_FOE_IB2_MULTICAST;
++
++	ports_pad = 0xa5a5a500 | (l4proto & 0xff);
++	if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
++		entry->ipv4.orig.ports = ports_pad;
++	if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
++		entry->ipv6.ports = ports_pad;
++
++	if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
++		entry->ipv6.ib2 = val;
++		l2 = &entry->ipv6.l2;
++	} else {
++		entry->ipv4.ib2 = val;
++		l2 = &entry->ipv4.l2;
++	}
++
++	l2->dest_mac_hi = get_unaligned_be32(dest_mac);
++	l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4);
++	l2->src_mac_hi = get_unaligned_be32(src_mac);
++	l2->src_mac_lo = get_unaligned_be16(src_mac + 4);
++
++	if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
++		l2->etype = ETH_P_IPV6;
++	else
++		l2->etype = ETH_P_IP;
++
++	return 0;
++}
++
++int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port)
++{
++	u32 *ib2 = mtk_foe_entry_ib2(entry);
++	u32 val;
++
++	val = *ib2;
++	val &= ~MTK_FOE_IB2_DEST_PORT;
++	val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port);
++	*ib2 = val;
++
++	return 0;
++}
++
++int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress,
++				 __be32 src_addr, __be16 src_port,
++				 __be32 dest_addr, __be16 dest_port)
++{
++	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
++	struct mtk_ipv4_tuple *t;
++
++	switch (type) {
++	case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
++		if (egress) {
++			t = &entry->ipv4.new;
++			break;
++		}
++		fallthrough;
++	case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
++	case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
++		t = &entry->ipv4.orig;
++		break;
++	case MTK_PPE_PKT_TYPE_IPV6_6RD:
++		entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr);
++		entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr);
++		return 0;
++	default:
++		WARN_ON_ONCE(1);
++		return -EINVAL;
++	}
++
++	t->src_ip = be32_to_cpu(src_addr);
++	t->dest_ip = be32_to_cpu(dest_addr);
++
++	if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
++		return 0;
++
++	t->src_port = be16_to_cpu(src_port);
++	t->dest_port = be16_to_cpu(dest_port);
++
++	return 0;
++}
++
++int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
++				 __be32 *src_addr, __be16 src_port,
++				 __be32 *dest_addr, __be16 dest_port)
++{
++	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
++	u32 *src, *dest;
++	int i;
++
++	switch (type) {
++	case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
++		src = entry->dslite.tunnel_src_ip;
++		dest = entry->dslite.tunnel_dest_ip;
++		break;
++	case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
++	case MTK_PPE_PKT_TYPE_IPV6_6RD:
++		entry->ipv6.src_port = be16_to_cpu(src_port);
++		entry->ipv6.dest_port = be16_to_cpu(dest_port);
++		fallthrough;
++	case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
++		src = entry->ipv6.src_ip;
++		dest = entry->ipv6.dest_ip;
++		break;
++	default:
++		WARN_ON_ONCE(1);
++		return -EINVAL;
++	}
++
++	for (i = 0; i < 4; i++)
++		src[i] = be32_to_cpu(src_addr[i]);
++	for (i = 0; i < 4; i++)
++		dest[i] = be32_to_cpu(dest_addr[i]);
++
++	return 0;
++}
++
++int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port)
++{
++	struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
++
++	l2->etype = BIT(port);
++
++	if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER))
++		entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
++	else
++		l2->etype |= BIT(8);
++
++	entry->ib1 &= ~MTK_FOE_IB1_BIND_VLAN_TAG;
++
++	return 0;
++}
++
++int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid)
++{
++	struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
++
++	switch (FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, entry->ib1)) {
++	case 0:
++		entry->ib1 |= MTK_FOE_IB1_BIND_VLAN_TAG |
++			      FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
++		l2->vlan1 = vid;
++		return 0;
++	case 1:
++		if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) {
++			l2->vlan1 = vid;
++			l2->etype |= BIT(8);
++		} else {
++			l2->vlan2 = vid;
++			entry->ib1 += FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
++		}
++		return 0;
++	default:
++		return -ENOSPC;
++	}
++}
++
++int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid)
++{
++	struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
++
++	if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER) ||
++	    (entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG))
++		l2->etype = ETH_P_PPP_SES;
++
++	entry->ib1 |= MTK_FOE_IB1_BIND_PPPOE;
++	l2->pppoe_id = sid;
++
++	return 0;
++}
++
++static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
++{
++	return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
++	       FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
++}
++
++int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
++			 u16 timestamp)
++{
++	struct mtk_foe_entry *hwe;
++	u32 hash;
++
++	timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP;
++	entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
++	entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp);
++
++	hash = mtk_ppe_hash_entry(entry);
++	if (hash == MTK_PPE_HASH_MASK) /* no valid slot: unhashable type */
++		return -EINVAL;
++	hwe = &ppe->foe_table[hash];
++	if (!mtk_foe_entry_usable(hwe)) {
++		hwe++;
++		hash++;
++		if (!mtk_foe_entry_usable(hwe))
++			return -ENOSPC;
++	}
++	/* fill the data words first; ib1 is written only after the barrier */
++	memcpy(&hwe->data, &entry->data, sizeof(hwe->data));
++	wmb();
++	hwe->ib1 = entry->ib1;
++	dma_wmb();
++
++	mtk_ppe_cache_clear(ppe);
++
++	return hash;
++}
++
++int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
++		 int version)
++{
++	struct mtk_foe_entry *foe;
++
++	/* PPE DMA access is not coherent. NOTE(review): no separate device
++	 * is allocated here, the caller's dev is used as-is - confirm.
++	 */
++	ppe->base = base;
++	ppe->dev = dev;
++	ppe->version = version;
++
++	foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
++				  &ppe->foe_phys, GFP_KERNEL);
++	if (!foe)
++		return -ENOMEM;
++
++	ppe->foe_table = foe;
++
++	mtk_ppe_debugfs_init(ppe);
++
++	return 0;
++}
++
++static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
++{
++	static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
++	int i, k;
++
++	memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
++
++	if (!IS_ENABLED(CONFIG_SOC_MT7621))
++		return;
++
++	/* skip all entries that cross the 1024 byte boundary */
++	for (i = 0; i < MTK_PPE_ENTRIES; i += 128)
++		for (k = 0; k < ARRAY_SIZE(skip); k++)
++			ppe->foe_table[i + skip[k]].ib1 |= MTK_FOE_IB1_STATIC;
++}
++
++int mtk_ppe_start(struct mtk_ppe *ppe)
++{
++	u32 val;
++
++	mtk_ppe_init_foe_table(ppe);
++	ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys);
++
++	val = MTK_PPE_TB_CFG_ENTRY_80B |
++	      MTK_PPE_TB_CFG_AGE_NON_L4 |
++	      MTK_PPE_TB_CFG_AGE_UNBIND |
++	      MTK_PPE_TB_CFG_AGE_TCP |
++	      MTK_PPE_TB_CFG_AGE_UDP |
++	      MTK_PPE_TB_CFG_AGE_TCP_FIN |
++	      FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS,
++			 MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) |
++	      FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
++			 MTK_PPE_KEEPALIVE_DISABLE) |
++	      FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) |
++	      FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
++			 MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) |
++	      FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM,
++			 MTK_PPE_ENTRIES_SHIFT);
++	ppe_w32(ppe, MTK_PPE_TB_CFG, val);
++
++	ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK,
++		MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6);
++
++	mtk_ppe_cache_enable(ppe, true);
++
++	val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG |
++	      MTK_PPE_FLOW_CFG_IP4_UDP_FRAG |
++	      MTK_PPE_FLOW_CFG_IP6_3T_ROUTE |
++	      MTK_PPE_FLOW_CFG_IP6_5T_ROUTE |
++	      MTK_PPE_FLOW_CFG_IP6_6RD |
++	      MTK_PPE_FLOW_CFG_IP4_NAT |
++	      MTK_PPE_FLOW_CFG_IP4_NAPT |
++	      MTK_PPE_FLOW_CFG_IP4_DSLITE |
++	      MTK_PPE_FLOW_CFG_L2_BRIDGE |
++	      MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
++	ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
++
++	val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) |
++	      FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3);
++	ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val);
++
++	val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 12) |
++	      FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1);
++	ppe_w32(ppe, MTK_PPE_BIND_AGE0, val);
++
++	val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) |
++	      FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 7);
++	ppe_w32(ppe, MTK_PPE_BIND_AGE1, val);
++
++	val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF;
++	ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val);
++
++	val = MTK_PPE_BIND_LIMIT1_FULL |
++	      FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1);
++	ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val);
++
++	val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) |
++	      FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1);
++	ppe_w32(ppe, MTK_PPE_BIND_RATE, val);
++
++	/* enable PPE */
++	val = MTK_PPE_GLO_CFG_EN |
++	      MTK_PPE_GLO_CFG_IP4_L4_CS_DROP |
++	      MTK_PPE_GLO_CFG_IP4_CS_DROP |
++	      MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE;
++	ppe_w32(ppe, MTK_PPE_GLO_CFG, val);
++
++	ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
++
++	return 0;
++}
++
++int mtk_ppe_stop(struct mtk_ppe *ppe)
++{
++	u32 val;
++	int i;
++
++	for (i = 0; i < MTK_PPE_ENTRIES; i++)
++		ppe->foe_table[i].ib1 = FIELD_PREP(MTK_FOE_IB1_STATE,
++						   MTK_FOE_STATE_INVALID);
++
++	mtk_ppe_cache_enable(ppe, false);
++
++	/* disable offload engine */
++	ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN);
++	ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0);
++
++	/* disable aging */
++	val = MTK_PPE_TB_CFG_AGE_NON_L4 |
++	      MTK_PPE_TB_CFG_AGE_UNBIND |
++	      MTK_PPE_TB_CFG_AGE_TCP |
++	      MTK_PPE_TB_CFG_AGE_UDP |
++	      MTK_PPE_TB_CFG_AGE_TCP_FIN;
++	ppe_clear(ppe, MTK_PPE_TB_CFG, val);
++
++	return mtk_ppe_wait_busy(ppe);
++}
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
+new file mode 100644
+index 000000000..242fb8f2a
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
+@@ -0,0 +1,288 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
++
++#ifndef __MTK_PPE_H
++#define __MTK_PPE_H
++
++#include <linux/kernel.h>
++#include <linux/bitfield.h>
++
++#define MTK_ETH_PPE_BASE		0xc00
++
++#define MTK_PPE_ENTRIES_SHIFT		3
++#define MTK_PPE_ENTRIES			(1024 << MTK_PPE_ENTRIES_SHIFT)
++#define MTK_PPE_HASH_MASK		(MTK_PPE_ENTRIES - 1)
++#define MTK_PPE_WAIT_TIMEOUT_US		1000000
++
++#define MTK_FOE_IB1_UNBIND_TIMESTAMP	GENMASK(7, 0)
++#define MTK_FOE_IB1_UNBIND_PACKETS	GENMASK(23, 8)
++#define MTK_FOE_IB1_UNBIND_PREBIND	BIT(24)
++
++#define MTK_FOE_IB1_BIND_TIMESTAMP	GENMASK(14, 0)
++#define MTK_FOE_IB1_BIND_KEEPALIVE	BIT(15)
++#define MTK_FOE_IB1_BIND_VLAN_LAYER	GENMASK(18, 16)
++#define MTK_FOE_IB1_BIND_PPPOE		BIT(19)
++#define MTK_FOE_IB1_BIND_VLAN_TAG	BIT(20)
++#define MTK_FOE_IB1_BIND_PKT_SAMPLE	BIT(21)
++#define MTK_FOE_IB1_BIND_CACHE		BIT(22)
++#define MTK_FOE_IB1_BIND_TUNNEL_DECAP	BIT(23)
++#define MTK_FOE_IB1_BIND_TTL		BIT(24)
++
++#define MTK_FOE_IB1_PACKET_TYPE		GENMASK(27, 25)
++#define MTK_FOE_IB1_STATE		GENMASK(29, 28)
++#define MTK_FOE_IB1_UDP			BIT(30)
++#define MTK_FOE_IB1_STATIC		BIT(31)
++
++enum {
++	MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0,
++	MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1,
++	MTK_PPE_PKT_TYPE_BRIDGE = 2,
++	MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3,
++	MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4,
++	MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5,
++	MTK_PPE_PKT_TYPE_IPV6_6RD = 7,
++};
++
++#define MTK_FOE_IB2_QID			GENMASK(3, 0)
++#define MTK_FOE_IB2_PSE_QOS		BIT(4)
++#define MTK_FOE_IB2_DEST_PORT		GENMASK(7, 5)
++#define MTK_FOE_IB2_MULTICAST		BIT(8)
++
++#define MTK_FOE_IB2_WHNAT_QID2		GENMASK(13, 12)
++#define MTK_FOE_IB2_WHNAT_DEVIDX	BIT(16)
++#define MTK_FOE_IB2_WHNAT_NAT		BIT(17)
++
++#define MTK_FOE_IB2_PORT_MG		GENMASK(17, 12)
++
++#define MTK_FOE_IB2_PORT_AG		GENMASK(23, 18)
++
++#define MTK_FOE_IB2_DSCP		GENMASK(31, 24)
++
++#define MTK_FOE_VLAN2_WHNAT_BSS		GENMASK(5, 0)
++#define MTK_FOE_VLAN2_WHNAT_WCID	GENMASK(13, 6)
++#define MTK_FOE_VLAN2_WHNAT_RING	GENMASK(15, 14)
++
++enum {
++	MTK_FOE_STATE_INVALID,
++	MTK_FOE_STATE_UNBIND,
++	MTK_FOE_STATE_BIND,
++	MTK_FOE_STATE_FIN
++};
++
++struct mtk_foe_mac_info {
++	u16 vlan1;
++	u16 etype;
++
++	u32 dest_mac_hi;
++
++	u16 vlan2;
++	u16 dest_mac_lo;
++
++	u32 src_mac_hi;
++
++	u16 pppoe_id;
++	u16 src_mac_lo;
++};
++
++struct mtk_foe_bridge {
++	u32 dest_mac_hi;
++
++	u16 src_mac_lo;
++	u16 dest_mac_lo;
++
++	u32 src_mac_hi;
++
++	u32 ib2;
++
++	u32 _rsv[5];
++
++	u32 udf_tsid;
++	struct mtk_foe_mac_info l2;
++};
++
++struct mtk_ipv4_tuple {
++	u32 src_ip;
++	u32 dest_ip;
++	union {
++		struct {
++			u16 dest_port;
++			u16 src_port;
++		};
++		struct {
++			u8 protocol;
++			u8 _pad[3]; /* fill with 0xa5a5a5 */
++		};
++		u32 ports;
++	};
++};
++
++struct mtk_foe_ipv4 {
++	struct mtk_ipv4_tuple orig;
++
++	u32 ib2;
++
++	struct mtk_ipv4_tuple new;
++
++	u16 timestamp;
++	u16 _rsv0[3];
++
++	u32 udf_tsid;
++
++	struct mtk_foe_mac_info l2;
++};
++
++struct mtk_foe_ipv4_dslite {
++	struct mtk_ipv4_tuple ip4;
++
++	u32 tunnel_src_ip[4];
++	u32 tunnel_dest_ip[4];
++
++	u8 flow_label[3];
++	u8 priority;
++
++	u32 udf_tsid;
++
++	u32 ib2;
++
++	struct mtk_foe_mac_info l2;
++};
++
++struct mtk_foe_ipv6 {
++	u32 src_ip[4];
++	u32 dest_ip[4];
++
++	union {
++		struct {
++			u8 protocol;
++			u8 _pad[3]; /* fill with 0xa5a5a5 */
++		}; /* 3-tuple */
++		struct {
++			u16 dest_port;
++			u16 src_port;
++		}; /* 5-tuple */
++		u32 ports;
++	};
++
++	u32 _rsv[3];
++
++	u32 udf;
++
++	u32 ib2;
++	struct mtk_foe_mac_info l2;
++};
++
++struct mtk_foe_ipv6_6rd {
++	u32 src_ip[4];
++	u32 dest_ip[4];
++	u16 dest_port;
++	u16 src_port;
++
++	u32 tunnel_src_ip;
++	u32 tunnel_dest_ip;
++
++	u16 hdr_csum;
++	u8 dscp;
++	u8 ttl;
++
++	u8 flag;
++	u8 pad;
++	u8 per_flow_6rd_id;
++	u8 pad2;
++
++	u32 ib2;
++	struct mtk_foe_mac_info l2;
++};
++
++struct mtk_foe_entry {
++	u32 ib1;
++
++	union {
++		struct mtk_foe_bridge bridge;
++		struct mtk_foe_ipv4 ipv4;
++		struct mtk_foe_ipv4_dslite dslite;
++		struct mtk_foe_ipv6 ipv6;
++		struct mtk_foe_ipv6_6rd ipv6_6rd;
++		u32 data[19];
++	};
++};
++
++enum {
++	MTK_PPE_CPU_REASON_TTL_EXCEEDED			= 0x02,
++	MTK_PPE_CPU_REASON_OPTION_HEADER		= 0x03,
++	MTK_PPE_CPU_REASON_NO_FLOW			= 0x07,
++	MTK_PPE_CPU_REASON_IPV4_FRAG			= 0x08,
++	MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG		= 0x09,
++	MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP	= 0x0a,
++	MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP		= 0x0b,
++	MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST		= 0x0c,
++	MTK_PPE_CPU_REASON_UN_HIT			= 0x0d,
++	MTK_PPE_CPU_REASON_HIT_UNBIND			= 0x0e,
++	MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED	= 0x0f,
++	MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN		= 0x10,
++	MTK_PPE_CPU_REASON_HIT_TTL_1			= 0x11,
++	MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION	= 0x12,
++	MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR		= 0x13,
++	MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR		= 0x14,
++	MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR	= 0x15,
++	MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU		= 0x16,
++	MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER		= 0x17,
++	MTK_PPE_CPU_REASON_MULTICAST_TO_CPU		= 0x18,
++	MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU	= 0x19,
++	MTK_PPE_CPU_REASON_HIT_PRE_BIND			= 0x1a,
++	MTK_PPE_CPU_REASON_PACKET_SAMPLING		= 0x1b,
++	MTK_PPE_CPU_REASON_EXCEED_MTU			= 0x1c,
++	MTK_PPE_CPU_REASON_PPE_BYPASS			= 0x1e,
++	MTK_PPE_CPU_REASON_INVALID			= 0x1f,
++};
++
++struct mtk_ppe {
++	struct device *dev;
++	void __iomem *base;
++	int version;
++
++	struct mtk_foe_entry *foe_table;
++	dma_addr_t foe_phys;
++
++	void *acct_table;
++};
++
++int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
++		 int version);
++int mtk_ppe_start(struct mtk_ppe *ppe);
++int mtk_ppe_stop(struct mtk_ppe *ppe);
++
++static inline void
++mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash)
++{
++	ppe->foe_table[hash].ib1 = 0;
++	dma_wmb();
++}
++
++static inline int
++mtk_foe_entry_timestamp(struct mtk_ppe *ppe, u16 hash)
++{
++	u32 ib1 = READ_ONCE(ppe->foe_table[hash].ib1);
++
++	if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND)
++		return -1;
++
++	return FIELD_GET(MTK_FOE_IB1_BIND_TIMESTAMP, ib1);
++}
++
++int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
++			  u8 pse_port, u8 *src_mac, u8 *dest_mac);
++int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port);
++int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress,
++				 __be32 src_addr, __be16 src_port,
++				 __be32 dest_addr, __be16 dest_port);
++int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
++				 __be32 *src_addr, __be16 src_port,
++				 __be32 *dest_addr, __be16 dest_port);
++int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port);
++int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid);
++int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid);
++int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
++			 u16 timestamp);
++int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
++
++#endif
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
+new file mode 100644
+index 000000000..d4b482340
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
+@@ -0,0 +1,214 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
++
++#include <linux/kernel.h>
++#include <linux/debugfs.h>
++#include "mtk_eth_soc.h"
++
++struct mtk_flow_addr_info
++{
++	void *src, *dest;
++	u16 *src_port, *dest_port;
++	bool ipv6;
++};
++
++static const char *mtk_foe_entry_state_str(int state)
++{
++	static const char * const state_str[] = {
++		[MTK_FOE_STATE_INVALID] = "INV",
++		[MTK_FOE_STATE_UNBIND] = "UNB",
++		[MTK_FOE_STATE_BIND] = "BND",
++		[MTK_FOE_STATE_FIN] = "FIN",
++	};
++
++	if (state >= ARRAY_SIZE(state_str) || !state_str[state])
++		return "UNK";
++
++	return state_str[state];
++}
++
++static const char *mtk_foe_pkt_type_str(int type)
++{
++	static const char * const type_str[] = {
++		[MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
++		[MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
++		[MTK_PPE_PKT_TYPE_BRIDGE] = "L2",
++		[MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
++		[MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
++		[MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
++		[MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD",
++	};
++
++	if (type >= ARRAY_SIZE(type_str) || !type_str[type])
++		return "UNKNOWN";
++
++	return type_str[type];
++}
++
++static void
++mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6)
++{
++	u32 n_addr[4];
++	int i;
++
++	if (!ipv6) {
++		seq_printf(m, "%pI4h", addr);
++		return;
++	}
++
++	for (i = 0; i < ARRAY_SIZE(n_addr); i++)
++		n_addr[i] = htonl(addr[i]);
++	seq_printf(m, "%pI6", n_addr);
++}
++
++static void
++mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai)
++{
++	mtk_print_addr(m, ai->src, ai->ipv6);
++	if (ai->src_port)
++		seq_printf(m, ":%d", *ai->src_port);
++	seq_printf(m, "->");
++	mtk_print_addr(m, ai->dest, ai->ipv6);
++	if (ai->dest_port)
++		seq_printf(m, ":%d", *ai->dest_port);
++}
++
++static int
++mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
++{
++	struct mtk_ppe *ppe = m->private;
++	int i;
++
++	for (i = 0; i < MTK_PPE_ENTRIES; i++) {
++		struct mtk_foe_entry *entry = &ppe->foe_table[i];
++		struct mtk_foe_mac_info *l2;
++		struct mtk_flow_addr_info ai = {};
++		unsigned char h_source[ETH_ALEN];
++		unsigned char h_dest[ETH_ALEN];
++		int type, state;
++		u32 ib2;
++
++
++		state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1);
++		if (!state)
++			continue;
++
++		if (bind && state != MTK_FOE_STATE_BIND)
++			continue;
++
++		type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
++		seq_printf(m, "%05x %s %7s", i,
++			   mtk_foe_entry_state_str(state),
++			   mtk_foe_pkt_type_str(type));
++
++		switch (type) {
++		case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
++		case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
++			ai.src_port = &entry->ipv4.orig.src_port;
++			ai.dest_port = &entry->ipv4.orig.dest_port;
++			fallthrough;
++		case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
++			ai.src = &entry->ipv4.orig.src_ip;
++			ai.dest = &entry->ipv4.orig.dest_ip;
++			break;
++		case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
++			ai.src_port = &entry->ipv6.src_port;
++			ai.dest_port = &entry->ipv6.dest_port;
++			fallthrough;
++		case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
++		case MTK_PPE_PKT_TYPE_IPV6_6RD:
++			ai.src = &entry->ipv6.src_ip;
++			ai.dest = &entry->ipv6.dest_ip;
++			ai.ipv6 = true;
++			break;
++		}
++
++		seq_printf(m, " orig=");
++		mtk_print_addr_info(m, &ai);
++
++		switch (type) {
++		case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
++		case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
++			ai.src_port = &entry->ipv4.new.src_port;
++			ai.dest_port = &entry->ipv4.new.dest_port;
++			fallthrough;
++		case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
++			ai.src = &entry->ipv4.new.src_ip;
++			ai.dest = &entry->ipv4.new.dest_ip;
++			seq_printf(m, " new=");
++			mtk_print_addr_info(m, &ai);
++			break;
++		}
++
++		if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
++			l2 = &entry->ipv6.l2;
++			ib2 = entry->ipv6.ib2;
++		} else {
++			l2 = &entry->ipv4.l2;
++			ib2 = entry->ipv4.ib2;
++		}
++
++		*((__be32 *)h_source) = htonl(l2->src_mac_hi);
++		*((__be16 *)&h_source[4]) = htons(l2->src_mac_lo);
++		*((__be32 *)h_dest) = htonl(l2->dest_mac_hi);
++		*((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo);
++
++		seq_printf(m, " eth=%pM->%pM etype=%04x"
++			      " vlan=%d,%d ib1=%08x ib2=%08x\n",
++			   h_source, h_dest, ntohs(l2->etype),
++			   l2->vlan1, l2->vlan2, entry->ib1, ib2);
++	}
++
++	return 0;
++}
++
++static int
++mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private)
++{
++	return mtk_ppe_debugfs_foe_show(m, private, false);
++}
++
++static int
++mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private)
++{
++	return mtk_ppe_debugfs_foe_show(m, private, true);
++}
++
++static int
++mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file)
++{
++	return single_open(file, mtk_ppe_debugfs_foe_show_all,
++			   inode->i_private);
++}
++
++static int
++mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file)
++{
++	return single_open(file, mtk_ppe_debugfs_foe_show_bind,
++			   inode->i_private);
++}
++
++int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
++{
++	static const struct file_operations fops_all = {
++		.open = mtk_ppe_debugfs_foe_open_all,
++		.read = seq_read,
++		.llseek = seq_lseek,
++		.release = single_release,
++	};
++
++	static const struct file_operations fops_bind = {
++		.open = mtk_ppe_debugfs_foe_open_bind,
++		.read = seq_read,
++		.llseek = seq_lseek,
++		.release = single_release,
++	};
++
++	struct dentry *root;
++
++	root = debugfs_create_dir("mtk_ppe", NULL);
++	debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
++	debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
++
++	return 0;
++}
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+new file mode 100644
+index 000000000..4294f0c74
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+@@ -0,0 +1,526 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ *  Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
++ */
++
++#include <linux/if_ether.h>
++#include <linux/rhashtable.h>
++#include <linux/ip.h>
++#include <linux/ipv6.h>
++#include <net/flow_offload.h>
++#include <net/pkt_cls.h>
++#include <net/dsa.h>
++#include "mtk_eth_soc.h"
++
++struct mtk_flow_data {
++	struct ethhdr eth;
++
++	union {
++		struct {
++			__be32 src_addr;
++			__be32 dst_addr;
++		} v4;
++
++		struct {
++			struct in6_addr src_addr;
++			struct in6_addr dst_addr;
++		} v6;
++	};
++
++	__be16 src_port;
++	__be16 dst_port;
++
++	struct {
++		u16 id;
++		__be16 proto;
++		u8 num;
++	} vlan;
++	struct {
++		u16 sid;
++		u8 num;
++	} pppoe;
++};
++
++struct mtk_flow_entry {
++	struct rhash_head node;
++	unsigned long cookie;
++	u16 hash;
++};
++
++static const struct rhashtable_params mtk_flow_ht_params = {
++	.head_offset = offsetof(struct mtk_flow_entry, node),
++	.key_offset = offsetof(struct mtk_flow_entry, cookie),
++	.key_len = sizeof(unsigned long),
++	.automatic_shrinking = true,
++};
++
++static u32
++mtk_eth_timestamp(struct mtk_eth *eth)
++{
++	return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
++}
++
++static int
++mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
++		       bool egress)
++{
++	return mtk_foe_entry_set_ipv4_tuple(foe, egress,
++					    data->v4.src_addr, data->src_port,
++					    data->v4.dst_addr, data->dst_port);
++}
++
++static int
++mtk_flow_set_ipv6_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data)
++{
++	return mtk_foe_entry_set_ipv6_tuple(foe,
++					    data->v6.src_addr.s6_addr32, data->src_port,
++					    data->v6.dst_addr.s6_addr32, data->dst_port);
++}
++
++static void
++mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
++{
++	void *dest = eth + act->mangle.offset;
++	const void *src = &act->mangle.val;
++
++	if (act->mangle.offset > 8)
++		return;
++
++	if (act->mangle.mask == 0xffff) {
++		src += 2;
++		dest += 2;
++	}
++
++	memcpy(dest, src, act->mangle.mask ? 2 : 4);
++}
++
++
++static int
++mtk_flow_mangle_ports(const struct flow_action_entry *act,
++		      struct mtk_flow_data *data)
++{
++	u32 val = ntohl(act->mangle.val);
++
++	switch (act->mangle.offset) {
++	case 0:
++		if (act->mangle.mask == ~htonl(0xffff))
++			data->dst_port = cpu_to_be16(val);
++		else
++			data->src_port = cpu_to_be16(val >> 16);
++		break;
++	case 2:
++		data->dst_port = cpu_to_be16(val);
++		break;
++	default:
++		return -EINVAL;
++	}
++
++	return 0;
++}
++
++static int
++mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
++		     struct mtk_flow_data *data)
++{
++	__be32 *dest;
++
++	switch (act->mangle.offset) {
++	case offsetof(struct iphdr, saddr):
++		dest = &data->v4.src_addr;
++		break;
++	case offsetof(struct iphdr, daddr):
++		dest = &data->v4.dst_addr;
++		break;
++	default:
++		return -EINVAL;
++	}
++
++	memcpy(dest, &act->mangle.val, sizeof(u32));
++
++	return 0;
++}
++
++static int
++mtk_flow_get_dsa_port(struct net_device **dev)
++{
++#if IS_ENABLED(CONFIG_NET_DSA)
++	struct dsa_port *dp;
++
++	dp = dsa_port_from_netdev(*dev);
++	if (IS_ERR(dp))
++		return -ENODEV;
++
++	if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
++		return -ENODEV;
++
++	*dev = dp->cpu_dp->master;
++
++	return dp->index;
++#else
++	return -ENODEV;
++#endif
++}
++
++static int
++mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
++			   struct net_device *dev)
++{
++	int pse_port, dsa_port;
++
++	dsa_port = mtk_flow_get_dsa_port(&dev);
++	if (dsa_port >= 0)
++		mtk_foe_entry_set_dsa(foe, dsa_port);
++
++	if (dev == eth->netdev[0])
++		pse_port = 1;
++	else if (dev == eth->netdev[1])
++		pse_port = 2;
++	else
++		return -EOPNOTSUPP;
++
++	mtk_foe_entry_set_pse_port(foe, pse_port);
++
++	return 0;
++}
++
++static int
++mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
++{
++	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
++	struct flow_action_entry *act;
++	struct mtk_flow_data data = {};
++	struct mtk_foe_entry foe;
++	struct net_device *odev = NULL;
++	struct mtk_flow_entry *entry;
++	int offload_type = 0;
++	u16 addr_type = 0;
++	u32 timestamp;
++	u8 l4proto = 0;
++	int err = 0;
++	int hash;
++	int i;
++
++	if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params))
++		return -EEXIST;
++
++	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
++		struct flow_match_meta match;
++
++		flow_rule_match_meta(rule, &match);
++	} else {
++		return -EOPNOTSUPP;
++	}
++
++	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
++		struct flow_match_control match;
++
++		flow_rule_match_control(rule, &match);
++		addr_type = match.key->addr_type;
++	} else {
++		return -EOPNOTSUPP;
++	}
++
++	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
++		struct flow_match_basic match;
++
++		flow_rule_match_basic(rule, &match);
++		l4proto = match.key->ip_proto;
++	} else {
++		return -EOPNOTSUPP;
++	}
++
++	flow_action_for_each(i, act, &rule->action) {
++		switch (act->id) {
++		case FLOW_ACTION_MANGLE:
++			if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
++				mtk_flow_offload_mangle_eth(act, &data.eth);
++			break;
++		case FLOW_ACTION_REDIRECT:
++			odev = act->dev;
++			break;
++		case FLOW_ACTION_CSUM:
++			break;
++		case FLOW_ACTION_VLAN_PUSH:
++			if (data.vlan.num == 1 ||
++			    act->vlan.proto != htons(ETH_P_8021Q))
++				return -EOPNOTSUPP;
++
++			data.vlan.id = act->vlan.vid;
++			data.vlan.proto = act->vlan.proto;
++			data.vlan.num++;
++			break;
++		case FLOW_ACTION_VLAN_POP:
++			break;
++		case FLOW_ACTION_PPPOE_PUSH:
++			if (data.pppoe.num == 1)
++				return -EOPNOTSUPP;
++
++			data.pppoe.sid = act->pppoe.sid;
++			data.pppoe.num++;
++			break;
++		default:
++			return -EOPNOTSUPP;
++		}
++	}
++
++	switch (addr_type) {
++	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
++		offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
++		break;
++	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
++		offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T;
++		break;
++	default:
++		return -EOPNOTSUPP;
++	}
++
++	if (!is_valid_ether_addr(data.eth.h_source) ||
++	    !is_valid_ether_addr(data.eth.h_dest))
++		return -EINVAL;
++
++	err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0,
++				    data.eth.h_source,
++				    data.eth.h_dest);
++	if (err)
++		return err;
++
++	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
++		struct flow_match_ports ports;
++
++		flow_rule_match_ports(rule, &ports);
++		data.src_port = ports.key->src;
++		data.dst_port = ports.key->dst;
++	} else {
++		return -EOPNOTSUPP;
++	}
++
++	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
++		struct flow_match_ipv4_addrs addrs;
++
++		flow_rule_match_ipv4_addrs(rule, &addrs);
++
++		data.v4.src_addr = addrs.key->src;
++		data.v4.dst_addr = addrs.key->dst;
++
++		mtk_flow_set_ipv4_addr(&foe, &data, false);
++	}
++
++	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
++		struct flow_match_ipv6_addrs addrs;
++
++		flow_rule_match_ipv6_addrs(rule, &addrs);
++
++		data.v6.src_addr = addrs.key->src;
++		data.v6.dst_addr = addrs.key->dst;
++
++		mtk_flow_set_ipv6_addr(&foe, &data);
++	}
++
++	flow_action_for_each(i, act, &rule->action) {
++		if (act->id != FLOW_ACTION_MANGLE)
++			continue;
++
++		switch (act->mangle.htype) {
++		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
++		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
++			err = mtk_flow_mangle_ports(act, &data);
++			break;
++		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
++			err = mtk_flow_mangle_ipv4(act, &data);
++			break;
++		case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
++			/* handled earlier */
++			break;
++		default:
++			return -EOPNOTSUPP;
++		}
++
++		if (err)
++			return err;
++	}
++
++	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
++		err = mtk_flow_set_ipv4_addr(&foe, &data, true);
++		if (err)
++			return err;
++	}
++
++	if (data.vlan.num == 1) {
++		if (data.vlan.proto != htons(ETH_P_8021Q))
++			return -EOPNOTSUPP;
++
++		mtk_foe_entry_set_vlan(&foe, data.vlan.id);
++	}
++	if (data.pppoe.num == 1)
++		mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
++
++	err = mtk_flow_set_output_device(eth, &foe, odev);
++	if (err)
++		return err;
++
++	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
++	if (!entry)
++		return -ENOMEM;
++
++	entry->cookie = f->cookie;
++	timestamp = mtk_eth_timestamp(eth);
++	hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
++	if (hash < 0) {
++		err = hash;
++		goto free;
++	}
++
++	entry->hash = hash;
++	err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
++				     mtk_flow_ht_params);
++	if (err < 0)
++		goto clear_flow;
++
++	return 0;
++clear_flow:
++	mtk_foe_entry_clear(&eth->ppe, hash);
++free:
++	kfree(entry);
++	return err;
++}
++
++static int
++mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
++{
++	struct mtk_flow_entry *entry;
++
++	entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
++				  mtk_flow_ht_params);
++	if (!entry)
++		return -ENOENT;
++
++	mtk_foe_entry_clear(&eth->ppe, entry->hash);
++	rhashtable_remove_fast(&eth->flow_table, &entry->node,
++			       mtk_flow_ht_params);
++	kfree(entry);
++
++	return 0;
++}
++
++static int
++mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
++{
++	struct mtk_flow_entry *entry;
++	int timestamp;
++	u32 idle;
++
++	entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
++				  mtk_flow_ht_params);
++	if (!entry)
++		return -ENOENT;
++
++	timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
++	if (timestamp < 0)
++		return -ETIMEDOUT;
++	/* Masked counters wrap: keep the delta within the timestamp field. */
++	idle = (mtk_eth_timestamp(eth) - timestamp) & MTK_FOE_IB1_BIND_TIMESTAMP;
++	f->stats.lastused = jiffies - idle * HZ;
++
++	return 0;
++}
++
++static DEFINE_MUTEX(mtk_flow_offload_mutex);
++
++static int
++mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
++{
++	struct flow_cls_offload *cls = type_data;
++	struct net_device *dev = cb_priv;
++	struct mtk_mac *mac = netdev_priv(dev);
++	struct mtk_eth *eth = mac->hw;
++	int err;
++
++	if (!tc_can_offload(dev))
++		return -EOPNOTSUPP;
++
++	if (type != TC_SETUP_CLSFLOWER)
++		return -EOPNOTSUPP;
++
++	mutex_lock(&mtk_flow_offload_mutex);
++	switch (cls->command) {
++	case FLOW_CLS_REPLACE:
++		err = mtk_flow_offload_replace(eth, cls);
++		break;
++	case FLOW_CLS_DESTROY:
++		err = mtk_flow_offload_destroy(eth, cls);
++		break;
++	case FLOW_CLS_STATS:
++		err = mtk_flow_offload_stats(eth, cls);
++		break;
++	default:
++		err = -EOPNOTSUPP;
++		break;
++	}
++	mutex_unlock(&mtk_flow_offload_mutex);
++
++	return err;
++}
++
++static int
++mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
++{
++	struct mtk_mac *mac = netdev_priv(dev);
++	struct mtk_eth *eth = mac->hw;
++	static LIST_HEAD(block_cb_list);
++	struct flow_block_cb *block_cb;
++	flow_setup_cb_t *cb;
++
++	if (!eth->ppe.foe_table)
++		return -EOPNOTSUPP;
++
++	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
++		return -EOPNOTSUPP;
++
++	cb = mtk_eth_setup_tc_block_cb;
++	f->driver_block_list = &block_cb_list;
++
++	switch (f->command) {
++	case FLOW_BLOCK_BIND:
++		block_cb = flow_block_cb_lookup(f->block, cb, dev);
++		if (block_cb) {
++			flow_block_cb_incref(block_cb);
++			return 0;
++		}
++		block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
++		if (IS_ERR(block_cb))
++			return PTR_ERR(block_cb);
++
++		flow_block_cb_add(block_cb, f);
++		list_add_tail(&block_cb->driver_list, &block_cb_list);
++		return 0;
++	case FLOW_BLOCK_UNBIND:
++		block_cb = flow_block_cb_lookup(f->block, cb, dev);
++		if (!block_cb)
++			return -ENOENT;
++
++		if (flow_block_cb_decref(block_cb)) {
++			flow_block_cb_remove(block_cb, f);
++			list_del(&block_cb->driver_list);
++		}
++		return 0;
++	default:
++		return -EOPNOTSUPP;
++	}
++}
++
++int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
++		     void *type_data)
++{
++	if (type == TC_SETUP_FT)
++		return mtk_eth_setup_tc_block(dev, type_data);
++
++	return -EOPNOTSUPP;
++}
++
++int mtk_eth_offload_init(struct mtk_eth *eth)
++{
++	if (!eth->ppe.foe_table)
++		return 0;
++
++	return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
++}
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
+new file mode 100644
+index 000000000..0c45ea090
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
+@@ -0,0 +1,144 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
++
++#ifndef __MTK_PPE_REGS_H
++#define __MTK_PPE_REGS_H
++
++#define MTK_PPE_GLO_CFG				0x200
++#define MTK_PPE_GLO_CFG_EN			BIT(0)
++#define MTK_PPE_GLO_CFG_TSID_EN			BIT(1)
++#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP		BIT(2)
++#define MTK_PPE_GLO_CFG_IP4_CS_DROP		BIT(3)
++#define MTK_PPE_GLO_CFG_TTL0_DROP		BIT(4)
++#define MTK_PPE_GLO_CFG_PPE_BSWAP		BIT(5)
++#define MTK_PPE_GLO_CFG_PSE_HASH_OFS		BIT(6)
++#define MTK_PPE_GLO_CFG_MCAST_TB_EN		BIT(7)
++#define MTK_PPE_GLO_CFG_FLOW_DROP_KA		BIT(8)
++#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE	BIT(9)
++#define MTK_PPE_GLO_CFG_UDP_LITE_EN		BIT(10)
++#define MTK_PPE_GLO_CFG_UDP_LEN_DROP		BIT(11)
++#define MTK_PPE_GLO_CFG_MCAST_ENTRIES		GENMASK(13, 12)
++#define MTK_PPE_GLO_CFG_BUSY			BIT(31)
++
++#define MTK_PPE_FLOW_CFG			0x204
++#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG		BIT(6)
++#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG		BIT(7)
++#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE		BIT(8)
++#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE		BIT(9)
++#define MTK_PPE_FLOW_CFG_IP6_6RD		BIT(10)
++#define MTK_PPE_FLOW_CFG_IP4_NAT		BIT(12)
++#define MTK_PPE_FLOW_CFG_IP4_NAPT		BIT(13)
++#define MTK_PPE_FLOW_CFG_IP4_DSLITE		BIT(14)
++#define MTK_PPE_FLOW_CFG_L2_BRIDGE		BIT(15)
++#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST	BIT(16)
++#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG		BIT(17)
++#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL	BIT(18)
++#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY	BIT(19)
++#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY	BIT(20)
++
++#define MTK_PPE_IP_PROTO_CHK			0x208
++#define MTK_PPE_IP_PROTO_CHK_IPV4		GENMASK(15, 0)
++#define MTK_PPE_IP_PROTO_CHK_IPV6		GENMASK(31, 16)
++
++#define MTK_PPE_TB_CFG				0x21c
++#define MTK_PPE_TB_CFG_ENTRY_NUM		GENMASK(2, 0)
++#define MTK_PPE_TB_CFG_ENTRY_80B		BIT(3)
++#define MTK_PPE_TB_CFG_SEARCH_MISS		GENMASK(5, 4)
++#define MTK_PPE_TB_CFG_AGE_PREBIND		BIT(6)
++#define MTK_PPE_TB_CFG_AGE_NON_L4		BIT(7)
++#define MTK_PPE_TB_CFG_AGE_UNBIND		BIT(8)
++#define MTK_PPE_TB_CFG_AGE_TCP			BIT(9)
++#define MTK_PPE_TB_CFG_AGE_UDP			BIT(10)
++#define MTK_PPE_TB_CFG_AGE_TCP_FIN		BIT(11)
++#define MTK_PPE_TB_CFG_KEEPALIVE		GENMASK(13, 12)
++#define MTK_PPE_TB_CFG_HASH_MODE		GENMASK(15, 14)
++#define MTK_PPE_TB_CFG_SCAN_MODE		GENMASK(17, 16)
++#define MTK_PPE_TB_CFG_HASH_DEBUG		GENMASK(19, 18)
++
++enum {
++	MTK_PPE_SCAN_MODE_DISABLED,
++	MTK_PPE_SCAN_MODE_CHECK_AGE,
++	MTK_PPE_SCAN_MODE_KEEPALIVE_AGE,
++};
++
++enum {
++	MTK_PPE_KEEPALIVE_DISABLE,
++	MTK_PPE_KEEPALIVE_UNICAST_CPU,
++	MTK_PPE_KEEPALIVE_DUP_CPU = 3,
++};
++
++enum {
++	MTK_PPE_SEARCH_MISS_ACTION_DROP,
++	MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2,
++	MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3,
++};
++
++#define MTK_PPE_TB_BASE				0x220
++
++#define MTK_PPE_TB_USED				0x224
++#define MTK_PPE_TB_USED_NUM			GENMASK(13, 0)
++
++#define MTK_PPE_BIND_RATE			0x228
++#define MTK_PPE_BIND_RATE_BIND			GENMASK(15, 0)
++#define MTK_PPE_BIND_RATE_PREBIND		GENMASK(31, 16)
++
++#define MTK_PPE_BIND_LIMIT0			0x22c
++#define MTK_PPE_BIND_LIMIT0_QUARTER		GENMASK(13, 0)
++#define MTK_PPE_BIND_LIMIT0_HALF		GENMASK(29, 16)
++
++#define MTK_PPE_BIND_LIMIT1			0x230
++#define MTK_PPE_BIND_LIMIT1_FULL		GENMASK(13, 0)
++#define MTK_PPE_BIND_LIMIT1_NON_L4		GENMASK(23, 16)
++
++#define MTK_PPE_KEEPALIVE			0x234
++#define MTK_PPE_KEEPALIVE_TIME			GENMASK(15, 0)
++#define MTK_PPE_KEEPALIVE_TIME_TCP		GENMASK(23, 16)
++#define MTK_PPE_KEEPALIVE_TIME_UDP		GENMASK(31, 24)
++
++#define MTK_PPE_UNBIND_AGE			0x238
++#define MTK_PPE_UNBIND_AGE_MIN_PACKETS		GENMASK(31, 16)
++#define MTK_PPE_UNBIND_AGE_DELTA		GENMASK(7, 0)
++
++#define MTK_PPE_BIND_AGE0			0x23c
++#define MTK_PPE_BIND_AGE0_DELTA_NON_L4		GENMASK(30, 16)
++#define MTK_PPE_BIND_AGE0_DELTA_UDP		GENMASK(14, 0)
++
++#define MTK_PPE_BIND_AGE1			0x240
++#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN		GENMASK(30, 16)
++#define MTK_PPE_BIND_AGE1_DELTA_TCP		GENMASK(14, 0)
++
++#define MTK_PPE_HASH_SEED			0x244
++
++#define MTK_PPE_DEFAULT_CPU_PORT		0x248
++#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n)	(GENMASK(2, 0) << ((_n) * 4))
++
++#define MTK_PPE_MTU_DROP			0x308
++
++#define MTK_PPE_VLAN_MTU0			0x30c
++#define MTK_PPE_VLAN_MTU0_NONE			GENMASK(13, 0)
++#define MTK_PPE_VLAN_MTU0_1TAG			GENMASK(29, 16)
++
++#define MTK_PPE_VLAN_MTU1			0x310
++#define MTK_PPE_VLAN_MTU1_2TAG			GENMASK(13, 0)
++#define MTK_PPE_VLAN_MTU1_3TAG			GENMASK(29, 16)
++
++#define MTK_PPE_VPM_TPID			0x318
++
++#define MTK_PPE_CACHE_CTL			0x320
++#define MTK_PPE_CACHE_CTL_EN			BIT(0)
++#define MTK_PPE_CACHE_CTL_LOCK_CLR		BIT(4)
++#define MTK_PPE_CACHE_CTL_REQ			BIT(8)
++#define MTK_PPE_CACHE_CTL_CLEAR			BIT(9)
++#define MTK_PPE_CACHE_CTL_CMD			GENMASK(13, 12)
++
++#define MTK_PPE_MIB_CFG				0x334
++#define MTK_PPE_MIB_CFG_EN			BIT(0)
++#define MTK_PPE_MIB_CFG_RD_CLR			BIT(1)
++
++#define MTK_PPE_MIB_TB_BASE			0x338
++
++#define MTK_PPE_MIB_CACHE_CTL			0x350
++#define MTK_PPE_MIB_CACHE_CTL_EN		BIT(0)
++#define MTK_PPE_MIB_CACHE_CTL_FLUSH		BIT(2)
++
++#endif
+diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
+index a085213dc..813e30495 100644
+--- a/drivers/net/ppp/ppp_generic.c
++++ b/drivers/net/ppp/ppp_generic.c
+@@ -1378,12 +1378,34 @@ static void ppp_dev_priv_destructor(struct net_device *dev)
+ 		ppp_destroy_interface(ppp);
+ }
+ 
++static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
++				 struct net_device_path *path)
++{
++	struct ppp *ppp = netdev_priv(ctx->dev);
++	struct ppp_channel *chan;
++	struct channel *pch;
++
++	if (ppp->flags & SC_MULTILINK)
++		return -EOPNOTSUPP;
++
++	if (list_empty(&ppp->channels))
++		return -ENODEV;
++
++	pch = list_first_entry(&ppp->channels, struct channel, clist);
++	chan = pch->chan;
++	if (!chan->ops->fill_forward_path)
++		return -EOPNOTSUPP;
++
++	return chan->ops->fill_forward_path(ctx, path, chan);
++}
++
+ static const struct net_device_ops ppp_netdev_ops = {
+ 	.ndo_init	 = ppp_dev_init,
+ 	.ndo_uninit      = ppp_dev_uninit,
+ 	.ndo_start_xmit  = ppp_start_xmit,
+ 	.ndo_do_ioctl    = ppp_net_ioctl,
+ 	.ndo_get_stats64 = ppp_get_stats64,
++	.ndo_fill_forward_path = ppp_fill_forward_path,
+ };
+ 
+ static struct device_type ppp_type = {
+diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
+index 087b01684..7a8c246ab 100644
+--- a/drivers/net/ppp/pppoe.c
++++ b/drivers/net/ppp/pppoe.c
+@@ -974,8 +974,32 @@ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
+ 	return __pppoe_xmit(sk, skb);
+ }
+ 
++static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
++				   struct net_device_path *path,
++				   const struct ppp_channel *chan)
++{
++	struct sock *sk = (struct sock *)chan->private;
++	struct pppox_sock *po = pppox_sk(sk);
++	struct net_device *dev = po->pppoe_dev;
++
++	if (sock_flag(sk, SOCK_DEAD) ||
++	    !(sk->sk_state & PPPOX_CONNECTED) || !dev)
++		return -1;
++
++	path->type = DEV_PATH_PPPOE;
++	path->encap.proto = htons(ETH_P_PPP_SES);
++	path->encap.id = be16_to_cpu(po->num);
++	memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
++	memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
++	path->dev = ctx->dev;
++	ctx->dev = dev;
++
++	return 0;
++}
++
+ static const struct ppp_channel_ops pppoe_chan_ops = {
+ 	.start_xmit = pppoe_xmit,
++	.fill_forward_path = pppoe_fill_forward_path,
+ };
+ 
+ static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 38af42bf8..9f64504ac 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -829,6 +829,59 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
+ 				       struct sk_buff *skb,
+ 				       struct net_device *sb_dev);
+ 
++enum net_device_path_type {
++	DEV_PATH_ETHERNET = 0,
++	DEV_PATH_VLAN,
++	DEV_PATH_BRIDGE,
++	DEV_PATH_PPPOE,
++	DEV_PATH_DSA,
++};
++
++struct net_device_path {
++	enum net_device_path_type	type;
++	const struct net_device		*dev;
++	union {
++		struct {
++			u16		id;
++			__be16		proto;
++			u8		h_dest[ETH_ALEN];
++		} encap;
++		struct {
++			enum {
++				DEV_PATH_BR_VLAN_KEEP,
++				DEV_PATH_BR_VLAN_TAG,
++				DEV_PATH_BR_VLAN_UNTAG,
++				DEV_PATH_BR_VLAN_UNTAG_HW,
++			}		vlan_mode;
++			u16		vlan_id;
++			__be16		vlan_proto;
++		} bridge;
++		struct {
++			int port;
++			u16 proto;
++		} dsa;
++	};
++};
++
++#define NET_DEVICE_PATH_STACK_MAX	5
++#define NET_DEVICE_PATH_VLAN_MAX	2
++
++struct net_device_path_stack {
++	int			num_paths;
++	struct net_device_path	path[NET_DEVICE_PATH_STACK_MAX];
++};
++
++struct net_device_path_ctx {
++	const struct net_device *dev;
++	u8			daddr[ETH_ALEN];
++
++	int			num_vlans;
++	struct {
++		u16		id;
++		__be16		proto;
++	} vlan[NET_DEVICE_PATH_VLAN_MAX];
++};
++
+ enum tc_setup_type {
+ 	TC_SETUP_QDISC_MQPRIO,
+ 	TC_SETUP_CLSU32,
+@@ -844,6 +897,7 @@ enum tc_setup_type {
+ 	TC_SETUP_ROOT_QDISC,
+ 	TC_SETUP_QDISC_GRED,
+ 	TC_SETUP_QDISC_TAPRIO,
++	TC_SETUP_FT,
+ };
+ 
+ /* These structures hold the attributes of bpf state that are being passed
+@@ -1239,6 +1293,8 @@ struct tlsdev_ops;
+  *	Get devlink port instance associated with a given netdev.
+  *	Called with a reference on the netdevice and devlink locks only,
+  *	rtnl_lock is not held.
++ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
++ *     Get the forwarding path to reach the real device from the HW destination address
+  */
+ struct net_device_ops {
+ 	int			(*ndo_init)(struct net_device *dev);
+@@ -1436,6 +1492,8 @@ struct net_device_ops {
+ 	int			(*ndo_xsk_wakeup)(struct net_device *dev,
+ 						  u32 queue_id, u32 flags);
+ 	struct devlink_port *	(*ndo_get_devlink_port)(struct net_device *dev);
++	int                     (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
++                                                         struct net_device_path *path);
+ };
+ 
+ /**
+@@ -2661,6 +2719,8 @@ void dev_remove_offload(struct packet_offload *po);
+ 
+ int dev_get_iflink(const struct net_device *dev);
+ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
++int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
++			  struct net_device_path_stack *stack);
+ struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
+ 				      unsigned short mask);
+ struct net_device *dev_get_by_name(struct net *net, const char *name);
+diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
+index 98966064e..91f9a9283 100644
+--- a/include/linux/ppp_channel.h
++++ b/include/linux/ppp_channel.h
+@@ -28,6 +28,9 @@ struct ppp_channel_ops {
+ 	int	(*start_xmit)(struct ppp_channel *, struct sk_buff *);
+ 	/* Handle an ioctl call that has come in via /dev/ppp. */
+ 	int	(*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
++	int	(*fill_forward_path)(struct net_device_path_ctx *,
++				     struct net_device_path *,
++				     const struct ppp_channel *);
+ };
+ 
+ struct ppp_channel {
+diff --git a/include/net/dsa.h b/include/net/dsa.h
+index 05f66d487..cafc74218 100644
+--- a/include/net/dsa.h
++++ b/include/net/dsa.h
+@@ -561,6 +561,8 @@ struct dsa_switch_ops {
+ 					  struct sk_buff *skb);
+ };
+ 
++struct dsa_port *dsa_port_from_netdev(struct net_device *netdev);
++
+ struct dsa_switch_driver {
+ 	struct list_head	list;
+ 	const struct dsa_switch_ops *ops;
+@@ -653,6 +655,14 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
+ #define BRCM_TAG_GET_PORT(v)		((v) >> 8)
+ #define BRCM_TAG_GET_QUEUE(v)		((v) & 0xff)
+ 
++#if IS_ENABLED(CONFIG_NET_DSA)
++bool dsa_slave_dev_check(const struct net_device *dev);
++#else
++static inline bool dsa_slave_dev_check(const struct net_device *dev)
++{
++	return false;
++}
++#endif
+ 
+ netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
+ int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
+diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
+index c6f7bd22d..59b873653 100644
+--- a/include/net/flow_offload.h
++++ b/include/net/flow_offload.h
+@@ -138,6 +138,7 @@ enum flow_action_id {
+ 	FLOW_ACTION_MPLS_PUSH,
+ 	FLOW_ACTION_MPLS_POP,
+ 	FLOW_ACTION_MPLS_MANGLE,
++	FLOW_ACTION_PPPOE_PUSH,
+ 	NUM_FLOW_ACTIONS,
+ };
+ 
+@@ -213,6 +214,9 @@ struct flow_action_entry {
+ 			u8		bos;
+ 			u8		ttl;
+ 		} mpls_mangle;
++		struct {				/* FLOW_ACTION_PPPOE_PUSH */
++			u16		sid;
++		} pppoe;
+ 	};
+ };
+ 
+diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
+index 2c739fc75..89ab8f180 100644
+--- a/include/net/ip6_route.h
++++ b/include/net/ip6_route.h
+@@ -314,12 +314,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
+ 	       !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
+ }
+ 
+-static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
++static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst,
++						     bool forwarding)
+ {
+ 	struct inet6_dev *idev;
+ 	unsigned int mtu;
+ 
+-	if (dst_metric_locked(dst, RTAX_MTU)) {
++	if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) {
+ 		mtu = dst_metric_raw(dst, RTAX_MTU);
+ 		if (mtu)
+ 			goto out;
+diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+index 7b3c873f8..e95483192 100644
+--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
++++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+@@ -4,7 +4,4 @@
+ 
+ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
+ 
+-#include <linux/sysctl.h>
+-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
+-
+ #endif /* _NF_CONNTRACK_IPV6_H*/
+diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
+index 90690e37a..ce0bc3e62 100644
+--- a/include/net/netfilter/nf_conntrack.h
++++ b/include/net/netfilter/nf_conntrack.h
+@@ -279,6 +279,18 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
+ 	       !nf_ct_is_dying(ct);
+ }
+ 
++#define	NF_CT_DAY	(86400 * HZ)
++
++/* Set an arbitrary timeout large enough not to ever expire, this saves
++ * us a check for the IPS_OFFLOAD_BIT from the packet path via
++ * nf_ct_is_expired().
++ */
++static inline void nf_ct_offload_timeout(struct nf_conn *ct)
++{
++	if (nf_ct_expires(ct) < NF_CT_DAY / 2)
++		WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
++}
++
+ struct kernel_param;
+ 
+ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
+diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
+index f7a060c6e..7f44a7715 100644
+--- a/include/net/netfilter/nf_conntrack_acct.h
++++ b/include/net/netfilter/nf_conntrack_acct.h
+@@ -65,6 +65,17 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
+ #endif
+ }
+ 
++void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
++		    unsigned int bytes);
++
++static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
++				     unsigned int bytes)
++{
++#if IS_ENABLED(CONFIG_NF_CONNTRACK)
++	nf_ct_acct_add(ct, dir, 1, bytes);
++#endif
++}
++
+ void nf_conntrack_acct_pernet_init(struct net *net);
+ 
+ int nf_conntrack_acct_init(void);
+diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
+index b37a7d608..7cf897677 100644
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -8,31 +8,99 @@
+ #include <linux/rcupdate.h>
+ #include <linux/netfilter.h>
+ #include <linux/netfilter/nf_conntrack_tuple_common.h>
++#include <net/flow_offload.h>
+ #include <net/dst.h>
++#include <linux/if_pppox.h>
++#include <linux/ppp_defs.h>
+ 
+ struct nf_flowtable;
++struct nf_flow_rule;
++struct flow_offload;
++enum flow_offload_tuple_dir;
++
++struct nf_flow_key {
++	struct flow_dissector_key_meta			meta;
++	struct flow_dissector_key_control		control;
++	struct flow_dissector_key_control		enc_control;
++	struct flow_dissector_key_basic			basic;
++	struct flow_dissector_key_vlan			vlan;
++	struct flow_dissector_key_vlan			cvlan;
++	union {
++		struct flow_dissector_key_ipv4_addrs	ipv4;
++		struct flow_dissector_key_ipv6_addrs	ipv6;
++	};
++	struct flow_dissector_key_keyid			enc_key_id;
++	union {
++		struct flow_dissector_key_ipv4_addrs	enc_ipv4;
++		struct flow_dissector_key_ipv6_addrs	enc_ipv6;
++	};
++	struct flow_dissector_key_tcp			tcp;
++	struct flow_dissector_key_ports			tp;
++} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
++
++struct nf_flow_match {
++	struct flow_dissector	dissector;
++	struct nf_flow_key	key;
++	struct nf_flow_key	mask;
++};
++
++struct nf_flow_rule {
++	struct nf_flow_match	match;
++	struct flow_rule	*rule;
++};
+ 
+ struct nf_flowtable_type {
+ 	struct list_head		list;
+ 	int				family;
+ 	int				(*init)(struct nf_flowtable *ft);
++	int				(*setup)(struct nf_flowtable *ft,
++						 struct net_device *dev,
++						 enum flow_block_command cmd);
++	int				(*action)(struct net *net,
++						  const struct flow_offload *flow,
++						  enum flow_offload_tuple_dir dir,
++						  struct nf_flow_rule *flow_rule);
+ 	void				(*free)(struct nf_flowtable *ft);
+ 	nf_hookfn			*hook;
+ 	struct module			*owner;
+ };
+ 
++enum nf_flowtable_flags {
++	NF_FLOWTABLE_HW_OFFLOAD		= 0x1,	/* NFT_FLOWTABLE_HW_OFFLOAD */
++	NF_FLOWTABLE_COUNTER		= 0x2,	/* NFT_FLOWTABLE_COUNTER */
++};
++
+ struct nf_flowtable {
+ 	struct list_head		list;
+ 	struct rhashtable		rhashtable;
++	int				priority;
+ 	const struct nf_flowtable_type	*type;
+ 	struct delayed_work		gc_work;
++	unsigned int			flags;
++	struct flow_block		flow_block;
++	struct rw_semaphore		flow_block_lock; /* Guards flow_block */
++	possible_net_t			net;
+ };
+ 
++static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
++{
++	return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD;
++}
++
+ enum flow_offload_tuple_dir {
+ 	FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
+ 	FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
+-	FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
+ };
++#define FLOW_OFFLOAD_DIR_MAX	IP_CT_DIR_MAX
++
++enum flow_offload_xmit_type {
++	FLOW_OFFLOAD_XMIT_UNSPEC	= 0,
++	FLOW_OFFLOAD_XMIT_NEIGH,
++	FLOW_OFFLOAD_XMIT_XFRM,
++	FLOW_OFFLOAD_XMIT_DIRECT,
++};
++
++#define NF_FLOW_TABLE_ENCAP_MAX		2
+ 
+ struct flow_offload_tuple {
+ 	union {
+@@ -52,11 +120,31 @@ struct flow_offload_tuple {
+ 
+ 	u8				l3proto;
+ 	u8				l4proto;
+-	u8				dir;
++	struct {
++		u16			id;
++		__be16			proto;
++	} encap[NF_FLOW_TABLE_ENCAP_MAX];
+ 
+-	u16				mtu;
++	/* All members above are keys for lookups, see flow_offload_hash(). */
++	struct { }			__hash;
+ 
+-	struct dst_entry		*dst_cache;
++	u8				dir:2,
++					xmit_type:2,
++					encap_num:2,
++					in_vlan_ingress:2;
++	u16				mtu;
++	union {
++		struct {
++			struct dst_entry *dst_cache;
++			u32		dst_cookie;
++		};
++		struct {
++			u32		ifidx;
++			u32		hw_ifidx;
++			u8		h_source[ETH_ALEN];
++			u8		h_dest[ETH_ALEN];
++		} out;
++	};
+ };
+ 
+ struct flow_offload_tuple_rhash {
+@@ -64,52 +152,139 @@ struct flow_offload_tuple_rhash {
+ 	struct flow_offload_tuple	tuple;
+ };
+ 
+-#define FLOW_OFFLOAD_SNAT	0x1
+-#define FLOW_OFFLOAD_DNAT	0x2
+-#define FLOW_OFFLOAD_DYING	0x4
+-#define FLOW_OFFLOAD_TEARDOWN	0x8
++enum nf_flow_flags {
++	NF_FLOW_SNAT,
++	NF_FLOW_DNAT,
++	NF_FLOW_TEARDOWN,
++	NF_FLOW_HW,
++	NF_FLOW_HW_DYING,
++	NF_FLOW_HW_DEAD,
++	NF_FLOW_HW_PENDING,
++};
++
++enum flow_offload_type {
++	NF_FLOW_OFFLOAD_UNSPEC	= 0,
++	NF_FLOW_OFFLOAD_ROUTE,
++};
+ 
+ struct flow_offload {
+ 	struct flow_offload_tuple_rhash		tuplehash[FLOW_OFFLOAD_DIR_MAX];
+-	u32					flags;
+-	union {
+-		/* Your private driver data here. */
+-		u32		timeout;
+-	};
++	struct nf_conn				*ct;
++	unsigned long				flags;
++	u16					type;
++	u32					timeout;
++	struct rcu_head				rcu_head;
+ };
+ 
+ #define NF_FLOW_TIMEOUT (30 * HZ)
++#define nf_flowtable_time_stamp	(u32)jiffies
++
++unsigned long flow_offload_get_timeout(struct flow_offload *flow);
++
++static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
++{
++	return (__s32)(timeout - nf_flowtable_time_stamp);
++}
+ 
+ struct nf_flow_route {
+ 	struct {
+-		struct dst_entry	*dst;
++		struct dst_entry		*dst;
++		struct {
++			u32			ifindex;
++			struct {
++				u16		id;
++				__be16		proto;
++			} encap[NF_FLOW_TABLE_ENCAP_MAX];
++			u8			num_encaps:2,
++						ingress_vlans:2;
++		} in;
++		struct {
++			u32			ifindex;
++			u32			hw_ifindex;
++			u8			h_source[ETH_ALEN];
++			u8			h_dest[ETH_ALEN];
++		} out;
++		enum flow_offload_xmit_type	xmit_type;
+ 	} tuple[FLOW_OFFLOAD_DIR_MAX];
+ };
+ 
+-struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
+-					struct nf_flow_route *route);
++struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
+ void flow_offload_free(struct flow_offload *flow);
+ 
++static inline int
++nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
++			     flow_setup_cb_t *cb, void *cb_priv)
++{
++	struct flow_block *block = &flow_table->flow_block;
++	struct flow_block_cb *block_cb;
++	int err = 0;
++
++	down_write(&flow_table->flow_block_lock);
++	block_cb = flow_block_cb_lookup(block, cb, cb_priv);
++	if (block_cb) {
++		err = -EEXIST;
++		goto unlock;
++	}
++
++	block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
++	if (IS_ERR(block_cb)) {
++		err = PTR_ERR(block_cb);
++		goto unlock;
++	}
++
++	list_add_tail(&block_cb->list, &block->cb_list);
++
++unlock:
++	up_write(&flow_table->flow_block_lock);
++	return err;
++}
++
++static inline void
++nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
++			     flow_setup_cb_t *cb, void *cb_priv)
++{
++	struct flow_block *block = &flow_table->flow_block;
++	struct flow_block_cb *block_cb;
++
++	down_write(&flow_table->flow_block_lock);
++	block_cb = flow_block_cb_lookup(block, cb, cb_priv);
++	if (block_cb) {
++		list_del(&block_cb->list);
++		flow_block_cb_free(block_cb);
++	} else {
++		WARN_ON(true);
++	}
++	up_write(&flow_table->flow_block_lock);
++}
++
++int flow_offload_route_init(struct flow_offload *flow,
++			    const struct nf_flow_route *route);
++
+ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
++void flow_offload_refresh(struct nf_flowtable *flow_table,
++			  struct flow_offload *flow);
++
+ struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
+ 						     struct flow_offload_tuple *tuple);
++void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
++			      struct net_device *dev);
+ void nf_flow_table_cleanup(struct net_device *dev);
+ 
+ int nf_flow_table_init(struct nf_flowtable *flow_table);
+ void nf_flow_table_free(struct nf_flowtable *flow_table);
+ 
+ void flow_offload_teardown(struct flow_offload *flow);
+-static inline void flow_offload_dead(struct flow_offload *flow)
+-{
+-	flow->flags |= FLOW_OFFLOAD_DYING;
+-}
+ 
+-int nf_flow_snat_port(const struct flow_offload *flow,
+-		      struct sk_buff *skb, unsigned int thoff,
+-		      u8 protocol, enum flow_offload_tuple_dir dir);
+-int nf_flow_dnat_port(const struct flow_offload *flow,
+-		      struct sk_buff *skb, unsigned int thoff,
+-		      u8 protocol, enum flow_offload_tuple_dir dir);
++int nf_flow_table_iterate(struct nf_flowtable *flow_table,
++			  void (*iter)(struct flow_offload *flow, void *data),
++			  void *data);
++
++void nf_flow_snat_port(const struct flow_offload *flow,
++		       struct sk_buff *skb, unsigned int thoff,
++		       u8 protocol, enum flow_offload_tuple_dir dir);
++void nf_flow_dnat_port(const struct flow_offload *flow,
++		       struct sk_buff *skb, unsigned int thoff,
++		       u8 protocol, enum flow_offload_tuple_dir dir);
+ 
+ struct flow_ports {
+ 	__be16 source, dest;
+@@ -123,4 +298,41 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ #define MODULE_ALIAS_NF_FLOWTABLE(family)	\
+ 	MODULE_ALIAS("nf-flowtable-" __stringify(family))
+ 
++void nf_flow_offload_add(struct nf_flowtable *flowtable,
++			 struct flow_offload *flow);
++void nf_flow_offload_del(struct nf_flowtable *flowtable,
++			 struct flow_offload *flow);
++void nf_flow_offload_stats(struct nf_flowtable *flowtable,
++			   struct flow_offload *flow);
++
++void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
++int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
++				struct net_device *dev,
++				enum flow_block_command cmd);
++int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
++			    enum flow_offload_tuple_dir dir,
++			    struct nf_flow_rule *flow_rule);
++int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
++			    enum flow_offload_tuple_dir dir,
++			    struct nf_flow_rule *flow_rule);
++
++int nf_flow_table_offload_init(void);
++void nf_flow_table_offload_exit(void);
++
++static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
++{
++	__be16 proto;
++
++	proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
++			     sizeof(struct pppoe_hdr)));
++	switch (proto) {
++	case htons(PPP_IP):
++		return htons(ETH_P_IP);
++	case htons(PPP_IPV6):
++		return htons(ETH_P_IPV6);
++	}
++
++	return 0;
++}
++
+ #endif /* _NF_FLOW_TABLE_H */
+diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
+index 806454e76..9e3963c8f 100644
+--- a/include/net/netns/conntrack.h
++++ b/include/net/netns/conntrack.h
+@@ -27,6 +27,9 @@ struct nf_tcp_net {
+ 	int tcp_loose;
+ 	int tcp_be_liberal;
+ 	int tcp_max_retrans;
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++	unsigned int offload_timeout;
++#endif
+ };
+ 
+ enum udp_conntrack {
+@@ -37,6 +40,9 @@ enum udp_conntrack {
+ 
+ struct nf_udp_net {
+ 	unsigned int timeouts[UDP_CT_MAX];
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++	unsigned int offload_timeout;
++#endif
+ };
+ 
+ struct nf_icmp_net {
+diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
+index 336014bf8..ae698d11c 100644
+--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
++++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
+@@ -105,14 +105,19 @@ enum ip_conntrack_status {
+ 	IPS_OFFLOAD_BIT = 14,
+ 	IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
+ 
++	/* Conntrack has been offloaded to hardware. */
++	IPS_HW_OFFLOAD_BIT = 15,
++	IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT),
++
+ 	/* Be careful here, modifying these bits can make things messy,
+ 	 * so don't let users modify them directly.
+ 	 */
+ 	IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
+ 				 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
+-				 IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
++				 IPS_SEQ_ADJUST | IPS_TEMPLATE |
++				 IPS_OFFLOAD | IPS_HW_OFFLOAD),
+ 
+-	__IPS_MAX_BIT = 15,
++	__IPS_MAX_BIT = 16,
+ };
+ 
+ /* Connection tracking event types */
+diff --git a/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
+new file mode 100644
+index 000000000..5841bbe0e
+--- /dev/null
++++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
+@@ -0,0 +1,17 @@
++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
++#ifndef _XT_FLOWOFFLOAD_H
++#define _XT_FLOWOFFLOAD_H
++
++#include <linux/types.h>
++
++enum {
++	XT_FLOWOFFLOAD_HW	= 1 << 0,
++
++	XT_FLOWOFFLOAD_MASK	= XT_FLOWOFFLOAD_HW
++};
++
++struct xt_flowoffload_target_info {
++	__u32 flags;
++};
++
++#endif /* _XT_FLOWOFFLOAD_H */
+diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
+index 589615ec4..444ab5fae 100644
+--- a/net/8021q/vlan_dev.c
++++ b/net/8021q/vlan_dev.c
+@@ -747,6 +747,26 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
+ 	return real_dev->ifindex;
+ }
+ 
++static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
++				      struct net_device_path *path)
++{
++	struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
++
++	path->type = DEV_PATH_VLAN;
++	path->encap.id = vlan->vlan_id;
++	path->encap.proto = vlan->vlan_proto;
++	path->dev = ctx->dev;
++	ctx->dev = vlan->real_dev;
++	if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
++		return -ENOSPC;
++
++	ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
++	ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
++	ctx->num_vlans++;
++
++	return 0;
++}
++
+ static const struct ethtool_ops vlan_ethtool_ops = {
+ 	.get_link_ksettings	= vlan_ethtool_get_link_ksettings,
+ 	.get_drvinfo	        = vlan_ethtool_get_drvinfo,
+@@ -785,6 +805,7 @@ static const struct net_device_ops vlan_netdev_ops = {
+ #endif
+ 	.ndo_fix_features	= vlan_dev_fix_features,
+ 	.ndo_get_iflink		= vlan_dev_get_iflink,
++	.ndo_fill_forward_path	= vlan_dev_fill_forward_path,
+ };
+ 
+ static void vlan_dev_free(struct net_device *dev)
+diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
+index 501f77f0f..0940b44cd 100644
+--- a/net/bridge/br_device.c
++++ b/net/bridge/br_device.c
+@@ -377,6 +377,54 @@ static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
+ 	return br_del_if(br, slave_dev);
+ }
+ 
++static int br_fill_forward_path(struct net_device_path_ctx *ctx,
++				struct net_device_path *path)
++{
++	struct net_bridge_fdb_entry *f;
++	struct net_bridge_port *dst;
++	struct net_bridge *br;
++
++	if (netif_is_bridge_port(ctx->dev))
++		return -1;
++
++	br = netdev_priv(ctx->dev);
++
++	br_vlan_fill_forward_path_pvid(br, ctx, path);
++
++	f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
++	if (!f || !f->dst)
++		return -1;
++
++	dst = READ_ONCE(f->dst);
++	if (!dst)
++		return -1;
++
++	if (br_vlan_fill_forward_path_mode(br, dst, path))
++		return -1;
++
++	path->type = DEV_PATH_BRIDGE;
++	path->dev = dst->br->dev;
++	ctx->dev = dst->dev;
++
++	switch (path->bridge.vlan_mode) {
++	case DEV_PATH_BR_VLAN_TAG:
++		if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
++			return -ENOSPC;
++		ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
++		ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
++		ctx->num_vlans++;
++		break;
++	case DEV_PATH_BR_VLAN_UNTAG_HW:
++	case DEV_PATH_BR_VLAN_UNTAG:
++		ctx->num_vlans--;
++		break;
++	case DEV_PATH_BR_VLAN_KEEP:
++		break;
++	}
++
++	return 0;
++}
++
+ static const struct ethtool_ops br_ethtool_ops = {
+ 	.get_drvinfo    = br_getinfo,
+ 	.get_link	= ethtool_op_get_link,
+@@ -410,6 +458,7 @@ static const struct net_device_ops br_netdev_ops = {
+ 	.ndo_bridge_setlink	 = br_setlink,
+ 	.ndo_bridge_dellink	 = br_dellink,
+ 	.ndo_features_check	 = passthru_features_check,
++	.ndo_fill_forward_path	 = br_fill_forward_path,
+ };
+ 
+ static struct device_type br_type = {
+diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
+index a736be8a1..4bd9e9b57 100644
+--- a/net/bridge/br_private.h
++++ b/net/bridge/br_private.h
+@@ -912,6 +912,13 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
+ int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
+ 			 void *ptr);
+ 
++void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
++				    struct net_device_path_ctx *ctx,
++				    struct net_device_path *path);
++int br_vlan_fill_forward_path_mode(struct net_bridge *br,
++				   struct net_bridge_port *dst,
++				   struct net_device_path *path);
++
+ static inline struct net_bridge_vlan_group *br_vlan_group(
+ 					const struct net_bridge *br)
+ {
+@@ -1066,6 +1073,19 @@ static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
+ 	return 0;
+ }
+ 
++static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
++						  struct net_device_path_ctx *ctx,
++						  struct net_device_path *path)
++{
++}
++
++static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
++						 struct net_bridge_port *dst,
++						 struct net_device_path *path)
++{
++	return 0;
++}
++
+ static inline struct net_bridge_vlan_group *br_vlan_group(
+ 					const struct net_bridge *br)
+ {
+diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
+index 9257292bd..bcfd16924 100644
+--- a/net/bridge/br_vlan.c
++++ b/net/bridge/br_vlan.c
+@@ -1268,6 +1268,61 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
+ }
+ EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
+ 
++void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
++				    struct net_device_path_ctx *ctx,
++				    struct net_device_path *path)
++{
++	struct net_bridge_vlan_group *vg;
++	int idx = ctx->num_vlans - 1;
++	u16 vid;
++
++	path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
++
++	if (!br_opt_get(br, BROPT_VLAN_ENABLED))
++		return;
++
++	vg = br_vlan_group(br);
++
++	if (idx >= 0 &&
++	    ctx->vlan[idx].proto == br->vlan_proto) {
++		vid = ctx->vlan[idx].id;
++	} else {
++		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
++		vid = br_get_pvid(vg);
++	}
++
++	path->bridge.vlan_id = vid;
++	path->bridge.vlan_proto = br->vlan_proto;
++}
++
++int br_vlan_fill_forward_path_mode(struct net_bridge *br,
++				   struct net_bridge_port *dst,
++				   struct net_device_path *path)
++{
++	struct net_bridge_vlan_group *vg;
++	struct net_bridge_vlan *v;
++
++	if (!br_opt_get(br, BROPT_VLAN_ENABLED))
++		return 0;
++
++	vg = nbp_vlan_group_rcu(dst);
++	v = br_vlan_find(vg, path->bridge.vlan_id);
++	if (!v || !br_vlan_should_use(v))
++		return -EINVAL;
++
++	if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
++		return 0;
++
++	if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
++		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
++	else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
++		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
++	else
++		path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
++
++	return 0;
++}
++
+ int br_vlan_get_info(const struct net_device *dev, u16 vid,
+ 		     struct bridge_vlan_info *p_vinfo)
+ {
+diff --git a/net/core/dev.c b/net/core/dev.c
+index fe2c856b9..4f0edb218 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -639,6 +639,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+ }
+ EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
+ 
++static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
++{
++	int k = stack->num_paths++;
++
++	if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
++		return NULL;
++
++	return &stack->path[k];
++}
++
++int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
++			  struct net_device_path_stack *stack)
++{
++	const struct net_device *last_dev;
++	struct net_device_path_ctx ctx = {
++		.dev	= dev,
++	};
++	struct net_device_path *path;
++	int ret = 0;
++
++	memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
++	stack->num_paths = 0;
++	while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
++		last_dev = ctx.dev;
++		path = dev_fwd_path(stack);
++		if (!path)
++			return -1;
++
++		memset(path, 0, sizeof(struct net_device_path));
++		ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
++		if (ret < 0)
++			return -1;
++
++		if (WARN_ON_ONCE(last_dev == ctx.dev))
++			return -1;
++	}
++	path = dev_fwd_path(stack);
++	if (!path)
++		return -1;
++	path->type = DEV_PATH_ETHERNET;
++	path->dev = ctx.dev;
++
++	return ret;
++}
++EXPORT_SYMBOL_GPL(dev_fill_forward_path);
++
+ /**
+  *	__dev_get_by_name	- find a device by its name
+  *	@net: the applicable net namespace
+diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
+index ca80f8699..35a1249a9 100644
+--- a/net/dsa/dsa.c
++++ b/net/dsa/dsa.c
+@@ -329,6 +329,15 @@ int call_dsa_notifiers(unsigned long val, struct net_device *dev,
+ }
+ EXPORT_SYMBOL_GPL(call_dsa_notifiers);
+ 
++struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
++{
++	if (!netdev || !dsa_slave_dev_check(netdev))
++		return ERR_PTR(-ENODEV);
++
++	return dsa_slave_to_port(netdev);
++}
++EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
++
+ static int __init dsa_init_module(void)
+ {
+ 	int rc;
+diff --git a/net/dsa/slave.c b/net/dsa/slave.c
+index 036fda317..2dfaa1eac 100644
+--- a/net/dsa/slave.c
++++ b/net/dsa/slave.c
+@@ -22,8 +22,6 @@
+ 
+ #include "dsa_priv.h"
+ 
+-static bool dsa_slave_dev_check(const struct net_device *dev);
+-
+ /* slave mii_bus handling ***************************************************/
+ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
+ {
+@@ -1033,14 +1031,32 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
+ 	}
+ }
+ 
++static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
++				    void *type_data)
++{
++	struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
++	struct net_device *master = cpu_dp->master;
++
++	if (!master->netdev_ops->ndo_setup_tc)
++		return -EOPNOTSUPP;
++
++	return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
++}
++
+ static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ 			      void *type_data)
+ {
+ 	struct dsa_port *dp = dsa_slave_to_port(dev);
+ 	struct dsa_switch *ds = dp->ds;
+ 
+-	if (type == TC_SETUP_BLOCK)
++	switch (type) {
++	case TC_SETUP_BLOCK:
+ 		return dsa_slave_setup_tc_block(dev, type_data);
++	case TC_SETUP_FT:
++		return dsa_slave_setup_ft_block(ds, dp->index, type_data);
++	default:
++		break;
++	}
+ 
+ 	if (!ds->ops->port_setup_tc)
+ 		return -EOPNOTSUPP;
+@@ -1226,6 +1242,21 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
+ 	return dp->ds->devlink ? &dp->devlink_port : NULL;
+ }
+ 
++static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
++				       struct net_device_path *path)
++{
++	struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
++	struct dsa_port *cpu_dp = dp->cpu_dp;
++
++	path->dev = ctx->dev;
++	path->type = DEV_PATH_DSA;
++	path->dsa.proto = cpu_dp->tag_ops->proto;
++	path->dsa.port = dp->index;
++	ctx->dev = cpu_dp->master;
++
++	return 0;
++}
++
+ static const struct net_device_ops dsa_slave_netdev_ops = {
+ 	.ndo_open	 	= dsa_slave_open,
+ 	.ndo_stop		= dsa_slave_close,
+@@ -1250,6 +1281,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
+ 	.ndo_vlan_rx_add_vid	= dsa_slave_vlan_rx_add_vid,
+ 	.ndo_vlan_rx_kill_vid	= dsa_slave_vlan_rx_kill_vid,
+ 	.ndo_get_devlink_port	= dsa_slave_get_devlink_port,
++	.ndo_fill_forward_path	= dsa_slave_fill_forward_path,
+ };
+ 
+ static struct device_type dsa_type = {
+@@ -1497,10 +1529,11 @@ void dsa_slave_destroy(struct net_device *slave_dev)
+ 	free_netdev(slave_dev);
+ }
+ 
+-static bool dsa_slave_dev_check(const struct net_device *dev)
++bool dsa_slave_dev_check(const struct net_device *dev)
+ {
+ 	return dev->netdev_ops == &dsa_slave_netdev_ops;
+ }
++EXPORT_SYMBOL_GPL(dsa_slave_dev_check);
+ 
+ static int dsa_slave_changeupper(struct net_device *dev,
+ 				 struct netdev_notifier_changeupper_info *info)
+diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
+index f17b40211..803b92e4c 100644
+--- a/net/ipv4/netfilter/Kconfig
++++ b/net/ipv4/netfilter/Kconfig
+@@ -56,8 +56,6 @@ config NF_TABLES_ARP
+ 	help
+ 	  This option enables the ARP support for nf_tables.
+ 
+-endif # NF_TABLES
+-
+ config NF_FLOW_TABLE_IPV4
+ 	tristate "Netfilter flow table IPv4 module"
+ 	depends on NF_FLOW_TABLE
+@@ -66,6 +64,8 @@ config NF_FLOW_TABLE_IPV4
+ 
+ 	  To compile it as a module, choose M here.
+ 
++endif # NF_TABLES
++
+ config NF_DUP_IPV4
+ 	tristate "Netfilter IPv4 packet duplication to alternate destination"
+ 	depends on !NF_CONNTRACK || NF_CONNTRACK
+diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
+index 5585e3a94..bb76f6061 100644
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -607,7 +607,7 @@ int ip6_forward(struct sk_buff *skb)
+ 		}
+ 	}
+ 
+-	mtu = ip6_dst_mtu_forward(dst);
++	mtu = ip6_dst_mtu_maybe_forward(dst, true);
+ 	if (mtu < IPV6_MIN_MTU)
+ 		mtu = IPV6_MIN_MTU;
+ 
+diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
+index 69443e9a3..0b481d236 100644
+--- a/net/ipv6/netfilter/Kconfig
++++ b/net/ipv6/netfilter/Kconfig
+@@ -45,7 +45,6 @@ config NFT_FIB_IPV6
+ 	  multicast or blackhole.
+ 
+ endif # NF_TABLES_IPV6
+-endif # NF_TABLES
+ 
+ config NF_FLOW_TABLE_IPV6
+ 	tristate "Netfilter flow table IPv6 module"
+@@ -55,6 +54,8 @@ config NF_FLOW_TABLE_IPV6
+ 
+ 	  To compile it as a module, choose M here.
+ 
++endif # NF_TABLES
++
+ config NF_DUP_IPV6
+ 	tristate "Netfilter IPv6 packet duplication to alternate destination"
+ 	depends on !NF_CONNTRACK || NF_CONNTRACK
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 98aaf0b79..2b357ac71 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -83,7 +83,7 @@ enum rt6_nud_state {
+ 
+ static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
+ static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
+-static unsigned int	 ip6_mtu(const struct dst_entry *dst);
++static unsigned int	ip6_mtu(const struct dst_entry *dst);
+ static struct dst_entry *ip6_negative_advice(struct dst_entry *);
+ static void		ip6_dst_destroy(struct dst_entry *);
+ static void		ip6_dst_ifdown(struct dst_entry *,
+@@ -3125,25 +3125,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
+ 
+ static unsigned int ip6_mtu(const struct dst_entry *dst)
+ {
+-	struct inet6_dev *idev;
+-	unsigned int mtu;
+-
+-	mtu = dst_metric_raw(dst, RTAX_MTU);
+-	if (mtu)
+-		goto out;
+-
+-	mtu = IPV6_MIN_MTU;
+-
+-	rcu_read_lock();
+-	idev = __in6_dev_get(dst->dev);
+-	if (idev)
+-		mtu = idev->cnf.mtu6;
+-	rcu_read_unlock();
+-
+-out:
+-	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
+-
+-	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
++	return ip6_dst_mtu_maybe_forward(dst, false);
+ }
+ 
+ /* MTU selection:
+diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
+index b967763f5..c040e713a 100644
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -690,8 +690,6 @@ config NFT_FIB_NETDEV
+ 
+ endif # NF_TABLES_NETDEV
+ 
+-endif # NF_TABLES
+-
+ config NF_FLOW_TABLE_INET
+ 	tristate "Netfilter flow table mixed IPv4/IPv6 module"
+ 	depends on NF_FLOW_TABLE
+@@ -700,11 +698,12 @@ config NF_FLOW_TABLE_INET
+ 
+ 	  To compile it as a module, choose M here.
+ 
++endif # NF_TABLES
++
+ config NF_FLOW_TABLE
+ 	tristate "Netfilter flow table module"
+ 	depends on NETFILTER_INGRESS
+ 	depends on NF_CONNTRACK
+-	depends on NF_TABLES
+ 	help
+ 	  This option adds the flow table core infrastructure.
+ 
+@@ -984,6 +983,15 @@ config NETFILTER_XT_TARGET_NOTRACK
+ 	depends on NETFILTER_ADVANCED
+ 	select NETFILTER_XT_TARGET_CT
+ 
++config NETFILTER_XT_TARGET_FLOWOFFLOAD
++	tristate '"FLOWOFFLOAD" target support'
++	depends on NF_FLOW_TABLE
++	depends on NETFILTER_INGRESS
++	help
++	  This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
++	  module to speed up processing of packets by bypassing the usual
++	  netfilter chains.
++
+ config NETFILTER_XT_TARGET_RATEEST
+ 	tristate '"RATEEST" target support'
+ 	depends on NETFILTER_ADVANCED
+diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
+index 4fc075b61..d93a121bc 100644
+--- a/net/netfilter/Makefile
++++ b/net/netfilter/Makefile
+@@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV)	+= nft_fwd_netdev.o
+ 
+ # flow table infrastructure
+ obj-$(CONFIG_NF_FLOW_TABLE)	+= nf_flow_table.o
+-nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
++nf_flow_table-objs		:= nf_flow_table_core.o nf_flow_table_ip.o \
++				   nf_flow_table_offload.o
+ 
+ obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
+ 
+@@ -140,6 +141,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
++obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
+diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
+index f6ab6f484..f689e19d8 100644
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -864,9 +864,8 @@ out:
+ }
+ EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
+ 
+-static inline void nf_ct_acct_update(struct nf_conn *ct,
+-				     enum ip_conntrack_info ctinfo,
+-				     unsigned int len)
++void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
++		    unsigned int bytes)
+ {
+ 	struct nf_conn_acct *acct;
+ 
+@@ -874,10 +873,11 @@ static inline void nf_ct_acct_update(struct nf_conn *ct,
+ 	if (acct) {
+ 		struct nf_conn_counter *counter = acct->counter;
+ 
+-		atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
+-		atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
++		atomic64_add(packets, &counter[dir].packets);
++		atomic64_add(bytes, &counter[dir].bytes);
+ 	}
+ }
++EXPORT_SYMBOL_GPL(nf_ct_acct_add);
+ 
+ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ 			     const struct nf_conn *loser_ct)
+@@ -891,7 +891,7 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
+ 
+ 		/* u32 should be fine since we must have seen one packet. */
+ 		bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
+-		nf_ct_acct_update(ct, ctinfo, bytes);
++		nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
+ 	}
+ }
+ 
+@@ -1238,8 +1238,10 @@ static void gc_worker(struct work_struct *work)
+ 
+ 			tmp = nf_ct_tuplehash_to_ctrack(h);
+ 
+-			if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
++			if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
++				nf_ct_offload_timeout(tmp);
+ 				continue;
++			}
+ 
+ 			if (nf_ct_is_expired(tmp)) {
+ 				nf_ct_gc_expired(tmp);
+@@ -1763,7 +1765,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
+ 		WRITE_ONCE(ct->timeout, extra_jiffies);
+ acct:
+ 	if (do_acct)
+-		nf_ct_acct_update(ct, ctinfo, skb->len);
++		nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
+ }
+ EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
+ 
+@@ -1771,7 +1773,7 @@ bool nf_ct_kill_acct(struct nf_conn *ct,
+ 		     enum ip_conntrack_info ctinfo,
+ 		     const struct sk_buff *skb)
+ {
+-	nf_ct_acct_update(ct, ctinfo, skb->len);
++	nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
+ 
+ 	return nf_ct_delete(ct, 0, 0);
+ }
+diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
+index 7204f0366..3742bae21 100644
+--- a/net/netfilter/nf_conntrack_proto_tcp.c
++++ b/net/netfilter/nf_conntrack_proto_tcp.c
+@@ -1453,6 +1453,10 @@ void nf_conntrack_tcp_init_net(struct net *net)
+ 	tn->tcp_loose = nf_ct_tcp_loose;
+ 	tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
+ 	tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
++
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++	tn->offload_timeout = 30 * HZ;
++#endif
+ }
+ 
+ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
+diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
+index e3a2d018f..a1579d6c3 100644
+--- a/net/netfilter/nf_conntrack_proto_udp.c
++++ b/net/netfilter/nf_conntrack_proto_udp.c
+@@ -267,6 +267,10 @@ void nf_conntrack_udp_init_net(struct net *net)
+ 
+ 	for (i = 0; i < UDP_CT_MAX; i++)
+ 		un->timeouts[i] = udp_timeouts[i];
++
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++	un->offload_timeout = 30 * HZ;
++#endif
+ }
+ 
+ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
+diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
+index 9c6259c28..10d9f93ce 100644
+--- a/net/netfilter/nf_conntrack_standalone.c
++++ b/net/netfilter/nf_conntrack_standalone.c
+@@ -353,7 +353,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
+ 	if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
+ 		goto release;
+ 
+-	if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
++	if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
++		seq_puts(s, "[HW_OFFLOAD] ");
++	else if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+ 		seq_puts(s, "[OFFLOAD] ");
+ 	else if (test_bit(IPS_ASSURED_BIT, &ct->status))
+ 		seq_puts(s, "[ASSURED] ");
+@@ -620,11 +622,17 @@ enum nf_ct_sysctl_index {
+ 	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
+ 	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
+ 	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++	NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
++#endif
+ 	NF_SYSCTL_CT_PROTO_TCP_LOOSE,
+ 	NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
+ 	NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
+ 	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
+ 	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++	NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
++#endif
+ 	NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
+ 	NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
+ #ifdef CONFIG_NF_CT_PROTO_SCTP
+@@ -812,6 +820,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
+ 		.mode		= 0644,
+ 		.proc_handler	= proc_dointvec_jiffies,
+ 	},
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++	[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
++		.procname	= "nf_flowtable_tcp_timeout",
++		.maxlen		= sizeof(unsigned int),
++		.mode		= 0644,
++		.proc_handler	= proc_dointvec_jiffies,
++	},
++#endif
+ 	[NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
+ 		.procname	= "nf_conntrack_tcp_loose",
+ 		.maxlen		= sizeof(int),
+@@ -846,6 +862,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
+ 		.mode		= 0644,
+ 		.proc_handler	= proc_dointvec_jiffies,
+ 	},
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++	[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
++		.procname	= "nf_flowtable_udp_timeout",
++		.maxlen		= sizeof(unsigned int),
++		.mode		= 0644,
++		.proc_handler	= proc_dointvec_jiffies,
++	},
++#endif
+ 	[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
+ 		.procname	= "nf_conntrack_icmp_timeout",
+ 		.maxlen		= sizeof(unsigned int),
+@@ -1028,6 +1052,11 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
+ 	XASSIGN(LIBERAL, &tn->tcp_be_liberal);
+ 	XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
+ #undef XASSIGN
++
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++	table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
++#endif
++
+ }
+ 
+ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
+@@ -1115,6 +1144,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
+ 	table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
+ 	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
+ 	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
++#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
++	table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
++#endif
+ 
+ 	nf_conntrack_standalone_init_tcp_sysctl(net, table);
+ 	nf_conntrack_standalone_init_sctp_sysctl(net, table);
+diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
+index e1ffc66b8..103655813 100644
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -7,31 +7,21 @@
+ #include <linux/netdevice.h>
+ #include <net/ip.h>
+ #include <net/ip6_route.h>
+-#include <net/netfilter/nf_tables.h>
+ #include <net/netfilter/nf_flow_table.h>
+ #include <net/netfilter/nf_conntrack.h>
+ #include <net/netfilter/nf_conntrack_core.h>
+ #include <net/netfilter/nf_conntrack_l4proto.h>
+ #include <net/netfilter/nf_conntrack_tuple.h>
+ 
+-struct flow_offload_entry {
+-	struct flow_offload	flow;
+-	struct nf_conn		*ct;
+-	struct rcu_head		rcu_head;
+-};
+-
+ static DEFINE_MUTEX(flowtable_lock);
+ static LIST_HEAD(flowtables);
+ 
+ static void
+-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
+-		      struct nf_flow_route *route,
++flow_offload_fill_dir(struct flow_offload *flow,
+ 		      enum flow_offload_tuple_dir dir)
+ {
+ 	struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
+-	struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
+-	struct dst_entry *other_dst = route->tuple[!dir].dst;
+-	struct dst_entry *dst = route->tuple[dir].dst;
++	struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
+ 
+ 	ft->dir = dir;
+ 
+@@ -39,12 +29,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
+ 	case NFPROTO_IPV4:
+ 		ft->src_v4 = ctt->src.u3.in;
+ 		ft->dst_v4 = ctt->dst.u3.in;
+-		ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
+ 		break;
+ 	case NFPROTO_IPV6:
+ 		ft->src_v6 = ctt->src.u3.in6;
+ 		ft->dst_v6 = ctt->dst.u3.in6;
+-		ft->mtu = ip6_dst_mtu_forward(dst);
+ 		break;
+ 	}
+ 
+@@ -52,49 +40,32 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
+ 	ft->l4proto = ctt->dst.protonum;
+ 	ft->src_port = ctt->src.u.tcp.port;
+ 	ft->dst_port = ctt->dst.u.tcp.port;
+-
+-	ft->iifidx = other_dst->dev->ifindex;
+-	ft->dst_cache = dst;
+ }
+ 
+-struct flow_offload *
+-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
++struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
+ {
+-	struct flow_offload_entry *entry;
+ 	struct flow_offload *flow;
+ 
+ 	if (unlikely(nf_ct_is_dying(ct) ||
+ 	    !atomic_inc_not_zero(&ct->ct_general.use)))
+ 		return NULL;
+ 
+-	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+-	if (!entry)
++	flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
++	if (!flow)
+ 		goto err_ct_refcnt;
+ 
+-	flow = &entry->flow;
++	flow->ct = ct;
+ 
+-	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
+-		goto err_dst_cache_original;
+-
+-	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
+-		goto err_dst_cache_reply;
+-
+-	entry->ct = ct;
+-
+-	flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
+-	flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
++	flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
++	flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);
+ 
+ 	if (ct->status & IPS_SRC_NAT)
+-		flow->flags |= FLOW_OFFLOAD_SNAT;
++		__set_bit(NF_FLOW_SNAT, &flow->flags);
+ 	if (ct->status & IPS_DST_NAT)
+-		flow->flags |= FLOW_OFFLOAD_DNAT;
++		__set_bit(NF_FLOW_DNAT, &flow->flags);
+ 
+ 	return flow;
+ 
+-err_dst_cache_reply:
+-	dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
+-err_dst_cache_original:
+-	kfree(entry);
+ err_ct_refcnt:
+ 	nf_ct_put(ct);
+ 
+@@ -102,40 +73,135 @@ err_ct_refcnt:
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_alloc);
+ 
+-static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
++static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
+ {
+-	tcp->state = TCP_CONNTRACK_ESTABLISHED;
+-	tcp->seen[0].td_maxwin = 0;
+-	tcp->seen[1].td_maxwin = 0;
++	const struct rt6_info *rt;
++
++	if (flow_tuple->l3proto == NFPROTO_IPV6) {
++		rt = (const struct rt6_info *)flow_tuple->dst_cache;
++		return rt6_get_cookie(rt);
++	}
++
++	return 0;
+ }
+ 
+-#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT	(120 * HZ)
+-#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT	(30 * HZ)
++static int flow_offload_fill_route(struct flow_offload *flow,
++				   const struct nf_flow_route *route,
++				   enum flow_offload_tuple_dir dir)
++{
++	struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
++	struct dst_entry *dst = route->tuple[dir].dst;
++	int i, j = 0;
++
++	switch (flow_tuple->l3proto) {
++	case NFPROTO_IPV4:
++		flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
++		break;
++	case NFPROTO_IPV6:
++		flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
++		break;
++	}
++
++	flow_tuple->iifidx = route->tuple[dir].in.ifindex;
++	for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
++		flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
++		flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
++		if (route->tuple[dir].in.ingress_vlans & BIT(i))
++			flow_tuple->in_vlan_ingress |= BIT(j);
++		j++;
++	}
++	flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
++
++	switch (route->tuple[dir].xmit_type) {
++	case FLOW_OFFLOAD_XMIT_DIRECT:
++		memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
++		       ETH_ALEN);
++		memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
++		       ETH_ALEN);
++		flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
++		flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
++		break;
++	case FLOW_OFFLOAD_XMIT_XFRM:
++	case FLOW_OFFLOAD_XMIT_NEIGH:
++		if (!dst_hold_safe(route->tuple[dir].dst))
++			return -1;
++
++		flow_tuple->dst_cache = dst;
++		flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
++		break;
++	default:
++		WARN_ON_ONCE(1);
++		break;
++	}
++	flow_tuple->xmit_type = route->tuple[dir].xmit_type;
+ 
+-static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
++	return 0;
++}
++
++static void nft_flow_dst_release(struct flow_offload *flow,
++				 enum flow_offload_tuple_dir dir)
++{
++	if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
++	    flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
++		dst_release(flow->tuplehash[dir].tuple.dst_cache);
++}
++
++int flow_offload_route_init(struct flow_offload *flow,
++			    const struct nf_flow_route *route)
++{
++	int err;
++
++	err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
++	if (err < 0)
++		return err;
++
++	err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
++	if (err < 0)
++		goto err_route_reply;
++
++	flow->type = NF_FLOW_OFFLOAD_ROUTE;
++
++	return 0;
++
++err_route_reply:
++	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
++
++	return err;
++}
++EXPORT_SYMBOL_GPL(flow_offload_route_init);
++
++static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
+ {
+-	return (__s32)(timeout - (u32)jiffies);
++	tcp->state = TCP_CONNTRACK_ESTABLISHED;
++	tcp->seen[0].td_maxwin = 0;
++	tcp->seen[1].td_maxwin = 0;
+ }
+ 
+ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
+ {
+-	const struct nf_conntrack_l4proto *l4proto;
++	struct net *net = nf_ct_net(ct);
+ 	int l4num = nf_ct_protonum(ct);
+-	unsigned int timeout;
++	s32 timeout;
+ 
+-	l4proto = nf_ct_l4proto_find(l4num);
+-	if (!l4proto)
+-		return;
++	if (l4num == IPPROTO_TCP) {
++		struct nf_tcp_net *tn = nf_tcp_pernet(net);
+ 
+-	if (l4num == IPPROTO_TCP)
+-		timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
+-	else if (l4num == IPPROTO_UDP)
+-		timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
+-	else
++		timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
++		timeout -= tn->offload_timeout;
++	} else if (l4num == IPPROTO_UDP) {
++		struct nf_udp_net *tn = nf_udp_pernet(net);
++
++		timeout = tn->timeouts[UDP_CT_REPLIED];
++		timeout -= tn->offload_timeout;
++	} else {
+ 		return;
++	}
++
++	if (timeout < 0)
++		timeout = 0;
+ 
+-	if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
+-		ct->timeout = nfct_time_stamp + timeout;
++	if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
++		WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
+ }
+ 
+ static void flow_offload_fixup_ct_state(struct nf_conn *ct)
+@@ -150,17 +216,23 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
+ 	flow_offload_fixup_ct_timeout(ct);
+ }
+ 
+-void flow_offload_free(struct flow_offload *flow)
++static void flow_offload_route_release(struct flow_offload *flow)
+ {
+-	struct flow_offload_entry *e;
++	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
++	nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
++}
+ 
+-	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
+-	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
+-	e = container_of(flow, struct flow_offload_entry, flow);
+-	if (flow->flags & FLOW_OFFLOAD_DYING)
+-		nf_ct_delete(e->ct, 0, 0);
+-	nf_ct_put(e->ct);
+-	kfree_rcu(e, rcu_head);
++void flow_offload_free(struct flow_offload *flow)
++{
++	switch (flow->type) {
++	case NF_FLOW_OFFLOAD_ROUTE:
++		flow_offload_route_release(flow);
++		break;
++	default:
++		break;
++	}
++	nf_ct_put(flow->ct);
++	kfree_rcu(flow, rcu_head);
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_free);
+ 
+@@ -168,14 +240,14 @@ static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
+ {
+ 	const struct flow_offload_tuple *tuple = data;
+ 
+-	return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
++	return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
+ }
+ 
+ static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
+ {
+ 	const struct flow_offload_tuple_rhash *tuplehash = data;
+ 
+-	return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
++	return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
+ }
+ 
+ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
+@@ -184,7 +256,7 @@ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
+ 	const struct flow_offload_tuple *tuple = arg->key;
+ 	const struct flow_offload_tuple_rhash *x = ptr;
+ 
+-	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
++	if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
+ 		return 1;
+ 
+ 	return 0;
+@@ -198,30 +270,30 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
+ 	.automatic_shrinking	= true,
+ };
+ 
+-#define        DAY     (86400 * HZ)
+-
+-/* Set an arbitrary timeout large enough not to ever expire, this save
+- * us a check for the IPS_OFFLOAD_BIT from the packet path via
+- * nf_ct_is_expired().
+- */
+-static void nf_ct_offload_timeout(struct flow_offload *flow)
++unsigned long flow_offload_get_timeout(struct flow_offload *flow)
+ {
+-	struct flow_offload_entry *entry;
+-	struct nf_conn *ct;
++	unsigned long timeout = NF_FLOW_TIMEOUT;
++	struct net *net = nf_ct_net(flow->ct);
++	int l4num = nf_ct_protonum(flow->ct);
++
++	if (l4num == IPPROTO_TCP) {
++		struct nf_tcp_net *tn = nf_tcp_pernet(net);
+ 
+-	entry = container_of(flow, struct flow_offload_entry, flow);
+-	ct = entry->ct;
++		timeout = tn->offload_timeout;
++	} else if (l4num == IPPROTO_UDP) {
++		struct nf_udp_net *tn = nf_udp_pernet(net);
++
++		timeout = tn->offload_timeout;
++	}
+ 
+-	if (nf_ct_expires(ct) < DAY / 2)
+-		ct->timeout = nfct_time_stamp + DAY;
++	return timeout;
+ }
+ 
+ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
+ {
+ 	int err;
+ 
+-	nf_ct_offload_timeout(flow);
+-	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
++	flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
+ 
+ 	err = rhashtable_insert_fast(&flow_table->rhashtable,
+ 				     &flow->tuplehash[0].node,
+@@ -239,10 +311,35 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
+ 		return err;
+ 	}
+ 
++	nf_ct_offload_timeout(flow->ct);
++
++	if (nf_flowtable_hw_offload(flow_table)) {
++		__set_bit(NF_FLOW_HW, &flow->flags);
++		nf_flow_offload_add(flow_table, flow);
++	}
++
+ 	return 0;
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_add);
+ 
++void flow_offload_refresh(struct nf_flowtable *flow_table,
++			  struct flow_offload *flow)
++{
++	u32 timeout;
++
++	timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
++	if (timeout - READ_ONCE(flow->timeout) > HZ)
++		WRITE_ONCE(flow->timeout, timeout);
++	else
++		return;
++
++	if (likely(!nf_flowtable_hw_offload(flow_table)))
++		return;
++
++	nf_flow_offload_add(flow_table, flow);
++}
++EXPORT_SYMBOL_GPL(flow_offload_refresh);
++
+ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+ {
+ 	return nf_flow_timeout_delta(flow->timeout) <= 0;
+@@ -251,8 +348,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
+ static void flow_offload_del(struct nf_flowtable *flow_table,
+ 			     struct flow_offload *flow)
+ {
+-	struct flow_offload_entry *e;
+-
+ 	rhashtable_remove_fast(&flow_table->rhashtable,
+ 			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
+ 			       nf_flow_offload_rhash_params);
+@@ -260,28 +355,21 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
+ 			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
+ 			       nf_flow_offload_rhash_params);
+ 
+-	e = container_of(flow, struct flow_offload_entry, flow);
+-	clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
++	clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
+ 
+ 	if (nf_flow_has_expired(flow))
+-		flow_offload_fixup_ct(e->ct);
+-	else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
+-		flow_offload_fixup_ct_timeout(e->ct);
+-
+-	if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
+-		flow_offload_fixup_ct_state(e->ct);
++		flow_offload_fixup_ct(flow->ct);
++	else
++		flow_offload_fixup_ct_timeout(flow->ct);
+ 
+ 	flow_offload_free(flow);
+ }
+ 
+ void flow_offload_teardown(struct flow_offload *flow)
+ {
+-	struct flow_offload_entry *e;
++	set_bit(NF_FLOW_TEARDOWN, &flow->flags);
+ 
+-	flow->flags |= FLOW_OFFLOAD_TEARDOWN;
+-
+-	e = container_of(flow, struct flow_offload_entry, flow);
+-	flow_offload_fixup_ct_state(e->ct);
++	flow_offload_fixup_ct_state(flow->ct);
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_teardown);
+ 
+@@ -291,7 +379,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
+ {
+ 	struct flow_offload_tuple_rhash *tuplehash;
+ 	struct flow_offload *flow;
+-	struct flow_offload_entry *e;
+ 	int dir;
+ 
+ 	tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
+@@ -301,19 +388,17 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
+ 
+ 	dir = tuplehash->tuple.dir;
+ 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+-	if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
++	if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
+ 		return NULL;
+ 
+-	e = container_of(flow, struct flow_offload_entry, flow);
+-	if (unlikely(nf_ct_is_dying(e->ct)))
++	if (unlikely(nf_ct_is_dying(flow->ct)))
+ 		return NULL;
+ 
+ 	return tuplehash;
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_lookup);
+ 
+-static int
+-nf_flow_table_iterate(struct nf_flowtable *flow_table,
++int nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ 		      void (*iter)(struct flow_offload *flow, void *data),
+ 		      void *data)
+ {
+@@ -326,7 +411,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ 	rhashtable_walk_start(&hti);
+ 
+ 	while ((tuplehash = rhashtable_walk_next(&hti))) {
+-
+ 		if (IS_ERR(tuplehash)) {
+ 			if (PTR_ERR(tuplehash) != -EAGAIN) {
+ 				err = PTR_ERR(tuplehash);
+@@ -346,23 +430,49 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ 
+ 	return err;
+ }
++EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
+ 
+-static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
++static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
+ {
+-	struct nf_flowtable *flow_table = data;
+-	struct flow_offload_entry *e;
+-	bool teardown;
++	struct dst_entry *dst;
+ 
+-	e = container_of(flow, struct flow_offload_entry, flow);
++	if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
++	    tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
++		dst = tuple->dst_cache;
++		if (!dst_check(dst, tuple->dst_cookie))
++			return true;
++	}
+ 
+-	teardown = flow->flags & (FLOW_OFFLOAD_DYING |
+-				  FLOW_OFFLOAD_TEARDOWN);
++	return false;
++}
+ 
+-	if (!teardown)
+-		nf_ct_offload_timeout(flow);
++static bool nf_flow_has_stale_dst(struct flow_offload *flow)
++{
++	return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
++	       flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
++}
+ 
+-	if (nf_flow_has_expired(flow) || teardown)
+-		flow_offload_del(flow_table, flow);
++static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
++{
++	struct nf_flowtable *flow_table = data;
++
++	if (nf_flow_has_expired(flow) ||
++	    nf_ct_is_dying(flow->ct) ||
++	    nf_flow_has_stale_dst(flow))
++		set_bit(NF_FLOW_TEARDOWN, &flow->flags);
++
++	if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
++		if (test_bit(NF_FLOW_HW, &flow->flags)) {
++			if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
++				nf_flow_offload_del(flow_table, flow);
++			else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
++				flow_offload_del(flow_table, flow);
++		} else {
++			flow_offload_del(flow_table, flow);
++		}
++	} else if (test_bit(NF_FLOW_HW, &flow->flags)) {
++		nf_flow_offload_stats(flow_table, flow);
++	}
+ }
+ 
+ static void nf_flow_offload_work_gc(struct work_struct *work)
+@@ -374,30 +484,20 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
+ 	queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
+ }
+ 
+-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
+-				__be16 port, __be16 new_port)
++static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
++				 __be16 port, __be16 new_port)
+ {
+ 	struct tcphdr *tcph;
+ 
+-	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+-	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+-		return -1;
+-
+ 	tcph = (void *)(skb_network_header(skb) + thoff);
+ 	inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
+-
+-	return 0;
+ }
+ 
+-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
+-				__be16 port, __be16 new_port)
++static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
++				 __be16 port, __be16 new_port)
+ {
+ 	struct udphdr *udph;
+ 
+-	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+-	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
+-		return -1;
+-
+ 	udph = (void *)(skb_network_header(skb) + thoff);
+ 	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ 		inet_proto_csum_replace2(&udph->check, skb, port,
+@@ -405,38 +505,28 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
+ 		if (!udph->check)
+ 			udph->check = CSUM_MANGLED_0;
+ 	}
+-
+-	return 0;
+ }
+ 
+-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
+-			    u8 protocol, __be16 port, __be16 new_port)
++static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
++			     u8 protocol, __be16 port, __be16 new_port)
+ {
+ 	switch (protocol) {
+ 	case IPPROTO_TCP:
+-		if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
+-			return NF_DROP;
++		nf_flow_nat_port_tcp(skb, thoff, port, new_port);
+ 		break;
+ 	case IPPROTO_UDP:
+-		if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
+-			return NF_DROP;
++		nf_flow_nat_port_udp(skb, thoff, port, new_port);
+ 		break;
+ 	}
+-
+-	return 0;
+ }
+ 
+-int nf_flow_snat_port(const struct flow_offload *flow,
+-		      struct sk_buff *skb, unsigned int thoff,
+-		      u8 protocol, enum flow_offload_tuple_dir dir)
++void nf_flow_snat_port(const struct flow_offload *flow,
++		       struct sk_buff *skb, unsigned int thoff,
++		       u8 protocol, enum flow_offload_tuple_dir dir)
+ {
+ 	struct flow_ports *hdr;
+ 	__be16 port, new_port;
+ 
+-	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
+-	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
+-		return -1;
+-
+ 	hdr = (void *)(skb_network_header(skb) + thoff);
+ 
+ 	switch (dir) {
+@@ -450,25 +540,19 @@ int nf_flow_snat_port(const struct flow_offload *flow,
+ 		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
+ 		hdr->dest = new_port;
+ 		break;
+-	default:
+-		return -1;
+ 	}
+ 
+-	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
++	nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_snat_port);
+ 
+-int nf_flow_dnat_port(const struct flow_offload *flow,
+-		      struct sk_buff *skb, unsigned int thoff,
+-		      u8 protocol, enum flow_offload_tuple_dir dir)
++void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
++		       unsigned int thoff, u8 protocol,
++		       enum flow_offload_tuple_dir dir)
+ {
+ 	struct flow_ports *hdr;
+ 	__be16 port, new_port;
+ 
+-	if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
+-	    skb_try_make_writable(skb, thoff + sizeof(*hdr)))
+-		return -1;
+-
+ 	hdr = (void *)(skb_network_header(skb) + thoff);
+ 
+ 	switch (dir) {
+@@ -482,11 +566,9 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
+ 		new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
+ 		hdr->source = new_port;
+ 		break;
+-	default:
+-		return -1;
+ 	}
+ 
+-	return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
++	nf_flow_nat_port(skb, thoff, protocol, port, new_port);
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
+ 
+@@ -494,7 +576,9 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
+ {
+ 	int err;
+ 
+-	INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
++	INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
++	flow_block_init(&flowtable->flow_block);
++	init_rwsem(&flowtable->flow_block_lock);
+ 
+ 	err = rhashtable_init(&flowtable->rhashtable,
+ 			      &nf_flow_offload_rhash_params);
+@@ -515,25 +599,24 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init);
+ static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
+ {
+ 	struct net_device *dev = data;
+-	struct flow_offload_entry *e;
+-
+-	e = container_of(flow, struct flow_offload_entry, flow);
+ 
+ 	if (!dev) {
+ 		flow_offload_teardown(flow);
+ 		return;
+ 	}
+-	if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
++
++	if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
+ 	    (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
+ 	     flow->tuplehash[1].tuple.iifidx == dev->ifindex))
+-		flow_offload_dead(flow);
++		flow_offload_teardown(flow);
+ }
+ 
+-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
+-					  struct net_device *dev)
++void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
++			      struct net_device *dev)
+ {
+ 	nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
+ 	flush_delayed_work(&flowtable->gc_work);
++	nf_flow_table_offload_flush(flowtable);
+ }
+ 
+ void nf_flow_table_cleanup(struct net_device *dev)
+@@ -542,7 +625,7 @@ void nf_flow_table_cleanup(struct net_device *dev)
+ 
+ 	mutex_lock(&flowtable_lock);
+ 	list_for_each_entry(flowtable, &flowtables, list)
+-		nf_flow_table_iterate_cleanup(flowtable, dev);
++		nf_flow_table_gc_cleanup(flowtable, dev);
+ 	mutex_unlock(&flowtable_lock);
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
+@@ -552,9 +635,14 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
+ 	mutex_lock(&flowtable_lock);
+ 	list_del(&flow_table->list);
+ 	mutex_unlock(&flowtable_lock);
++
+ 	cancel_delayed_work_sync(&flow_table->gc_work);
+ 	nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+ 	nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
++	nf_flow_table_offload_flush(flow_table);
++	if (nf_flowtable_hw_offload(flow_table))
++		nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
++				      flow_table);
+ 	rhashtable_destroy(&flow_table->rhashtable);
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_table_free);
+@@ -578,12 +666,23 @@ static struct notifier_block flow_offload_netdev_notifier = {
+ 
+ static int __init nf_flow_table_module_init(void)
+ {
+-	return register_netdevice_notifier(&flow_offload_netdev_notifier);
++	int ret;
++
++	ret = nf_flow_table_offload_init();
++	if (ret)
++		return ret;
++
++	ret = register_netdevice_notifier(&flow_offload_netdev_notifier);
++	if (ret)
++		nf_flow_table_offload_exit();
++
++	return ret;
+ }
+ 
+ static void __exit nf_flow_table_module_exit(void)
+ {
+ 	unregister_netdevice_notifier(&flow_offload_netdev_notifier);
++	nf_flow_table_offload_exit();
+ }
+ 
+ module_init(nf_flow_table_module_init);
+@@ -591,3 +690,4 @@ module_exit(nf_flow_table_module_exit);
+ 
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
++MODULE_DESCRIPTION("Netfilter flow table module");
+diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
+index e92aa6b7e..6257d87c3 100644
+--- a/net/netfilter/nf_flow_table_ip.c
++++ b/net/netfilter/nf_flow_table_ip.c
+@@ -7,11 +7,13 @@
+ #include <linux/ip.h>
+ #include <linux/ipv6.h>
+ #include <linux/netdevice.h>
++#include <linux/if_ether.h>
+ #include <net/ip.h>
+ #include <net/ipv6.h>
+ #include <net/ip6_route.h>
+ #include <net/neighbour.h>
+ #include <net/netfilter/nf_flow_table.h>
++#include <net/netfilter/nf_conntrack_acct.h>
+ /* For layer 4 checksum field offset. */
+ #include <linux/tcp.h>
+ #include <linux/udp.h>
+@@ -24,9 +26,6 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
+ 	if (proto != IPPROTO_TCP)
+ 		return 0;
+ 
+-	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
+-		return -1;
+-
+ 	tcph = (void *)(skb_network_header(skb) + thoff);
+ 	if (unlikely(tcph->fin || tcph->rst)) {
+ 		flow_offload_teardown(flow);
+@@ -36,30 +35,20 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
+ 	return 0;
+ }
+ 
+-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
+-			      __be32 addr, __be32 new_addr)
++static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
++			       __be32 addr, __be32 new_addr)
+ {
+ 	struct tcphdr *tcph;
+ 
+-	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+-	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+-		return -1;
+-
+ 	tcph = (void *)(skb_network_header(skb) + thoff);
+ 	inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
+-
+-	return 0;
+ }
+ 
+-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+-			      __be32 addr, __be32 new_addr)
++static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
++			       __be32 addr, __be32 new_addr)
+ {
+ 	struct udphdr *udph;
+ 
+-	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+-	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
+-		return -1;
+-
+ 	udph = (void *)(skb_network_header(skb) + thoff);
+ 	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ 		inet_proto_csum_replace4(&udph->check, skb, addr,
+@@ -67,31 +56,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
+ 		if (!udph->check)
+ 			udph->check = CSUM_MANGLED_0;
+ 	}
+-
+-	return 0;
+ }
+ 
+-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
+-				  unsigned int thoff, __be32 addr,
+-				  __be32 new_addr)
++static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
++				   unsigned int thoff, __be32 addr,
++				   __be32 new_addr)
+ {
+ 	switch (iph->protocol) {
+ 	case IPPROTO_TCP:
+-		if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
+-			return NF_DROP;
++		nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
+ 		break;
+ 	case IPPROTO_UDP:
+-		if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
+-			return NF_DROP;
++		nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
+ 		break;
+ 	}
+-
+-	return 0;
+ }
+ 
+-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+-			   struct iphdr *iph, unsigned int thoff,
+-			   enum flow_offload_tuple_dir dir)
++static void nf_flow_snat_ip(const struct flow_offload *flow,
++			    struct sk_buff *skb, struct iphdr *iph,
++			    unsigned int thoff, enum flow_offload_tuple_dir dir)
+ {
+ 	__be32 addr, new_addr;
+ 
+@@ -106,17 +89,15 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ 		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
+ 		iph->daddr = new_addr;
+ 		break;
+-	default:
+-		return -1;
+ 	}
+ 	csum_replace4(&iph->check, addr, new_addr);
+ 
+-	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
++	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+ }
+ 
+-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+-			   struct iphdr *iph, unsigned int thoff,
+-			   enum flow_offload_tuple_dir dir)
++static void nf_flow_dnat_ip(const struct flow_offload *flow,
++			    struct sk_buff *skb, struct iphdr *iph,
++			    unsigned int thoff, enum flow_offload_tuple_dir dir)
+ {
+ 	__be32 addr, new_addr;
+ 
+@@ -131,29 +112,24 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+ 		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
+ 		iph->saddr = new_addr;
+ 		break;
+-	default:
+-		return -1;
+ 	}
+ 	csum_replace4(&iph->check, addr, new_addr);
+ 
+-	return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
++	nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
+ }
+ 
+-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+-			  unsigned int thoff, enum flow_offload_tuple_dir dir)
++static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
++			  unsigned int thoff, enum flow_offload_tuple_dir dir,
++			  struct iphdr *iph)
+ {
+-	struct iphdr *iph = ip_hdr(skb);
+-
+-	if (flow->flags & FLOW_OFFLOAD_SNAT &&
+-	    (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+-	     nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
+-		return -1;
+-	if (flow->flags & FLOW_OFFLOAD_DNAT &&
+-	    (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+-	     nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
+-		return -1;
+-
+-	return 0;
++	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
++		nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
++		nf_flow_snat_ip(flow, skb, iph, thoff, dir);
++	}
++	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
++		nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
++		nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
++	}
+ }
+ 
+ static bool ip_has_options(unsigned int thoff)
+@@ -161,35 +137,70 @@ static bool ip_has_options(unsigned int thoff)
+ 	return thoff != sizeof(struct iphdr);
+ }
+ 
++static void nf_flow_tuple_encap(struct sk_buff *skb,
++				struct flow_offload_tuple *tuple)
++{
++	struct vlan_ethhdr *veth;
++	struct pppoe_hdr *phdr;
++	int i = 0;
++
++	if (skb_vlan_tag_present(skb)) {
++		tuple->encap[i].id = skb_vlan_tag_get(skb);
++		tuple->encap[i].proto = skb->vlan_proto;
++		i++;
++	}
++	switch (skb->protocol) {
++	case htons(ETH_P_8021Q):
++		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
++		tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
++		tuple->encap[i].proto = skb->protocol;
++		break;
++	case htons(ETH_P_PPP_SES):
++		phdr = (struct pppoe_hdr *)skb_mac_header(skb);
++		tuple->encap[i].id = ntohs(phdr->sid);
++		tuple->encap[i].proto = skb->protocol;
++		break;
++	}
++}
++
+ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
+-			    struct flow_offload_tuple *tuple)
++			    struct flow_offload_tuple *tuple, u32 *hdrsize,
++			    u32 offset)
+ {
+ 	struct flow_ports *ports;
+ 	unsigned int thoff;
+ 	struct iphdr *iph;
+ 
+-	if (!pskb_may_pull(skb, sizeof(*iph)))
++	if (!pskb_may_pull(skb, sizeof(*iph) + offset))
+ 		return -1;
+ 
+-	iph = ip_hdr(skb);
+-	thoff = iph->ihl * 4;
++	iph = (struct iphdr *)(skb_network_header(skb) + offset);
++	thoff = (iph->ihl * 4);
+ 
+ 	if (ip_is_fragment(iph) ||
+ 	    unlikely(ip_has_options(thoff)))
+ 		return -1;
+ 
+-	if (iph->protocol != IPPROTO_TCP &&
+-	    iph->protocol != IPPROTO_UDP)
++	thoff += offset;
++
++	switch (iph->protocol) {
++	case IPPROTO_TCP:
++		*hdrsize = sizeof(struct tcphdr);
++		break;
++	case IPPROTO_UDP:
++		*hdrsize = sizeof(struct udphdr);
++		break;
++	default:
+ 		return -1;
++	}
+ 
+ 	if (iph->ttl <= 1)
+ 		return -1;
+ 
+-	thoff = iph->ihl * 4;
+-	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
++	if (!pskb_may_pull(skb, thoff + *hdrsize))
+ 		return -1;
+ 
+-	iph = ip_hdr(skb);
++	iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ 	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+ 
+ 	tuple->src_v4.s_addr	= iph->saddr;
+@@ -199,6 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
+ 	tuple->l3proto		= AF_INET;
+ 	tuple->l4proto		= iph->protocol;
+ 	tuple->iifidx		= dev->ifindex;
++	nf_flow_tuple_encap(skb, tuple);
+ 
+ 	return 0;
+ }
+@@ -215,14 +227,6 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+ 	return true;
+ }
+ 
+-static int nf_flow_offload_dst_check(struct dst_entry *dst)
+-{
+-	if (unlikely(dst_xfrm(dst)))
+-		return dst_check(dst, 0) ? 0 : -1;
+-
+-	return 0;
+-}
+-
+ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
+ 				      const struct nf_hook_state *state,
+ 				      struct dst_entry *dst)
+@@ -233,6 +237,75 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
+ 	return NF_STOLEN;
+ }
+ 
++static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
++				       u32 *offset)
++{
++	struct vlan_ethhdr *veth;
++
++	switch (skb->protocol) {
++	case htons(ETH_P_8021Q):
++		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
++		if (veth->h_vlan_encapsulated_proto == proto) {
++			*offset += VLAN_HLEN;
++			return true;
++		}
++		break;
++	case htons(ETH_P_PPP_SES):
++		if (nf_flow_pppoe_proto(skb) == proto) {
++			*offset += PPPOE_SES_HLEN;
++			return true;
++		}
++		break;
++	}
++
++	return false;
++}
++
++static void nf_flow_encap_pop(struct sk_buff *skb,
++			      struct flow_offload_tuple_rhash *tuplehash)
++{
++	struct vlan_hdr *vlan_hdr;
++	int i;
++
++	for (i = 0; i < tuplehash->tuple.encap_num; i++) {
++		if (skb_vlan_tag_present(skb)) {
++			__vlan_hwaccel_clear_tag(skb);
++			continue;
++		}
++		switch (skb->protocol) {
++		case htons(ETH_P_8021Q):
++			vlan_hdr = (struct vlan_hdr *)skb->data;
++			__skb_pull(skb, VLAN_HLEN);
++			vlan_set_encap_proto(skb, vlan_hdr);
++			skb_reset_network_header(skb);
++			break;
++		case htons(ETH_P_PPP_SES):
++			skb->protocol = nf_flow_pppoe_proto(skb);
++			skb_pull(skb, PPPOE_SES_HLEN);
++			skb_reset_network_header(skb);
++			break;
++		}
++	}
++}
++
++static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
++				       const struct flow_offload_tuple_rhash *tuplehash,
++				       unsigned short type)
++{
++	struct net_device *outdev;
++
++	outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
++	if (!outdev)
++		return NF_DROP;	/* no device for the cached ifindex */
++
++	skb->dev = outdev;
++	dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
++			tuplehash->tuple.out.h_source, skb->len);
++	dev_queue_xmit(skb);
++
++	return NF_STOLEN;	/* skb was handed to dev_queue_xmit() */
++}
++
+ unsigned int
+ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ 			const struct nf_hook_state *state)
+@@ -243,15 +316,18 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ 	enum flow_offload_tuple_dir dir;
+ 	struct flow_offload *flow;
+ 	struct net_device *outdev;
++	u32 hdrsize, offset = 0;
++	unsigned int thoff, mtu;
+ 	struct rtable *rt;
+-	unsigned int thoff;
+ 	struct iphdr *iph;
+ 	__be32 nexthop;
++	int ret;
+ 
+-	if (skb->protocol != htons(ETH_P_IP))
++	if (skb->protocol != htons(ETH_P_IP) &&
++	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
+ 		return NF_ACCEPT;
+ 
+-	if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
++	if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
+ 		return NF_ACCEPT;
+ 
+ 	tuplehash = flow_offload_lookup(flow_table, &tuple);
+@@ -260,75 +336,80 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ 
+ 	dir = tuplehash->tuple.dir;
+ 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+-	rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+-	outdev = rt->dst.dev;
+-
+-	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+-		return NF_ACCEPT;
+-
+-	if (skb_try_make_writable(skb, sizeof(*iph)))
+-		return NF_DROP;
+ 
+-	thoff = ip_hdr(skb)->ihl * 4;
+-	if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
++	mtu = flow->tuplehash[dir].tuple.mtu + offset;
++	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
+ 		return NF_ACCEPT;
+ 
+-	if (nf_flow_offload_dst_check(&rt->dst)) {
+-		flow_offload_teardown(flow);
++	iph = (struct iphdr *)(skb_network_header(skb) + offset);
++	thoff = (iph->ihl * 4) + offset;
++	if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
+ 		return NF_ACCEPT;
+-	}
+ 
+-	if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
++	if (skb_try_make_writable(skb, thoff + hdrsize))
+ 		return NF_DROP;
+ 
+-	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
++	flow_offload_refresh(flow_table, flow);
++
++	nf_flow_encap_pop(skb, tuplehash);
++	thoff -= offset;
++
+ 	iph = ip_hdr(skb);
++	nf_flow_nat_ip(flow, skb, thoff, dir, iph);
++
+ 	ip_decrease_ttl(iph);
+ 	skb->tstamp = 0;
+ 
+-	if (unlikely(dst_xfrm(&rt->dst))) {
++	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
++		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
++
++	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
++		rt = (struct rtable *)tuplehash->tuple.dst_cache;
+ 		memset(skb->cb, 0, sizeof(struct inet_skb_parm));
+ 		IPCB(skb)->iif = skb->dev->ifindex;
+ 		IPCB(skb)->flags = IPSKB_FORWARDED;
+ 		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
+ 	}
+ 
+-	skb->dev = outdev;
+-	nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+-	skb_dst_set_noref(skb, &rt->dst);
+-	neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
++	switch (tuplehash->tuple.xmit_type) {
++	case FLOW_OFFLOAD_XMIT_NEIGH:
++		rt = (struct rtable *)tuplehash->tuple.dst_cache;
++		outdev = rt->dst.dev;
++		skb->dev = outdev;
++		nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
++		skb_dst_set_noref(skb, &rt->dst);
++		neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
++		ret = NF_STOLEN;
++		break;
++	case FLOW_OFFLOAD_XMIT_DIRECT:
++		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
++		if (ret == NF_DROP)
++			flow_offload_teardown(flow);
++		break;
++	}
+ 
+-	return NF_STOLEN;
++	return ret;
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
+ 
+-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+-				struct in6_addr *addr,
+-				struct in6_addr *new_addr)
++static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
++				 struct in6_addr *addr,
++				 struct in6_addr *new_addr,
++				 struct ipv6hdr *ip6h)
+ {
+ 	struct tcphdr *tcph;
+ 
+-	if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+-	    skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+-		return -1;
+-
+ 	tcph = (void *)(skb_network_header(skb) + thoff);
+ 	inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
+ 				  new_addr->s6_addr32, true);
+-
+-	return 0;
+ }
+ 
+-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+-				struct in6_addr *addr,
+-				struct in6_addr *new_addr)
++static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
++				 struct in6_addr *addr,
++				 struct in6_addr *new_addr)
+ {
+ 	struct udphdr *udph;
+ 
+-	if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+-	    skb_try_make_writable(skb, thoff + sizeof(*udph)))
+-		return -1;
+-
+ 	udph = (void *)(skb_network_header(skb) + thoff);
+ 	if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ 		inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
+@@ -336,32 +417,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+ 		if (!udph->check)
+ 			udph->check = CSUM_MANGLED_0;
+ 	}
+-
+-	return 0;
+ }
+ 
+-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+-				    unsigned int thoff, struct in6_addr *addr,
+-				    struct in6_addr *new_addr)
++static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
++				     unsigned int thoff, struct in6_addr *addr,
++				     struct in6_addr *new_addr)
+ {
+ 	switch (ip6h->nexthdr) {
+ 	case IPPROTO_TCP:
+-		if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
+-			return NF_DROP;
++		nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
+ 		break;
+ 	case IPPROTO_UDP:
+-		if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
+-			return NF_DROP;
++		nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
+ 		break;
+ 	}
+-
+-	return 0;
+ }
+ 
+-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
+-			     struct sk_buff *skb, struct ipv6hdr *ip6h,
+-			     unsigned int thoff,
+-			     enum flow_offload_tuple_dir dir)
++static void nf_flow_snat_ipv6(const struct flow_offload *flow,
++			      struct sk_buff *skb, struct ipv6hdr *ip6h,
++			      unsigned int thoff,
++			      enum flow_offload_tuple_dir dir)
+ {
+ 	struct in6_addr addr, new_addr;
+ 
+@@ -376,17 +451,15 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow,
+ 		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
+ 		ip6h->daddr = new_addr;
+ 		break;
+-	default:
+-		return -1;
+ 	}
+ 
+-	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
++	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+ }
+ 
+-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
+-			     struct sk_buff *skb, struct ipv6hdr *ip6h,
+-			     unsigned int thoff,
+-			     enum flow_offload_tuple_dir dir)
++static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
++			      struct sk_buff *skb, struct ipv6hdr *ip6h,
++			      unsigned int thoff,
++			      enum flow_offload_tuple_dir dir)
+ {
+ 	struct in6_addr addr, new_addr;
+ 
+@@ -401,56 +474,60 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
+ 		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
+ 		ip6h->saddr = new_addr;
+ 		break;
+-	default:
+-		return -1;
+ 	}
+ 
+-	return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
++	nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+ }
+ 
+-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
+-			    struct sk_buff *skb,
+-			    enum flow_offload_tuple_dir dir)
++static void nf_flow_nat_ipv6(const struct flow_offload *flow,
++			     struct sk_buff *skb,
++			     enum flow_offload_tuple_dir dir,
++			     struct ipv6hdr *ip6h)
+ {
+-	struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ 	unsigned int thoff = sizeof(*ip6h);
+ 
+-	if (flow->flags & FLOW_OFFLOAD_SNAT &&
+-	    (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+-	     nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+-		return -1;
+-	if (flow->flags & FLOW_OFFLOAD_DNAT &&
+-	    (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+-	     nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+-		return -1;
+-
+-	return 0;
++	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
++		nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
++		nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
++	}
++	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
++		nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
++		nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
++	}
+ }
+ 
+ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
+-			      struct flow_offload_tuple *tuple)
++			      struct flow_offload_tuple *tuple, u32 *hdrsize,
++			      u32 offset)
+ {
+ 	struct flow_ports *ports;
+ 	struct ipv6hdr *ip6h;
+ 	unsigned int thoff;
+ 
+-	if (!pskb_may_pull(skb, sizeof(*ip6h)))
++	thoff = sizeof(*ip6h) + offset;
++	if (!pskb_may_pull(skb, thoff))
+ 		return -1;
+ 
+-	ip6h = ipv6_hdr(skb);
++	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+ 
+-	if (ip6h->nexthdr != IPPROTO_TCP &&
+-	    ip6h->nexthdr != IPPROTO_UDP)
++	switch (ip6h->nexthdr) {
++	case IPPROTO_TCP:
++		*hdrsize = sizeof(struct tcphdr);
++		break;
++	case IPPROTO_UDP:
++		*hdrsize = sizeof(struct udphdr);
++		break;
++	default:
+ 		return -1;
++	}
+ 
+ 	if (ip6h->hop_limit <= 1)
+ 		return -1;
+ 
+-	thoff = sizeof(*ip6h);
+-	if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
++	if (!pskb_may_pull(skb, thoff + *hdrsize))
+ 		return -1;
+ 
+-	ip6h = ipv6_hdr(skb);
++	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+ 	ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+ 
+ 	tuple->src_v6		= ip6h->saddr;
+@@ -460,6 +537,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
+ 	tuple->l3proto		= AF_INET6;
+ 	tuple->l4proto		= ip6h->nexthdr;
+ 	tuple->iifidx		= dev->ifindex;
++	nf_flow_tuple_encap(skb, tuple);
+ 
+ 	return 0;
+ }
+@@ -475,13 +553,17 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ 	const struct in6_addr *nexthop;
+ 	struct flow_offload *flow;
+ 	struct net_device *outdev;
++	unsigned int thoff, mtu;
++	u32 hdrsize, offset = 0;
+ 	struct ipv6hdr *ip6h;
+ 	struct rt6_info *rt;
++	int ret;
+ 
+-	if (skb->protocol != htons(ETH_P_IPV6))
++	if (skb->protocol != htons(ETH_P_IPV6) &&
++	    !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
+ 		return NF_ACCEPT;
+ 
+-	if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
++	if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
+ 		return NF_ACCEPT;
+ 
+ 	tuplehash = flow_offload_lookup(flow_table, &tuple);
+@@ -490,44 +572,57 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ 
+ 	dir = tuplehash->tuple.dir;
+ 	flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+-	rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+-	outdev = rt->dst.dev;
+-
+-	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+-		return NF_ACCEPT;
+ 
+-	if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
+-				sizeof(*ip6h)))
++	mtu = flow->tuplehash[dir].tuple.mtu + offset;
++	if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
+ 		return NF_ACCEPT;
+ 
+-	if (nf_flow_offload_dst_check(&rt->dst)) {
+-		flow_offload_teardown(flow);
++	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
++	thoff = sizeof(*ip6h) + offset;
++	if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
+ 		return NF_ACCEPT;
+-	}
+ 
+-	if (skb_try_make_writable(skb, sizeof(*ip6h)))
++	if (skb_try_make_writable(skb, thoff + hdrsize))
+ 		return NF_DROP;
+ 
+-	if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
+-		return NF_DROP;
++	flow_offload_refresh(flow_table, flow);
++
++	nf_flow_encap_pop(skb, tuplehash);
+ 
+-	flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ 	ip6h = ipv6_hdr(skb);
++	nf_flow_nat_ipv6(flow, skb, dir, ip6h);
++
+ 	ip6h->hop_limit--;
+ 	skb->tstamp = 0;
+ 
+-	if (unlikely(dst_xfrm(&rt->dst))) {
++	if (flow_table->flags & NF_FLOWTABLE_COUNTER)
++		nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
++
++	if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
++		rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
+ 		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+ 		IP6CB(skb)->iif = skb->dev->ifindex;
+ 		IP6CB(skb)->flags = IP6SKB_FORWARDED;
+ 		return nf_flow_xmit_xfrm(skb, state, &rt->dst);
+ 	}
+ 
+-	skb->dev = outdev;
+-	nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+-	skb_dst_set_noref(skb, &rt->dst);
+-	neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
++	switch (tuplehash->tuple.xmit_type) {
++	case FLOW_OFFLOAD_XMIT_NEIGH:
++		rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
++		outdev = rt->dst.dev;
++		skb->dev = outdev;
++		nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
++		skb_dst_set_noref(skb, &rt->dst);
++		neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
++		ret = NF_STOLEN;
++		break;
++	case FLOW_OFFLOAD_XMIT_DIRECT:
++		ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
++		if (ret == NF_DROP)
++			flow_offload_teardown(flow);
++		break;
++	}
+ 
+-	return NF_STOLEN;
++	return ret;
+ }
+ EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
+diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
+new file mode 100644
+index 000000000..d94c6fb92
+--- /dev/null
++++ b/net/netfilter/nf_flow_table_offload.c
+@@ -0,0 +1,1191 @@
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/netfilter.h>
++#include <linux/rhashtable.h>
++#include <linux/netdevice.h>
++#include <linux/tc_act/tc_csum.h>
++#include <net/flow_offload.h>
++#include <net/netfilter/nf_flow_table.h>
++#include <net/netfilter/nf_tables.h>
++#include <net/netfilter/nf_conntrack.h>
++#include <net/netfilter/nf_conntrack_acct.h>
++#include <net/netfilter/nf_conntrack_core.h>
++#include <net/netfilter/nf_conntrack_tuple.h>
++
++static struct workqueue_struct *nf_flow_offload_add_wq;
++static struct workqueue_struct *nf_flow_offload_del_wq;
++static struct workqueue_struct *nf_flow_offload_stats_wq;
++
++struct flow_offload_work {
++	struct list_head	list;
++	enum flow_cls_command	cmd;
++	int			priority;
++	struct nf_flowtable	*flowtable;
++	struct flow_offload	*flow;
++	struct work_struct	work;
++};
++
++#define NF_FLOW_DISSECTOR(__match, __type, __field)	\
++	(__match)->dissector.offset[__type] =		\
++		offsetof(struct nf_flow_key, __field)
++
++static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
++				   struct ip_tunnel_info *tun_info)
++{
++	struct nf_flow_key *mask = &match->mask;
++	struct nf_flow_key *key = &match->key;
++	unsigned int enc_keys;
++
++	if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
++		return;
++
++	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
++	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
++	key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
++	mask->enc_key_id.keyid = 0xffffffff;
++	enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
++		   BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
++
++	if (ip_tunnel_info_af(tun_info) == AF_INET) {
++		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
++				  enc_ipv4);
++		key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
++		key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
++		if (key->enc_ipv4.src)
++			mask->enc_ipv4.src = 0xffffffff;
++		if (key->enc_ipv4.dst)
++			mask->enc_ipv4.dst = 0xffffffff;
++		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
++		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
++	} else {
++		memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
++		       sizeof(struct in6_addr));
++		memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
++		       sizeof(struct in6_addr));
++		if (memcmp(&key->enc_ipv6.src, &in6addr_any,
++			   sizeof(struct in6_addr)))
++			memset(&mask->enc_ipv6.src, 0xff,
++			       sizeof(struct in6_addr));
++		if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
++			   sizeof(struct in6_addr)))
++			memset(&mask->enc_ipv6.dst, 0xff,
++			       sizeof(struct in6_addr));
++		enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
++		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
++	}
++
++	match->dissector.used_keys |= enc_keys;
++}
++
++static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
++				    struct flow_dissector_key_vlan *mask,
++				    u16 vlan_id, __be16 proto)
++{
++	key->vlan_id = vlan_id;
++	mask->vlan_id = VLAN_VID_MASK;
++	key->vlan_tpid = proto;
++	mask->vlan_tpid = 0xffff;
++}
++
++static int nf_flow_rule_match(struct nf_flow_match *match,
++			      const struct flow_offload_tuple *tuple,
++			      struct dst_entry *other_dst)
++{
++	struct nf_flow_key *mask = &match->mask;
++	struct nf_flow_key *key = &match->key;
++	struct ip_tunnel_info *tun_info;
++	bool vlan_encap = false;
++
++	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
++	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
++	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
++	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
++	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
++	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
++	NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
++
++	if (other_dst && other_dst->lwtstate) {
++		tun_info = lwt_tun_info(other_dst->lwtstate);
++		nf_flow_rule_lwt_match(match, tun_info);
++	}
++
++	key->meta.ingress_ifindex = tuple->iifidx;
++	mask->meta.ingress_ifindex = 0xffffffff;
++
++	if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
++	    tuple->encap[0].proto == htons(ETH_P_8021Q)) {
++		NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
++		nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
++					tuple->encap[0].id,
++					tuple->encap[0].proto);
++		vlan_encap = true;
++	}
++
++	if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
++	    tuple->encap[1].proto == htons(ETH_P_8021Q)) {
++		if (vlan_encap) {
++			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
++					  cvlan);
++			nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
++						tuple->encap[1].id,
++						tuple->encap[1].proto);
++		} else {
++			NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
++					  vlan);
++			nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
++						tuple->encap[1].id,
++						tuple->encap[1].proto);
++		}
++	}
++
++	switch (tuple->l3proto) {
++	case AF_INET:
++		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
++		key->basic.n_proto = htons(ETH_P_IP);
++		key->ipv4.src = tuple->src_v4.s_addr;
++		mask->ipv4.src = 0xffffffff;
++		key->ipv4.dst = tuple->dst_v4.s_addr;
++		mask->ipv4.dst = 0xffffffff;
++		break;
++       case AF_INET6:
++		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
++		key->basic.n_proto = htons(ETH_P_IPV6);
++		key->ipv6.src = tuple->src_v6;
++		memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
++		key->ipv6.dst = tuple->dst_v6;
++		memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
++		break;
++	default:
++		return -EOPNOTSUPP;
++	}
++	mask->control.addr_type = 0xffff;
++	match->dissector.used_keys |= BIT(key->control.addr_type);
++	mask->basic.n_proto = 0xffff;
++
++	switch (tuple->l4proto) {
++	case IPPROTO_TCP:
++		key->tcp.flags = 0;
++		mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
++		match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
++		break;
++	case IPPROTO_UDP:
++		break;
++	default:
++		return -EOPNOTSUPP;
++	}
++
++	key->basic.ip_proto = tuple->l4proto;
++	mask->basic.ip_proto = 0xff;
++
++	key->tp.src = tuple->src_port;
++	mask->tp.src = 0xffff;
++	key->tp.dst = tuple->dst_port;
++	mask->tp.dst = 0xffff;
++
++	match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
++				      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
++				      BIT(FLOW_DISSECTOR_KEY_BASIC) |
++				      BIT(FLOW_DISSECTOR_KEY_PORTS);
++	return 0;
++}
++
++static void flow_offload_mangle(struct flow_action_entry *entry,
++				enum flow_action_mangle_base htype, u32 offset,
++				const __be32 *value, const __be32 *mask)
++{
++	entry->id = FLOW_ACTION_MANGLE;
++	entry->mangle.htype = htype;
++	entry->mangle.offset = offset;
++	memcpy(&entry->mangle.mask, mask, sizeof(u32));
++	memcpy(&entry->mangle.val, value, sizeof(u32));
++}
++
++static inline struct flow_action_entry *
++flow_action_entry_next(struct nf_flow_rule *flow_rule)
++{
++	int i = flow_rule->rule->action.num_entries++;	/* take next slot */
++
++	return &flow_rule->rule->action.entries[i];	/* unchecked index */
++}
++
++static int flow_offload_eth_src(struct net *net,
++				const struct flow_offload *flow,
++				enum flow_offload_tuple_dir dir,
++				struct nf_flow_rule *flow_rule)
++{
++	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
++	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
++	const struct flow_offload_tuple *other_tuple, *this_tuple;
++	struct net_device *dev = NULL;
++	const unsigned char *addr;
++	u32 mask, val;
++	u16 val16;
++
++	this_tuple = &flow->tuplehash[dir].tuple;
++
++	switch (this_tuple->xmit_type) {
++	case FLOW_OFFLOAD_XMIT_DIRECT:
++		addr = this_tuple->out.h_source;
++		break;
++	case FLOW_OFFLOAD_XMIT_NEIGH:
++		other_tuple = &flow->tuplehash[!dir].tuple;
++		dev = dev_get_by_index(net, other_tuple->iifidx);
++		if (!dev)
++			return -ENOENT;
++
++		addr = dev->dev_addr;
++		break;
++	default:
++		return -EOPNOTSUPP;
++	}
++
++	mask = ~0xffff0000;
++	memcpy(&val16, addr, 2);
++	val = val16 << 16;
++	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
++			    &val, &mask);
++
++	mask = ~0xffffffff;
++	memcpy(&val, addr + 2, 4);
++	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
++			    &val, &mask);
++
++	if (dev)
++		dev_put(dev);
++
++	return 0;
++}
++
++static int flow_offload_eth_dst(struct net *net,
++				const struct flow_offload *flow,
++				enum flow_offload_tuple_dir dir,
++				struct nf_flow_rule *flow_rule)
++{
++	struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
++	struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
++	const struct flow_offload_tuple *other_tuple, *this_tuple;
++	const struct dst_entry *dst_cache;
++	unsigned char ha[ETH_ALEN];
++	struct neighbour *n;
++	const void *daddr;
++	u32 mask, val;
++	u8 nud_state;
++	u16 val16;
++
++	this_tuple = &flow->tuplehash[dir].tuple;
++
++	switch (this_tuple->xmit_type) {
++	case FLOW_OFFLOAD_XMIT_DIRECT:
++		ether_addr_copy(ha, this_tuple->out.h_dest);
++		break;
++	case FLOW_OFFLOAD_XMIT_NEIGH:
++		other_tuple = &flow->tuplehash[!dir].tuple;
++		daddr = &other_tuple->src_v4;
++		dst_cache = this_tuple->dst_cache;
++		n = dst_neigh_lookup(dst_cache, daddr);
++		if (!n)
++			return -ENOENT;
++
++		read_lock_bh(&n->lock);
++		nud_state = n->nud_state;
++		ether_addr_copy(ha, n->ha);
++		read_unlock_bh(&n->lock);
++		neigh_release(n);
++
++		if (!(nud_state & NUD_VALID))
++			return -ENOENT;
++		break;
++	default:
++		return -EOPNOTSUPP;
++	}
++
++	mask = ~0xffffffff;
++	memcpy(&val, ha, 4);
++	flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
++			    &val, &mask);
++
++	mask = ~0x0000ffff;
++	memcpy(&val16, ha + 4, 2);
++	val = val16;
++	flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
++			    &val, &mask);
++
++	return 0;
++}
++
++static void flow_offload_ipv4_snat(struct net *net,
++				   const struct flow_offload *flow,
++				   enum flow_offload_tuple_dir dir,
++				   struct nf_flow_rule *flow_rule)
++{
++	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
++	u32 mask = ~htonl(0xffffffff);
++	__be32 addr;
++	u32 offset;
++
++	switch (dir) {
++	case FLOW_OFFLOAD_DIR_ORIGINAL:
++		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
++		offset = offsetof(struct iphdr, saddr);
++		break;
++	case FLOW_OFFLOAD_DIR_REPLY:
++		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
++		offset = offsetof(struct iphdr, daddr);
++		break;
++	default:
++		return;
++	}
++
++	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
++			    &addr, &mask);
++}
++
++static void flow_offload_ipv4_dnat(struct net *net,
++				   const struct flow_offload *flow,
++				   enum flow_offload_tuple_dir dir,
++				   struct nf_flow_rule *flow_rule)
++{
++	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
++	u32 mask = ~htonl(0xffffffff);
++	__be32 addr;
++	u32 offset;
++
++	switch (dir) {
++	case FLOW_OFFLOAD_DIR_ORIGINAL:
++		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
++		offset = offsetof(struct iphdr, daddr);
++		break;
++	case FLOW_OFFLOAD_DIR_REPLY:
++		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
++		offset = offsetof(struct iphdr, saddr);
++		break;
++	default:
++		return;
++	}
++
++	flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
++			    &addr, &mask);
++}
++
++/* Queue the mangle actions that rewrite a whole IPv6 address at @offset. */
++static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
++				     unsigned int offset,
++				     const __be32 *addr, const __be32 *mask)
++{
++	int i;
++
++	/* One 32-bit action per address word; @offset counts bytes. */
++	for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++)
++		flow_offload_mangle(flow_action_entry_next(flow_rule),
++				    FLOW_ACT_MANGLE_HDR_TYPE_IP6,
++				    offset + i * sizeof(u32), &addr[i], mask);
++}
++
++static void flow_offload_ipv6_snat(struct net *net,
++				   const struct flow_offload *flow,
++				   enum flow_offload_tuple_dir dir,
++				   struct nf_flow_rule *flow_rule)
++{
++	u32 mask = ~htonl(0xffffffff);
++	const __be32 *addr;
++	u32 offset;
++
++	switch (dir) {
++	case FLOW_OFFLOAD_DIR_ORIGINAL:
++		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
++		offset = offsetof(struct ipv6hdr, saddr);
++		break;
++	case FLOW_OFFLOAD_DIR_REPLY:
++		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
++		offset = offsetof(struct ipv6hdr, daddr);
++		break;
++	default:
++		return;
++	}
++
++	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
++}
++
++static void flow_offload_ipv6_dnat(struct net *net,
++				   const struct flow_offload *flow,
++				   enum flow_offload_tuple_dir dir,
++				   struct nf_flow_rule *flow_rule)
++{
++	u32 mask = ~htonl(0xffffffff);
++	const __be32 *addr;
++	u32 offset;
++
++	switch (dir) {
++	case FLOW_OFFLOAD_DIR_ORIGINAL:
++		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
++		offset = offsetof(struct ipv6hdr, daddr);
++		break;
++	case FLOW_OFFLOAD_DIR_REPLY:
++		addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
++		offset = offsetof(struct ipv6hdr, saddr);
++		break;
++	default:
++		return;
++	}
++
++	flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
++}
++
++static int flow_offload_l4proto(const struct flow_offload *flow)
++{
++	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
++	u8 type = 0;
++
++	switch (protonum) {
++	case IPPROTO_TCP:
++		type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
++		break;
++	case IPPROTO_UDP:
++		type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
++		break;
++	default:
++		break;
++	}
++
++	return type;
++}
++
++static void flow_offload_port_snat(struct net *net,
++				   const struct flow_offload *flow,
++				   enum flow_offload_tuple_dir dir,
++				   struct nf_flow_rule *flow_rule)
++{
++	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
++	u32 mask, port;
++	u32 offset;
++
++	switch (dir) {
++	case FLOW_OFFLOAD_DIR_ORIGINAL:
++		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
++		offset = 0; /* offsetof(struct tcphdr, source); */
++		port = htonl(port << 16);
++		mask = ~htonl(0xffff0000);
++		break;
++	case FLOW_OFFLOAD_DIR_REPLY:
++		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
++		offset = 0; /* offsetof(struct tcphdr, dest); */
++		port = htonl(port);
++		mask = ~htonl(0xffff);
++		break;
++	default:
++		return;
++	}
++
++	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
++			    &port, &mask);
++}
++
++static void flow_offload_port_dnat(struct net *net,
++				   const struct flow_offload *flow,
++				   enum flow_offload_tuple_dir dir,
++				   struct nf_flow_rule *flow_rule)
++{
++	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
++	u32 mask, port;
++	u32 offset;
++
++	switch (dir) {
++	case FLOW_OFFLOAD_DIR_ORIGINAL:
++		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
++		offset = 0; /* offsetof(struct tcphdr, dest); */
++		port = htonl(port);
++		mask = ~htonl(0xffff);
++		break;
++	case FLOW_OFFLOAD_DIR_REPLY:
++		port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
++		offset = 0; /* offsetof(struct tcphdr, source); */
++		port = htonl(port << 16);
++		mask = ~htonl(0xffff0000);
++		break;
++	default:
++		return;
++	}
++
++	flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
++			    &port, &mask);
++}
++
++static void flow_offload_ipv4_checksum(struct net *net,
++				       const struct flow_offload *flow,
++				       struct nf_flow_rule *flow_rule)
++{
++	u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
++	struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
++
++	entry->id = FLOW_ACTION_CSUM;
++	entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
++
++	switch (protonum) {
++	case IPPROTO_TCP:
++		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
++		break;
++	case IPPROTO_UDP:
++		entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
++		break;
++	}
++}
++
++static void flow_offload_redirect(struct net *net,
++				  const struct flow_offload *flow,
++				  enum flow_offload_tuple_dir dir,
++				  struct nf_flow_rule *flow_rule)
++{
++	const struct flow_offload_tuple *this_tuple, *other_tuple;
++	struct flow_action_entry *entry;
++	struct net_device *dev;
++	int ifindex;
++
++	this_tuple = &flow->tuplehash[dir].tuple;
++	switch (this_tuple->xmit_type) {
++	case FLOW_OFFLOAD_XMIT_DIRECT:
++		/* this_tuple already points at tuplehash[dir]; use its out.hw_ifidx */
++		ifindex = this_tuple->out.hw_ifidx;
++		break;
++	case FLOW_OFFLOAD_XMIT_NEIGH:
++		other_tuple = &flow->tuplehash[!dir].tuple;
++		ifindex = other_tuple->iifidx;	/* input device of the opposite direction */
++		break;
++	default:
++		return;	/* other xmit types get no redirect action */
++	}
++
++	dev = dev_get_by_index(net, ifindex);
++	if (!dev)
++		return;
++
++	entry = flow_action_entry_next(flow_rule);
++	entry->id = FLOW_ACTION_REDIRECT;
++	entry->dev = dev;	/* dev_put() happens in __nf_flow_offload_destroy() */
++}
++
++static void flow_offload_encap_tunnel(const struct flow_offload *flow,
++				      enum flow_offload_tuple_dir dir,
++				      struct nf_flow_rule *flow_rule)
++{
++	const struct flow_offload_tuple *this_tuple;
++	struct flow_action_entry *entry;
++	struct dst_entry *dst;
++
++	this_tuple = &flow->tuplehash[dir].tuple;
++	if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
++		return;
++
++	dst = this_tuple->dst_cache;
++	if (dst && dst->lwtstate) {
++		struct ip_tunnel_info *tun_info;
++
++		tun_info = lwt_tun_info(dst->lwtstate);
++		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
++			entry = flow_action_entry_next(flow_rule);
++			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
++			entry->tunnel = tun_info;
++		}
++	}
++}
++
++static void flow_offload_decap_tunnel(const struct flow_offload *flow,
++				      enum flow_offload_tuple_dir dir,
++				      struct nf_flow_rule *flow_rule)
++{
++	const struct flow_offload_tuple *other_tuple;
++	struct flow_action_entry *entry;
++	struct dst_entry *dst;
++
++	other_tuple = &flow->tuplehash[!dir].tuple;
++	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
++		return;
++
++	dst = other_tuple->dst_cache;
++	if (dst && dst->lwtstate) {
++		struct ip_tunnel_info *tun_info;
++
++		tun_info = lwt_tun_info(dst->lwtstate);
++		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
++			entry = flow_action_entry_next(flow_rule);
++			entry->id = FLOW_ACTION_TUNNEL_DECAP;
++		}
++	}
++}
++
++static int
++nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
++			  enum flow_offload_tuple_dir dir,
++			  struct nf_flow_rule *flow_rule)
++{
++	const struct flow_offload_tuple *other_tuple;
++	const struct flow_offload_tuple *tuple;
++	int i;
++
++	flow_offload_decap_tunnel(flow, dir, flow_rule);
++	flow_offload_encap_tunnel(flow, dir, flow_rule);
++
++	if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
++	    flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
++		return -1;
++
++	tuple = &flow->tuplehash[dir].tuple;
++
++	for (i = 0; i < tuple->encap_num; i++) {
++		struct flow_action_entry *entry;
++
++		if (tuple->in_vlan_ingress & BIT(i))
++			continue;
++
++		if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
++			entry = flow_action_entry_next(flow_rule);
++			entry->id = FLOW_ACTION_VLAN_POP;
++		}
++	}
++
++	other_tuple = &flow->tuplehash[!dir].tuple;
++
++	for (i = 0; i < other_tuple->encap_num; i++) {
++		struct flow_action_entry *entry;
++
++		if (other_tuple->in_vlan_ingress & BIT(i))
++			continue;
++
++		entry = flow_action_entry_next(flow_rule);
++
++		switch (other_tuple->encap[i].proto) {
++		case htons(ETH_P_PPP_SES):
++			entry->id = FLOW_ACTION_PPPOE_PUSH;
++			entry->pppoe.sid = other_tuple->encap[i].id;
++			break;
++		case htons(ETH_P_8021Q):
++			entry->id = FLOW_ACTION_VLAN_PUSH;
++			entry->vlan.vid = other_tuple->encap[i].id;
++			entry->vlan.proto = other_tuple->encap[i].proto;
++			break;
++		}
++	}
++
++	return 0;
++}
++
++int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
++			    enum flow_offload_tuple_dir dir,
++			    struct nf_flow_rule *flow_rule)
++{
++	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
++		return -1;
++
++	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
++		flow_offload_ipv4_snat(net, flow, dir, flow_rule);
++		flow_offload_port_snat(net, flow, dir, flow_rule);
++	}
++	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
++		flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
++		flow_offload_port_dnat(net, flow, dir, flow_rule);
++	}
++	if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
++	    test_bit(NF_FLOW_DNAT, &flow->flags))
++		flow_offload_ipv4_checksum(net, flow, flow_rule);
++
++	flow_offload_redirect(net, flow, dir, flow_rule);
++
++	return 0;
++}
++EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
++
++int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
++			    enum flow_offload_tuple_dir dir,
++			    struct nf_flow_rule *flow_rule)
++{
++	if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
++		return -1;
++
++	if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
++		flow_offload_ipv6_snat(net, flow, dir, flow_rule);
++		flow_offload_port_snat(net, flow, dir, flow_rule);
++	}
++	if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
++		flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
++		flow_offload_port_dnat(net, flow, dir, flow_rule);
++	}
++
++	flow_offload_redirect(net, flow, dir, flow_rule);
++
++	return 0;
++}
++EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
++
++#define NF_FLOW_RULE_ACTION_MAX	16
++
++static struct nf_flow_rule *
++nf_flow_offload_rule_alloc(struct net *net,
++			   const struct flow_offload_work *offload,
++			   enum flow_offload_tuple_dir dir)
++{
++	const struct nf_flowtable *flowtable = offload->flowtable;
++	const struct flow_offload_tuple *tuple, *other_tuple;
++	const struct flow_offload *flow = offload->flow;
++	struct dst_entry *other_dst = NULL;
++	struct nf_flow_rule *flow_rule;
++	int err = -ENOMEM;
++
++	flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
++	if (!flow_rule)
++		goto err_flow;
++
++	flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
++	if (!flow_rule->rule)
++		goto err_flow_rule;
++
++	flow_rule->rule->match.dissector = &flow_rule->match.dissector;
++	flow_rule->rule->match.mask = &flow_rule->match.mask;
++	flow_rule->rule->match.key = &flow_rule->match.key;
++
++	tuple = &flow->tuplehash[dir].tuple;
++	other_tuple = &flow->tuplehash[!dir].tuple;
++	if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
++		other_dst = other_tuple->dst_cache;
++
++	err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
++	if (err < 0)
++		goto err_flow_match;
++
++	flow_rule->rule->action.num_entries = 0;
++	if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
++		goto err_flow_match;
++
++	return flow_rule;
++
++err_flow_match:
++	kfree(flow_rule->rule);
++err_flow_rule:
++	kfree(flow_rule);
++err_flow:
++	return NULL;
++}
++
++static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
++{
++	struct flow_action_entry *entry;
++	int i;
++
++	for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
++		entry = &flow_rule->rule->action.entries[i];
++		if (entry->id != FLOW_ACTION_REDIRECT)
++			continue;
++
++		dev_put(entry->dev);
++	}
++	kfree(flow_rule->rule);
++	kfree(flow_rule);
++}
++
++static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
++{
++	int i;
++
++	for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
++		__nf_flow_offload_destroy(flow_rule[i]);
++}
++
++static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
++				 struct nf_flow_rule *flow_rule[])
++{
++	struct net *net = read_pnet(&offload->flowtable->net);
++
++	flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
++						  FLOW_OFFLOAD_DIR_ORIGINAL);
++	if (!flow_rule[0])
++		return -ENOMEM;
++
++	flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
++						  FLOW_OFFLOAD_DIR_REPLY);
++	if (!flow_rule[1]) {
++		__nf_flow_offload_destroy(flow_rule[0]);
++		return -ENOMEM;
++	}
++
++	return 0;
++}
++
++static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
++				 __be16 proto, int priority,
++				 enum flow_cls_command cmd,
++				 const struct flow_offload_tuple *tuple,
++				 struct netlink_ext_ack *extack)
++{
++	cls_flow->common.protocol = proto;
++	cls_flow->common.prio = priority;
++	cls_flow->common.extack = extack;
++	cls_flow->command = cmd;
++	cls_flow->cookie = (unsigned long)tuple;
++}
++
++static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
++				 struct flow_offload *flow,
++				 struct nf_flow_rule *flow_rule,
++				 enum flow_offload_tuple_dir dir,
++				 int priority, int cmd,
++				 struct flow_stats *stats,
++				 struct list_head *block_cb_list)
++{
++	struct flow_cls_offload cls_flow = {};
++	struct flow_block_cb *block_cb;
++	struct netlink_ext_ack extack = {};	/* zeroed: handed to every callback */
++	__be16 proto = ETH_P_ALL;
++	int err, i = 0;	/* i counts the callbacks that did not fail */
++
++	nf_flow_offload_init(&cls_flow, proto, priority, cmd,
++			     &flow->tuplehash[dir].tuple, &extack);
++	if (cmd == FLOW_CLS_REPLACE)
++		cls_flow.rule = flow_rule->rule;	/* flow_rule is NULL for other commands */
++
++	down_read(&flowtable->flow_block_lock);
++	list_for_each_entry(block_cb, block_cb_list, list) {
++		err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
++				   block_cb->cb_priv);
++		if (err < 0)
++			continue;	/* a failing callback is skipped, not fatal */
++
++		i++;
++	}
++	up_read(&flowtable->flow_block_lock);
++
++	if (cmd == FLOW_CLS_STATS)
++		memcpy(stats, &cls_flow.stats, sizeof(*stats));
++
++	return i;
++}
++
++static int flow_offload_tuple_add(struct flow_offload_work *offload,
++				  struct nf_flow_rule *flow_rule,
++				  enum flow_offload_tuple_dir dir)
++{
++	return nf_flow_offload_tuple(offload->flowtable, offload->flow,
++				     flow_rule, dir, offload->priority,
++				     FLOW_CLS_REPLACE, NULL,
++				     &offload->flowtable->flow_block.cb_list);
++}
++
++static void flow_offload_tuple_del(struct flow_offload_work *offload,
++				   enum flow_offload_tuple_dir dir)
++{
++	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
++			      offload->priority, FLOW_CLS_DESTROY, NULL,
++			      &offload->flowtable->flow_block.cb_list);
++}
++
++static int flow_offload_rule_add(struct flow_offload_work *offload,
++				 struct nf_flow_rule *flow_rule[])
++{
++	int ok_count = 0;
++
++	ok_count += flow_offload_tuple_add(offload, flow_rule[0],
++					   FLOW_OFFLOAD_DIR_ORIGINAL);
++	ok_count += flow_offload_tuple_add(offload, flow_rule[1],
++					   FLOW_OFFLOAD_DIR_REPLY);
++	if (ok_count == 0)
++		return -ENOENT;
++
++	return 0;
++}
++
++static void flow_offload_work_add(struct flow_offload_work *offload)
++{
++	struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
++	int err;
++
++	err = nf_flow_offload_alloc(offload, flow_rule);
++	if (err < 0)
++		return;
++
++	err = flow_offload_rule_add(offload, flow_rule);
++	if (err < 0)
++		goto out;
++
++	set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
++
++out:
++	nf_flow_offload_destroy(flow_rule);
++}
++
++static void flow_offload_work_del(struct flow_offload_work *offload)
++{
++	clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
++	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
++	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
++	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
++}
++
++static void flow_offload_tuple_stats(struct flow_offload_work *offload,
++				     enum flow_offload_tuple_dir dir,
++				     struct flow_stats *stats)
++{
++	nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
++			      offload->priority, FLOW_CLS_STATS, stats,
++			      &offload->flowtable->flow_block.cb_list);
++}
++
++static void flow_offload_work_stats(struct flow_offload_work *offload)
++{
++	struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
++	u64 lastused;
++
++	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
++	flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
++
++	lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
++	offload->flow->timeout = max_t(u64, offload->flow->timeout,
++				       lastused + flow_offload_get_timeout(offload->flow));
++
++	if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
++		if (stats[0].pkts)
++			nf_ct_acct_add(offload->flow->ct,
++				       FLOW_OFFLOAD_DIR_ORIGINAL,
++				       stats[0].pkts, stats[0].bytes);
++		if (stats[1].pkts)
++			nf_ct_acct_add(offload->flow->ct,
++				       FLOW_OFFLOAD_DIR_REPLY,
++				       stats[1].pkts, stats[1].bytes);
++	}
++}
++
++static void flow_offload_work_handler(struct work_struct *work)
++{
++	struct flow_offload_work *offload =
++		container_of(work, struct flow_offload_work, work);
++
++	switch (offload->cmd) {
++	case FLOW_CLS_REPLACE:
++		flow_offload_work_add(offload);
++		break;
++	case FLOW_CLS_DESTROY:
++		flow_offload_work_del(offload);
++		break;
++	case FLOW_CLS_STATS:
++		flow_offload_work_stats(offload);
++		break;
++	default:
++		WARN_ON_ONCE(1);	/* unexpected command */
++	}
++
++	clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);	/* allow new work for this flow */
++	kfree(offload);
++}
++
++static void flow_offload_queue_work(struct flow_offload_work *offload)
++{
++	if (offload->cmd == FLOW_CLS_REPLACE)
++		queue_work(nf_flow_offload_add_wq, &offload->work);
++	else if (offload->cmd == FLOW_CLS_DESTROY)
++		queue_work(nf_flow_offload_del_wq, &offload->work);
++	else
++		queue_work(nf_flow_offload_stats_wq, &offload->work);
++}
++
++static struct flow_offload_work *
++nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
++			   struct flow_offload *flow, unsigned int cmd)
++{
++	struct flow_offload_work *offload;
++
++	if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
++		return NULL;
++
++	offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
++	if (!offload) {
++		clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
++		return NULL;
++	}
++
++	offload->cmd = cmd;
++	offload->flow = flow;
++	offload->priority = flowtable->priority;
++	offload->flowtable = flowtable;
++	INIT_WORK(&offload->work, flow_offload_work_handler);
++
++	return offload;
++}
++
++
++void nf_flow_offload_add(struct nf_flowtable *flowtable,
++			 struct flow_offload *flow)
++{
++	struct flow_offload_work *offload;
++
++	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
++	if (!offload)
++		return;
++
++	flow_offload_queue_work(offload);
++}
++
++void nf_flow_offload_del(struct nf_flowtable *flowtable,
++			 struct flow_offload *flow)
++{
++	struct flow_offload_work *offload;
++
++	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
++	if (!offload)
++		return;
++
++	set_bit(NF_FLOW_HW_DYING, &flow->flags);
++	flow_offload_queue_work(offload);
++}
++
++void nf_flow_offload_stats(struct nf_flowtable *flowtable,
++			   struct flow_offload *flow)
++{
++	struct flow_offload_work *offload;
++	__s32 delta;
++
++	delta = nf_flow_timeout_delta(flow->timeout);
++	if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
++		return;
++
++	offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
++	if (!offload)
++		return;
++
++	flow_offload_queue_work(offload);
++}
++
++void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
++{
++	if (nf_flowtable_hw_offload(flowtable)) {
++		flush_workqueue(nf_flow_offload_add_wq);
++		flush_workqueue(nf_flow_offload_del_wq);
++		flush_workqueue(nf_flow_offload_stats_wq);
++	}
++}
++
++static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
++				     struct flow_block_offload *bo,
++				     enum flow_block_command cmd)
++{
++	struct flow_block_cb *block_cb, *next;
++	int err = 0;
++
++	switch (cmd) {
++	case FLOW_BLOCK_BIND:
++		list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
++		break;
++	case FLOW_BLOCK_UNBIND:
++		list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
++			list_del(&block_cb->list);
++			flow_block_cb_free(block_cb);
++		}
++		break;
++	default:
++		WARN_ON_ONCE(1);
++		err = -EOPNOTSUPP;
++	}
++
++	return err;
++}
++
++static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
++					     struct net *net,
++					     enum flow_block_command cmd,
++					     struct nf_flowtable *flowtable,
++					     struct netlink_ext_ack *extack)
++{
++	memset(bo, 0, sizeof(*bo));
++	bo->net		= net;
++	bo->block	= &flowtable->flow_block;
++	bo->command	= cmd;
++	bo->binder_type	= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
++	bo->extack	= extack;
++	INIT_LIST_HEAD(&bo->cb_list);
++}
++
++static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
++					  struct nf_flowtable *flowtable,
++					  struct net_device *dev,
++					  enum flow_block_command cmd,
++					  struct netlink_ext_ack *extack)
++{
++	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
++					 extack);
++	flow_indr_block_call(dev, bo, cmd);
++
++	if (list_empty(&bo->cb_list))
++		return -EOPNOTSUPP;
++
++	return 0;
++}
++
++static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
++				     struct nf_flowtable *flowtable,
++				     struct net_device *dev,
++				     enum flow_block_command cmd,
++				     struct netlink_ext_ack *extack)
++{
++	int err;
++
++	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
++					 extack);
++	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
++	if (err < 0)
++		return err;
++
++	return 0;
++}
++
++int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
++				struct net_device *dev,
++				enum flow_block_command cmd)
++{
++	struct netlink_ext_ack extack = {};
++	struct flow_block_offload bo;
++	int err;
++
++	if (!nf_flowtable_hw_offload(flowtable))
++		return 0;
++
++	if (dev->netdev_ops->ndo_setup_tc)
++		err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
++						&extack);
++	else
++		err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
++						     &extack);
++	if (err < 0)
++		return err;
++
++	return nf_flow_table_block_setup(flowtable, &bo, cmd);
++}
++EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
++
++int nf_flow_table_offload_init(void)
++{
++	nf_flow_offload_add_wq  = alloc_workqueue("nf_ft_offload_add",
++						  WQ_UNBOUND | WQ_SYSFS, 0);
++	if (!nf_flow_offload_add_wq)
++		return -ENOMEM;
++
++	nf_flow_offload_del_wq  = alloc_workqueue("nf_ft_offload_del",
++						  WQ_UNBOUND | WQ_SYSFS, 0);
++	if (!nf_flow_offload_del_wq)
++		goto err_del_wq;
++
++	nf_flow_offload_stats_wq  = alloc_workqueue("nf_ft_offload_stats",
++						    WQ_UNBOUND | WQ_SYSFS, 0);
++	if (!nf_flow_offload_stats_wq)
++		goto err_stats_wq;
++
++	return 0;
++
++err_stats_wq:
++	destroy_workqueue(nf_flow_offload_del_wq);
++err_del_wq:
++	destroy_workqueue(nf_flow_offload_add_wq);
++	return -ENOMEM;
++}
++
++void nf_flow_table_offload_exit(void)
++{
++	destroy_workqueue(nf_flow_offload_add_wq);
++	destroy_workqueue(nf_flow_offload_del_wq);
++	destroy_workqueue(nf_flow_offload_stats_wq);
++}
+diff --git a/net/netfilter/xt_FLOWOFFLOAD.c b/net/netfilter/xt_FLOWOFFLOAD.c
+new file mode 100644
+index 000000000..ae1eb2656
+--- /dev/null
++++ b/net/netfilter/xt_FLOWOFFLOAD.c
+@@ -0,0 +1,719 @@
++/*
++ * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/netfilter.h>
++#include <linux/netfilter/xt_FLOWOFFLOAD.h>
++#include <linux/if_vlan.h>
++#include <net/ip.h>
++#include <net/netfilter/nf_conntrack.h>
++#include <net/netfilter/nf_conntrack_extend.h>
++#include <net/netfilter/nf_conntrack_helper.h>
++#include <net/netfilter/nf_flow_table.h>
++
++struct xt_flowoffload_hook {
++	struct hlist_node list;
++	struct nf_hook_ops ops;
++	struct net *net;
++	bool registered;
++	bool used;
++};
++
++struct xt_flowoffload_table {
++	struct nf_flowtable ft;
++	struct hlist_head hooks;
++	struct delayed_work work;
++};
++
++struct nf_forward_info {
++	const struct net_device *indev;
++	const struct net_device *outdev;
++	const struct net_device *hw_outdev;
++	struct id {
++		__u16	id;
++		__be16	proto;
++	} encap[NF_FLOW_TABLE_ENCAP_MAX];
++	u8 num_encaps;
++	u8 ingress_vlans;
++	u8 h_source[ETH_ALEN];
++	u8 h_dest[ETH_ALEN];
++	enum flow_offload_xmit_type xmit_type;
++};
++
++static DEFINE_SPINLOCK(hooks_lock);
++
++struct xt_flowoffload_table flowtable[2];
++
++static unsigned int
++xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
++			const struct nf_hook_state *state)
++{
++	struct vlan_ethhdr *veth;
++	__be16 proto;
++
++	switch (skb->protocol) {
++	case htons(ETH_P_8021Q):
++		veth = (struct vlan_ethhdr *)skb_mac_header(skb);
++		proto = veth->h_vlan_encapsulated_proto;
++		break;
++	case htons(ETH_P_PPP_SES):
++		proto = nf_flow_pppoe_proto(skb);
++		break;
++	default:
++		proto = skb->protocol;
++		break;
++	}
++
++	switch (proto) {
++	case htons(ETH_P_IP):
++		return nf_flow_offload_ip_hook(priv, skb, state);
++	case htons(ETH_P_IPV6):
++		return nf_flow_offload_ipv6_hook(priv, skb, state);
++	}
++
++	return NF_ACCEPT;
++}
++
++static int
++xt_flowoffload_create_hook(struct xt_flowoffload_table *table,
++			   struct net_device *dev)
++{
++	struct xt_flowoffload_hook *hook;
++	struct nf_hook_ops *ops;
++
++	hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
++	if (!hook)
++		return -ENOMEM;
++
++	ops = &hook->ops;
++	ops->pf = NFPROTO_NETDEV;
++	ops->hooknum = NF_NETDEV_INGRESS;
++	ops->priority = 10;
++	ops->priv = &table->ft;
++	ops->hook = xt_flowoffload_net_hook;
++	ops->dev = dev;
++
++	hlist_add_head(&hook->list, &table->hooks);
++	mod_delayed_work(system_power_efficient_wq, &table->work, 0);
++
++	return 0;
++}
++
++static struct xt_flowoffload_hook *
++flow_offload_lookup_hook(struct xt_flowoffload_table *table,
++			 struct net_device *dev)
++{
++	struct xt_flowoffload_hook *hook;
++
++	hlist_for_each_entry(hook, &table->hooks, list) {
++		if (hook->ops.dev == dev)
++			return hook;
++	}
++
++	return NULL;
++}
++
++static void
++xt_flowoffload_check_device(struct xt_flowoffload_table *table,
++			    struct net_device *dev)
++{
++	struct xt_flowoffload_hook *hook;
++
++	if (!dev)
++		return;
++
++	spin_lock_bh(&hooks_lock);
++	hook = flow_offload_lookup_hook(table, dev);
++	if (hook)
++		hook->used = true;
++	else
++		xt_flowoffload_create_hook(table, dev);
++	spin_unlock_bh(&hooks_lock);
++}
++
++static void
++xt_flowoffload_register_hooks(struct xt_flowoffload_table *table)
++{
++	struct xt_flowoffload_hook *hook;
++
++restart:
++	hlist_for_each_entry(hook, &table->hooks, list) {
++		if (hook->registered)
++			continue;
++
++		hook->registered = true;
++		hook->net = dev_net(hook->ops.dev);
++		spin_unlock_bh(&hooks_lock);
++		nf_register_net_hook(hook->net, &hook->ops);
++		if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
++			table->ft.type->setup(&table->ft, hook->ops.dev,
++					      FLOW_BLOCK_BIND);
++		spin_lock_bh(&hooks_lock);
++		goto restart;
++	}
++
++}
++
++static bool
++xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table)
++{
++	struct xt_flowoffload_hook *hook;
++	bool active = false;
++
++restart:
++	spin_lock_bh(&hooks_lock);
++	hlist_for_each_entry(hook, &table->hooks, list) {
++		if (hook->used || !hook->registered) {
++			active = true;
++			continue;
++		}
++
++		hlist_del(&hook->list);
++		spin_unlock_bh(&hooks_lock);
++		if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
++			table->ft.type->setup(&table->ft, hook->ops.dev,
++					      FLOW_BLOCK_UNBIND);
++		nf_unregister_net_hook(hook->net, &hook->ops);
++		kfree(hook);
++		goto restart;
++	}
++	spin_unlock_bh(&hooks_lock);
++
++	return active;
++}
++
++static void
++xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
++{
++	struct xt_flowoffload_table *table = data;
++	struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple;
++	struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple;
++	struct xt_flowoffload_hook *hook;
++
++	spin_lock_bh(&hooks_lock);
++	hlist_for_each_entry(hook, &table->hooks, list) {
++		if (hook->ops.dev->ifindex != tuple0->iifidx &&
++		    hook->ops.dev->ifindex != tuple1->iifidx)
++			continue;
++
++		hook->used = true;
++	}
++	spin_unlock_bh(&hooks_lock);
++}
++
++static void
++xt_flowoffload_hook_work(struct work_struct *work)
++{
++	struct xt_flowoffload_table *table;
++	struct xt_flowoffload_hook *hook;
++	int err;
++
++	table = container_of(work, struct xt_flowoffload_table, work.work);
++
++	spin_lock_bh(&hooks_lock);
++	xt_flowoffload_register_hooks(table);
++	hlist_for_each_entry(hook, &table->hooks, list)
++		hook->used = false;
++	spin_unlock_bh(&hooks_lock);
++
++	err = nf_flow_table_iterate(&table->ft, xt_flowoffload_check_hook,
++				    table);
++	if (err && err != -EAGAIN)
++		goto out;
++
++	if (!xt_flowoffload_cleanup_hooks(table))
++		return;
++
++out:
++	queue_delayed_work(system_power_efficient_wq, &table->work, HZ);
++}
++
++static bool
++xt_flowoffload_skip(struct sk_buff *skb, int family)
++{
++	if (skb_sec_path(skb))
++		return true;
++
++	if (family == NFPROTO_IPV4) {
++		const struct ip_options *opt = &(IPCB(skb)->opt);
++
++		if (unlikely(opt->optlen))
++			return true;
++	}
++
++	return false;
++}
++
++static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst)
++{
++	if (dst_xfrm(dst))
++		return FLOW_OFFLOAD_XMIT_XFRM;
++
++	return FLOW_OFFLOAD_XMIT_NEIGH;
++}
++
++static void nf_default_forward_path(struct nf_flow_route *route,
++				    struct dst_entry *dst_cache,
++				    enum ip_conntrack_dir dir,
++				    struct net_device **dev)
++{
++	route->tuple[!dir].in.ifindex	= dst_cache->dev->ifindex;
++	route->tuple[dir].dst		= dst_cache;
++	route->tuple[dir].xmit_type	= nf_xmit_type(dst_cache);
++}
++
++static bool nf_is_valid_ether_device(const struct net_device *dev)
++{
++	if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
++	    dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
++		return false;
++
++	return true;
++}
++
++static void nf_dev_path_info(const struct net_device_path_stack *stack,
++			     struct nf_forward_info *info,
++			     unsigned char *ha)
++{
++	const struct net_device_path *path;
++	int i;
++
++	memcpy(info->h_dest, ha, ETH_ALEN);	/* default; a PPPoE entry overrides it below */
++
++	for (i = 0; i < stack->num_paths; i++) {
++		path = &stack->path[i];
++
++		info->indev = path->dev;	/* unconditionally tracks the entry being visited */
++
++		switch (path->type) {
++		case DEV_PATH_ETHERNET:
++		case DEV_PATH_DSA:
++		case DEV_PATH_VLAN:
++		case DEV_PATH_PPPOE:
++			if (is_zero_ether_addr(info->h_source))	/* only filled while still all-zero */
++				memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
++
++			if (path->type == DEV_PATH_ETHERNET)
++				break;
++			if (path->type == DEV_PATH_DSA) {
++				i = stack->num_paths;	/* ends the walk at this entry */
++				break;
++			}
++
++			/* DEV_PATH_VLAN and DEV_PATH_PPPOE */
++			if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
++				info->indev = NULL;	/* NOTE(review): overwritten if another entry follows */
++				break;
++			}
++			if (!info->outdev)
++				info->outdev = path->dev;
++			info->encap[info->num_encaps].id = path->encap.id;
++			info->encap[info->num_encaps].proto = path->encap.proto;
++			info->num_encaps++;
++			if (path->type == DEV_PATH_PPPOE)
++				memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
++			break;
++		case DEV_PATH_BRIDGE:
++			if (is_zero_ether_addr(info->h_source))
++				memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
++
++			switch (path->bridge.vlan_mode) {
++			case DEV_PATH_BR_VLAN_UNTAG_HW:	/* NOTE(review): assumes num_encaps > 0, else the shift is negative */
++				info->ingress_vlans |= BIT(info->num_encaps - 1);
++				break;
++			case DEV_PATH_BR_VLAN_TAG:	/* NOTE(review): no NF_FLOW_TABLE_ENCAP_MAX check here - confirm */
++				info->encap[info->num_encaps].id = path->bridge.vlan_id;
++				info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
++				info->num_encaps++;
++				break;
++			case DEV_PATH_BR_VLAN_UNTAG:	/* NOTE(review): u8 wraps to 255 if num_encaps is 0 */
++				info->num_encaps--;
++				break;
++			case DEV_PATH_BR_VLAN_KEEP:
++				break;
++			}
++			break;
++		default:
++			break;
++		}
++	}
++	if (!info->outdev)	/* no VLAN/PPPoE entry set it: fall back to indev */
++		info->outdev = info->indev;
++
++	info->hw_outdev = info->indev;
++
++	if (nf_is_valid_ether_device(info->indev))	/* otherwise xmit_type keeps the caller's value */
++		info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
++}
++
++static int nf_dev_fill_forward_path(const struct nf_flow_route *route,
++				     const struct dst_entry *dst_cache,
++				     const struct nf_conn *ct,
++				     enum ip_conntrack_dir dir, u8 *ha,
++				     struct net_device_path_stack *stack)
++{
++	struct net_device *dev = dst_cache->dev;
++	struct neighbour *neigh;
++	bool reachable;
++
++	/* The neighbour is only resolved for valid Ethernet devices. */
++	if (nf_is_valid_ether_device(dev)) {
++		neigh = dst_neigh_lookup(dst_cache,
++					 &ct->tuplehash[!dir].tuple.src.u3);
++		if (!neigh)
++			return -1;
++
++		/* Snapshot state and MAC while holding the neighbour lock. */
++		read_lock_bh(&neigh->lock);
++		reachable = neigh->nud_state & NUD_VALID;
++		ether_addr_copy(ha, neigh->ha);
++		read_unlock_bh(&neigh->lock);
++		neigh_release(neigh);
++
++		if (!reachable)
++			return -1;
++	}
++
++	return dev_fill_forward_path(dev, ha, stack);
++}
++
++static int nf_dev_forward_path(struct nf_flow_route *route,
++				const struct nf_conn *ct,
++				enum ip_conntrack_dir dir,
++				struct net_device **devs)
++{
++	const struct dst_entry *dst = route->tuple[dir].dst;
++	struct nf_forward_info info = {};
++	struct net_device_path_stack stack;
++	unsigned char ha[ETH_ALEN];
++	int n;
++
++	if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
++		nf_dev_path_info(&stack, &info, ha);
++	/* info is still all-zero (indev == NULL) when the lookup failed. */
++	devs[!dir] = (struct net_device *)info.indev;
++	if (!info.indev)
++		return -1;
++
++	route->tuple[!dir].in.ifindex = info.indev->ifindex;
++	route->tuple[!dir].in.num_encaps = info.num_encaps;
++	route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
++	for (n = 0; n < info.num_encaps; n++) {
++		route->tuple[!dir].in.encap[n].id = info.encap[n].id;
++		route->tuple[!dir].in.encap[n].proto = info.encap[n].proto;
++	}
++
++	if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
++		route->tuple[dir].xmit_type = info.xmit_type;
++		route->tuple[dir].out.ifindex = info.outdev->ifindex;
++		route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
++		memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
++		memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
++	}
++
++	return 0;
++}
++
++static int
++xt_flowoffload_route_dir(struct nf_flow_route *route, const struct nf_conn *ct,
++			 enum ip_conntrack_dir dir,
++			 const struct xt_action_param *par, int ifindex,
++			 struct net_device **devs)
++{
++	const struct nf_conntrack_tuple *peer = &ct->tuplehash[!dir].tuple;
++	const u8 family = xt_family(par);
++	struct dst_entry *dst = NULL;
++	struct flowi fl;
++
++	memset(&fl, 0, sizeof(fl));
++	if (family == NFPROTO_IPV4) {
++		fl.u.ip4.daddr = peer->src.u3.ip;
++		fl.u.ip4.flowi4_oif = ifindex;
++	} else if (family == NFPROTO_IPV6) {
++		fl.u.ip6.saddr = peer->dst.u3.in6;
++		fl.u.ip6.daddr = peer->src.u3.in6;
++		fl.u.ip6.flowi6_oif = ifindex;
++	}
++
++	/* dst stays NULL unless the lookup stored a route in it. */
++	nf_route(xt_net(par), &dst, &fl, false, family);
++	if (!dst)
++		return -ENOENT;
++
++	nf_default_forward_path(route, dst, dir, devs);
++
++	return 0;
++}
++
++static int
++xt_flowoffload_route(struct sk_buff *skb, const struct nf_conn *ct,
++		     const struct xt_action_param *par,
++		     struct nf_flow_route *route, enum ip_conntrack_dir dir,
++		     struct net_device **devs)
++{
++	int err;
++
++	/* Route lookup for the given direction first ... */
++	err = xt_flowoffload_route_dir(route, ct, dir, par,
++				       devs[dir]->ifindex, devs);
++	if (err)
++		return err;
++
++	/* ... then for the opposite one. */
++	err = xt_flowoffload_route_dir(route, ct, !dir, par,
++				       devs[!dir]->ifindex, devs);
++	if (err)
++		return err;
++
++	/* Device paths are resolved only if both sides use neighbour xmit. */
++	if (route->tuple[dir].xmit_type != FLOW_OFFLOAD_XMIT_NEIGH ||
++	    route->tuple[!dir].xmit_type != FLOW_OFFLOAD_XMIT_NEIGH)
++		return 0;
++
++	if (nf_dev_forward_path(route, ct, dir, devs) ||
++	    nf_dev_forward_path(route, ct, !dir, devs))
++		return -1;
++	return 0;
++}
++
++/* Target hook: offload an established TCP/UDP conntrack entry to a flowtable. */
++static unsigned int
++flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
++{
++	struct xt_flowoffload_table *table;
++	const struct xt_flowoffload_target_info *info = par->targinfo;
++	struct tcphdr _tcph, *tcph = NULL;
++	enum ip_conntrack_info ctinfo;
++	enum ip_conntrack_dir dir;
++	struct nf_flow_route route = {};
++	struct flow_offload *flow = NULL;
++	struct net_device *devs[2] = {};
++	struct nf_conn *ct;
++	struct net *net;
++
++	if (xt_flowoffload_skip(skb, xt_family(par)))
++		return XT_CONTINUE;
++
++	ct = nf_ct_get(skb, &ctinfo);
++	if (ct == NULL)
++		return XT_CONTINUE;
++
++	switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
++	case IPPROTO_TCP:
++		if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
++			return XT_CONTINUE;
++
++		tcph = skb_header_pointer(skb, par->thoff,
++					  sizeof(_tcph), &_tcph);
++		if (unlikely(!tcph || tcph->fin || tcph->rst))
++			return XT_CONTINUE;
++		break;
++	case IPPROTO_UDP:
++		break;
++	default:
++		return XT_CONTINUE;
++	}
++
++	if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
++	    ct->status & IPS_SEQ_ADJUST)
++		return XT_CONTINUE;
++
++	if (!nf_ct_is_confirmed(ct))
++		return XT_CONTINUE;
++
++	/* dir indexes devs[] below, so it has to be derived first. */
++	dir = CTINFO2DIR(ctinfo);
++	devs[dir] = xt_out(par);
++	devs[!dir] = xt_in(par);
++	if (!devs[dir] || !devs[!dir])
++		return XT_CONTINUE;
++
++	if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
++		return XT_CONTINUE;
++
++	if (xt_flowoffload_route(skb, ct, par, &route, dir, devs) < 0)
++		goto err_flow_route;
++
++	flow = flow_offload_alloc(ct);
++	if (!flow)
++		goto err_flow_alloc;
++
++	if (flow_offload_route_init(flow, &route) < 0)
++		goto err_flow_add;
++
++	if (tcph) {
++		ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
++		ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
++	}
++
++	table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)];
++
++	net = read_pnet(&table->ft.net);
++	if (!net)
++		write_pnet(&table->ft.net, xt_net(par));
++
++	if (flow_offload_add(&table->ft, flow) < 0)
++		goto err_flow_add;
++
++	xt_flowoffload_check_device(table, devs[0]);
++	xt_flowoffload_check_device(table, devs[1]);
++
++	dst_release(route.tuple[!dir].dst);
++
++	return XT_CONTINUE;
++
++err_flow_add:
++	flow_offload_free(flow);
++err_flow_alloc:
++	dst_release(route.tuple[!dir].dst);
++err_flow_route:
++	clear_bit(IPS_OFFLOAD_BIT, &ct->status);
++
++	return XT_CONTINUE;
++}
++
++static int flowoffload_chk(const struct xt_tgchk_param *par)
++{
++	const struct xt_flowoffload_target_info *tginfo = par->targinfo;
++
++	/* Only flag bits covered by XT_FLOWOFFLOAD_MASK are accepted. */
++	if (tginfo->flags & ~XT_FLOWOFFLOAD_MASK)
++		return -EINVAL;
++	return 0;
++}
++
++static struct xt_target offload_tg_reg __read_mostly = {	/* registered by xt_flowoffload_tg_init() */
++	.family		= NFPROTO_UNSPEC,
++	.name		= "FLOWOFFLOAD",
++	.revision	= 0,
++	.targetsize	= sizeof(struct xt_flowoffload_target_info),
++	.usersize	= sizeof(struct xt_flowoffload_target_info),
++	.checkentry	= flowoffload_chk,	/* rejects flag bits outside XT_FLOWOFFLOAD_MASK */
++	.target		= flowoffload_tg,	/* always returns XT_CONTINUE */
++	.me		= THIS_MODULE,
++};
++
++static int flow_offload_netdev_event(struct notifier_block *this,
++				     unsigned long event, void *ptr)
++{
++	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
++	struct xt_flowoffload_hook *hook[2];
++	int i;
++
++	/* Only device removal is of interest here. */
++	if (event != NETDEV_UNREGISTER)
++		return NOTIFY_DONE;
++
++	/* Unlink the device's hook from both tables under hooks_lock. */
++	spin_lock_bh(&hooks_lock);
++	for (i = 0; i < 2; i++) {
++		hook[i] = flow_offload_lookup_hook(&flowtable[i], dev);
++		if (hook[i])
++			hlist_del(&hook[i]->list);
++	}
++	spin_unlock_bh(&hooks_lock);
++
++	/* Unregister and free only after the lock has been dropped. */
++	for (i = 0; i < 2; i++) {
++		if (!hook[i])
++			continue;
++		nf_unregister_net_hook(hook[i]->net, &hook[i]->ops);
++		kfree(hook[i]);
++	}
++
++	/* Finally hand the device to nf_flow_table_cleanup(). */
++	nf_flow_table_cleanup(dev);
++
++	return NOTIFY_DONE;
++}
++
++static struct notifier_block flow_offload_netdev_notifier = {
++	.notifier_call	= flow_offload_netdev_event,	/* acts on NETDEV_UNREGISTER only */
++};
++
++/*
++ * Flowtable ->action callback used by flowtable_inet.
++ *
++ * Picks the IPv4 or IPv6 rule builder from the tuple's l3proto and
++ * returns its result; any other protocol yields -1.
++ */
++static int nf_flow_rule_route_inet(struct net *net,
++				   const struct flow_offload *flow,
++				   enum flow_offload_tuple_dir dir,
++				   struct nf_flow_rule *flow_rule)
++{
++	/* Dispatch on the L3 protocol recorded in this direction's tuple. */
++	switch (flow->tuplehash[dir].tuple.l3proto) {
++	case NFPROTO_IPV4:
++		return nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
++	case NFPROTO_IPV6:
++		return nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
++	default:
++		/* Neither IPv4 nor IPv6. */
++		return -1;
++	}
++}
++
++static struct nf_flowtable_type flowtable_inet = {	/* type assigned to each table by init_flowtable() */
++	.family		= NFPROTO_INET,
++	.init		= nf_flow_table_init,
++	.setup		= nf_flow_table_offload_setup,
++	.action		= nf_flow_rule_route_inet,	/* picks the IPv4 or IPv6 rule builder */
++	.free		= nf_flow_table_free,
++	.hook		= xt_flowoffload_net_hook,
++	.owner		= THIS_MODULE,
++};
++
++static int init_flowtable(struct xt_flowoffload_table *tbl)
++{
++	/* Set the type and the hook worker up before the core init runs. */
++	tbl->ft.type = &flowtable_inet;
++	INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work);
++	return nf_flow_table_init(&tbl->ft);
++}
++
++/* Module init: notifier, both flowtables, then the xt target; unwinds on error. */
++static int __init xt_flowoffload_tg_init(void)
++{
++	int ret;
++
++	register_netdevice_notifier(&flow_offload_netdev_notifier);
++
++	ret = init_flowtable(&flowtable[0]);
++	if (ret)
++		goto err_notifier;
++
++	ret = init_flowtable(&flowtable[1]);
++	if (ret)
++		goto cleanup;
++
++	flowtable[1].ft.flags = NF_FLOWTABLE_HW_OFFLOAD;
++	ret = xt_register_target(&offload_tg_reg);
++	if (!ret)
++		return 0;
++
++	/* Undo everything set up above, newest first. */
++	nf_flow_table_free(&flowtable[1].ft);
++cleanup:
++	nf_flow_table_free(&flowtable[0].ft);
++err_notifier:
++	unregister_netdevice_notifier(&flow_offload_netdev_notifier);
++	return ret;
++}
++
++static void __exit xt_flowoffload_tg_exit(void)
++{	/* Module exit: tear down what xt_flowoffload_tg_init() set up. */
++	xt_unregister_target(&offload_tg_reg);
++	unregister_netdevice_notifier(&flow_offload_netdev_notifier);
++	nf_flow_table_free(&flowtable[0].ft);	/* table used when XT_FLOWOFFLOAD_HW is not set */
++	nf_flow_table_free(&flowtable[1].ft);	/* table flagged NF_FLOWTABLE_HW_OFFLOAD at init */
++}
++
++MODULE_LICENSE("GPL");
++module_init(xt_flowoffload_tg_init);
++module_exit(xt_flowoffload_tg_exit);
+-- 
+2.18.0
+
diff --git a/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9991-add-read-poll-timeout-function-for-kernel5.4.patch b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9991-add-read-poll-timeout-function-for-kernel5.4.patch
new file mode 100755
index 0000000..c8221b3
--- /dev/null
+++ b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9991-add-read-poll-timeout-function-for-kernel5.4.patch
@@ -0,0 +1,73 @@
+From 2be6a2d4eaa4db01d3afbd5e9d6fd15494a87f2f Mon Sep 17 00:00:00 2001
+From: Bo Jiao <Bo.Jiao@mediatek.com>
+Date: Fri, 17 Jun 2022 11:23:57 +0800
+Subject: [PATCH 2/8] 9991-add-read-poll-timeout-function-for-kernel5.4
+
+---
+ include/linux/iopoll.h | 30 +++++++++++++++++++++++++++---
+ 1 file changed, 27 insertions(+), 3 deletions(-)
+ mode change 100644 => 100755 include/linux/iopoll.h
+
+diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
+old mode 100644
+new mode 100755
+index 35e15dfd4..d96087008
+--- a/include/linux/iopoll.h
++++ b/include/linux/iopoll.h
+@@ -31,19 +31,22 @@
+  * When available, you'll probably want to use one of the specialized
+  * macros defined below rather than this macro directly.
+  */
+-#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us)	\
++#define read_poll_timeout(op, val, cond, sleep_us, timeout_us, \
++				sleep_before_read, args...) \
+ ({ \
+ 	u64 __timeout_us = (timeout_us); \
+ 	unsigned long __sleep_us = (sleep_us); \
+ 	ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \
+ 	might_sleep_if((__sleep_us) != 0); \
++	if (sleep_before_read && __sleep_us) \
++		usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
+ 	for (;;) { \
+-		(val) = op(addr); \
++		(val) = op(args); \
+ 		if (cond) \
+ 			break; \
+ 		if (__timeout_us && \
+ 		    ktime_compare(ktime_get(), __timeout) > 0) { \
+-			(val) = op(addr); \
++			(val) = op(args); \
+ 			break; \
+ 		} \
+ 		if (__sleep_us) \
+@@ -52,6 +55,27 @@
+ 	(cond) ? 0 : -ETIMEDOUT; \
+ })
+ 
++/**
++ * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs
++ * @op: accessor function (takes @addr as its only argument)
++ * @addr: Address to poll
++ * @val: Variable to read the value into
++ * @cond: Break condition (usually involving @val)
++ * @sleep_us: Maximum time to sleep between reads in us (0
++ *            tight-loops).  Should be less than ~20ms since usleep_range
++ *            is used (see Documentation/timers/timers-howto.rst).
++ * @timeout_us: Timeout in us, 0 means never timeout
++ *
++ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
++ * case, the last read value at @addr is stored in @val. Must not
++ * be called from atomic context if sleep_us or timeout_us are used.
++ *
++ * When available, you'll probably want to use one of the specialized
++ * macros defined below rather than this macro directly.
++ */
++#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us)	\
++	read_poll_timeout(op, val, cond, sleep_us, timeout_us, false, addr)
++
+ /**
+  * readx_poll_timeout_atomic - Periodically poll an address until a condition is met or a timeout occurs
+  * @op: accessor function (takes @addr as its only argument)
+-- 
+2.18.0
+
diff --git a/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9992-dts-mt7986-wed-changes.patch b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9992-dts-mt7986-wed-changes.patch
new file mode 100755
index 0000000..3b965ac
--- /dev/null
+++ b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9992-dts-mt7986-wed-changes.patch
@@ -0,0 +1,123 @@
+From b83743c16da6fa4da206df3e5a1a9c29485bb613 Mon Sep 17 00:00:00 2001
+From: Bo Jiao <Bo.Jiao@mediatek.com>
+Date: Wed, 22 Jun 2022 16:36:42 +0800
+Subject: [PATCH 3/8] 9992-dts-mt7986-wed-changes
+
+---
+ arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 33 ++++++++---------------
+ arch/arm64/boot/dts/mediatek/mt7986b.dtsi | 33 ++++++++---------------
+ 2 files changed, 22 insertions(+), 44 deletions(-)
+
+diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
+index ba27b95f5..7f78de6b9 100644
+--- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
++++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
+@@ -58,32 +58,20 @@
+ 		};
+ 	};
+ 
+-	wed: wed@15010000 {
+-		compatible = "mediatek,wed";
+-		wed_num = <2>;
+-		/* add this property for wed get the pci slot number. */
+-		pci_slot_map = <0>, <1>;
+-		reg = <0 0x15010000 0 0x1000>,
+-		      <0 0x15011000 0 0x1000>;
++	wed0: wed@15010000 {
++		compatible = "mediatek,mt7986-wed",
++			     "syscon";
++		reg = <0 0x15010000 0 0x1000>;
+ 		interrupt-parent = <&gic>;
+-		interrupts = <GIC_SPI 205 IRQ_TYPE_LEVEL_HIGH>,
+-			     <GIC_SPI 206 IRQ_TYPE_LEVEL_HIGH>;
++		interrupts = <GIC_SPI 205 IRQ_TYPE_LEVEL_HIGH>;
+ 	};
+ 
+-	wed2: wed2@15011000 {
+-		compatible = "mediatek,wed2";
+-		wed_num = <2>;
+-		reg = <0 0x15010000 0 0x1000>,
+-		      <0 0x15011000 0 0x1000>;
++	wed1: wed@15011000 {
++		compatible = "mediatek,mt7986-wed",
++			     "syscon";
++		reg = <0 0x15011000 0 0x1000>;
+ 		interrupt-parent = <&gic>;
+-		interrupts = <GIC_SPI 205 IRQ_TYPE_LEVEL_HIGH>,
+-			     <GIC_SPI 206 IRQ_TYPE_LEVEL_HIGH>;
+-	};
+-
+-	wdma: wdma@15104800 {
+-		compatible = "mediatek,wed-wdma";
+-		reg = <0 0x15104800 0 0x400>,
+-		      <0 0x15104c00 0 0x400>;
++		interrupts = <GIC_SPI 206 IRQ_TYPE_LEVEL_HIGH>;
+ 	};
+ 
+ 	ap2woccif: ap2woccif@151A5000 {
+@@ -490,6 +478,7 @@
+ 					 <&topckgen CK_TOP_CB_SGM_325M>;
+                 mediatek,ethsys = <&ethsys>;
+ 		mediatek,sgmiisys = <&sgmiisys0>, <&sgmiisys1>;
++		mediatek,wed = <&wed0>, <&wed1>;
+                 #reset-cells = <1>;
+                 #address-cells = <1>;
+                 #size-cells = <0>;
+diff --git a/arch/arm64/boot/dts/mediatek/mt7986b.dtsi b/arch/arm64/boot/dts/mediatek/mt7986b.dtsi
+index 523d585cb..0e5f116a2 100644
+--- a/arch/arm64/boot/dts/mediatek/mt7986b.dtsi
++++ b/arch/arm64/boot/dts/mediatek/mt7986b.dtsi
+@@ -58,32 +58,20 @@
+ 		};
+ 	};
+ 
+-	wed: wed@15010000 {
+-		compatible = "mediatek,wed";
+-		wed_num = <2>;
+-		/* add this property for wed get the pci slot number. */
+-		pci_slot_map = <0>, <1>;
+-		reg = <0 0x15010000 0 0x1000>,
+-		      <0 0x15011000 0 0x1000>;
++	wed0: wed@15010000 {
++		compatible = "mediatek,mt7986-wed",
++			     "syscon";
++		reg = <0 0x15010000 0 0x1000>;
+ 		interrupt-parent = <&gic>;
+-		interrupts = <GIC_SPI 205 IRQ_TYPE_LEVEL_HIGH>,
+-			     <GIC_SPI 206 IRQ_TYPE_LEVEL_HIGH>;
++		interrupts = <GIC_SPI 205 IRQ_TYPE_LEVEL_HIGH>;
+ 	};
+ 
+-	wed2: wed2@15011000 {
+-		compatible = "mediatek,wed2";
+-		wed_num = <2>;
+-		reg = <0 0x15010000 0 0x1000>,
+-		      <0 0x15011000 0 0x1000>;
++	wed1: wed@15011000 {
++		compatible = "mediatek,mt7986-wed",
++			     "syscon";
++		reg = <0 0x15011000 0 0x1000>;
+ 		interrupt-parent = <&gic>;
+-		interrupts = <GIC_SPI 205 IRQ_TYPE_LEVEL_HIGH>,
+-			     <GIC_SPI 206 IRQ_TYPE_LEVEL_HIGH>;
+-	};
+-
+-	wdma: wdma@15104800 {
+-		compatible = "mediatek,wed-wdma";
+-		reg = <0 0x15104800 0 0x400>,
+-		      <0 0x15104c00 0 0x400>;
++		interrupts = <GIC_SPI 206 IRQ_TYPE_LEVEL_HIGH>;
+ 	};
+ 
+ 	ap2woccif: ap2woccif@151A5000 {
+@@ -405,6 +393,7 @@
+ 					 <&topckgen CK_TOP_CB_SGM_325M>;
+                 mediatek,ethsys = <&ethsys>;
+ 		mediatek,sgmiisys = <&sgmiisys0>, <&sgmiisys1>;
++		mediatek,wed = <&wed0>, <&wed1>;
+                 #reset-cells = <1>;
+                 #address-cells = <1>;
+                 #size-cells = <0>;
+-- 
+2.18.0
+
diff --git a/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9993-add-wed.patch b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9993-add-wed.patch
new file mode 100755
index 0000000..06b0bcb
--- /dev/null
+++ b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9993-add-wed.patch
@@ -0,0 +1,3234 @@
+From 342fdc50b761309e75974554cdcf790a2d09e134 Mon Sep 17 00:00:00 2001
+From: Sujuan Chen <sujuan.chen@mediatek.com>
+Date: Thu, 2 Jun 2022 15:32:07 +0800
+Subject: [PATCH 4/8] 9993-add-wed
+
+Signed-off-by: Sujuan Chen <sujuan.chen@mediatek.com>
+---
+ arch/arm64/boot/dts/mediatek/mt7622.dtsi      |  32 +-
+ drivers/net/ethernet/mediatek/Kconfig         |   4 +
+ drivers/net/ethernet/mediatek/Makefile        |   5 +
+ drivers/net/ethernet/mediatek/mtk_eth_soc.c   | 136 ++-
+ drivers/net/ethernet/mediatek/mtk_eth_soc.h   |  14 +-
+ drivers/net/ethernet/mediatek/mtk_ppe.c       | 373 +++++++-
+ drivers/net/ethernet/mediatek/mtk_ppe.h       |  89 +-
+ .../net/ethernet/mediatek/mtk_ppe_debugfs.c   |   4 +-
+ .../net/ethernet/mediatek/mtk_ppe_offload.c   | 167 +++-
+ drivers/net/ethernet/mediatek/mtk_wed.c       | 876 ++++++++++++++++++
+ drivers/net/ethernet/mediatek/mtk_wed.h       | 135 +++
+ .../net/ethernet/mediatek/mtk_wed_debugfs.c   | 175 ++++
+ drivers/net/ethernet/mediatek/mtk_wed_ops.c   |   8 +
+ drivers/net/ethernet/mediatek/mtk_wed_regs.h  | 251 +++++
+ include/linux/netdevice.h                     |   7 +
+ include/linux/soc/mediatek/mtk_wed.h          | 131 +++
+ net/core/dev.c                                |   4 +
+ 17 files changed, 2283 insertions(+), 128 deletions(-)
+ mode change 100755 => 100644 drivers/net/ethernet/mediatek/Kconfig
+ mode change 100755 => 100644 drivers/net/ethernet/mediatek/Makefile
+ mode change 100755 => 100644 drivers/net/ethernet/mediatek/mtk_eth_soc.c
+ mode change 100755 => 100644 drivers/net/ethernet/mediatek/mtk_eth_soc.h
+ mode change 100644 => 100755 drivers/net/ethernet/mediatek/mtk_ppe.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_wed.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_wed.h
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_wed_ops.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_wed_regs.h
+ create mode 100644 include/linux/soc/mediatek/mtk_wed.h
+
+diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
+index 369e01389..d0fbc367e 100644
+--- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi
++++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi
+@@ -338,7 +338,7 @@
+ 		};
+ 
+ 		cci_control2: slave-if@5000 {
+-			compatible = "arm,cci-400-ctrl-if";
++			compatible = "arm,cci-400-ctrl-if", "syscon";
+ 			interface-type = "ace";
+ 			reg = <0x5000 0x1000>;
+ 		};
+@@ -920,6 +920,11 @@
+ 		};
+ 	};
+ 
++	hifsys: syscon@1af00000 {
++		compatible = "mediatek,mt7622-hifsys", "syscon";
++		reg = <0 0x1af00000 0 0x70>;
++	};
++
+ 	ethsys: syscon@1b000000 {
+ 		compatible = "mediatek,mt7622-ethsys",
+ 			     "syscon";
+@@ -938,6 +943,26 @@
+ 		#dma-cells = <1>;
+ 	};
+ 
++	pcie_mirror: pcie-mirror@10000400 {
++		compatible = "mediatek,mt7622-pcie-mirror",
++			     "syscon";
++		reg = <0 0x10000400 0 0x10>;
++	};
++
++	wed0: wed@1020a000 {
++		compatible = "mediatek,mt7622-wed",
++			     "syscon";
++		reg = <0 0x1020a000 0 0x1000>;
++		interrupts = <GIC_SPI 214 IRQ_TYPE_LEVEL_LOW>;
++	};
++
++	wed1: wed@1020b000 {
++		compatible = "mediatek,mt7622-wed",
++			     "syscon";
++		reg = <0 0x1020b000 0 0x1000>;
++		interrupts = <GIC_SPI 215 IRQ_TYPE_LEVEL_LOW>;
++	};
++
+ 	eth: ethernet@1b100000 {
+ 		compatible = "mediatek,mt7622-eth",
+ 			     "mediatek,mt2701-eth",
+@@ -964,6 +989,11 @@
+ 		power-domains = <&scpsys MT7622_POWER_DOMAIN_ETHSYS>;
+ 		mediatek,ethsys = <&ethsys>;
+ 		mediatek,sgmiisys = <&sgmiisys>;
++		mediatek,cci-control = <&cci_control2>;
++		mediatek,wed = <&wed0>, <&wed1>;
++		mediatek,pcie-mirror = <&pcie_mirror>;
++		mediatek,hifsys = <&hifsys>;
++		dma-coherent;
+ 		#address-cells = <1>;
+ 		#size-cells = <0>;
+ 		status = "disabled";
+diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig
+old mode 100755
+new mode 100644
+index 42e6b38d2..8ab6615a3
+--- a/drivers/net/ethernet/mediatek/Kconfig
++++ b/drivers/net/ethernet/mediatek/Kconfig
+@@ -7,6 +7,10 @@ config NET_VENDOR_MEDIATEK
+ 
+ if NET_VENDOR_MEDIATEK
+ 
++config NET_MEDIATEK_SOC_WED
++	depends on ARCH_MEDIATEK || COMPILE_TEST
++	def_bool NET_MEDIATEK_SOC != n
++
+ config NET_MEDIATEK_SOC
+ 	tristate "MediaTek SoC Gigabit Ethernet support"
+ 	select PHYLINK
+diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
+old mode 100755
+new mode 100644
+index 0a6af99f1..3528f1b3c
+--- a/drivers/net/ethernet/mediatek/Makefile
++++ b/drivers/net/ethernet/mediatek/Makefile
+@@ -6,4 +6,9 @@
+ obj-$(CONFIG_NET_MEDIATEK_SOC)			+= mtk_eth.o
+ mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o	\
+ 	     mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
++mtk_eth-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed.o
++ifdef CONFIG_DEBUG_FS
++mtk_eth-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed_debugfs.o
++endif
++obj-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed_ops.o
+ obj-$(CONFIG_NET_MEDIATEK_HNAT)			+= mtk_hnat/
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+old mode 100755
+new mode 100644
+index 819d8a0be..2121335a1
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -9,6 +9,7 @@
+ #include <linux/of_device.h>
+ #include <linux/of_mdio.h>
+ #include <linux/of_net.h>
++#include <linux/of_address.h>
+ #include <linux/mfd/syscon.h>
+ #include <linux/regmap.h>
+ #include <linux/clk.h>
+@@ -19,12 +20,14 @@
+ #include <linux/interrupt.h>
+ #include <linux/pinctrl/devinfo.h>
+ #include <linux/phylink.h>
++#include <linux/bitfield.h>
+ #include <net/dsa.h>
+ 
+ #include "mtk_eth_soc.h"
+ #include "mtk_eth_dbg.h"
+ #include "mtk_eth_reset.h"
+ #include "mtk_hnat/hnat.h"
++#include "mtk_wed.h"
+ 
+ #if defined(CONFIG_NET_MEDIATEK_HNAT) || defined(CONFIG_NET_MEDIATEK_HNAT_MODULE)
+ #include "mtk_hnat/nf_hnat_mtk.h"
+@@ -850,7 +853,7 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
+ 	int i;
+ 
+ 	if (!eth->soc->has_sram) {
+-		eth->scratch_ring = dma_alloc_coherent(eth->dev,
++		eth->scratch_ring = dma_alloc_coherent(eth->dma_dev,
+ 					       cnt * sizeof(struct mtk_tx_dma),
+ 					       &eth->phy_scratch_ring,
+ 					       GFP_ATOMIC);
+@@ -866,10 +869,10 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
+ 	if (unlikely(!eth->scratch_head))
+ 		return -ENOMEM;
+ 
+-	dma_addr = dma_map_single(eth->dev,
++	dma_addr = dma_map_single(eth->dma_dev,
+ 				  eth->scratch_head, cnt * MTK_QDMA_PAGE_SIZE,
+ 				  DMA_FROM_DEVICE);
+-	if (unlikely(dma_mapping_error(eth->dev, dma_addr)))
++	if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr)))
+ 		return -ENOMEM;
+ 
+ 	phy_ring_tail = eth->phy_scratch_ring +
+@@ -933,26 +936,26 @@ static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf,
+ {
+ 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+ 		if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) {
+-			dma_unmap_single(eth->dev,
++			dma_unmap_single(eth->dma_dev,
+ 					 dma_unmap_addr(tx_buf, dma_addr0),
+ 					 dma_unmap_len(tx_buf, dma_len0),
+ 					 DMA_TO_DEVICE);
+ 		} else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) {
+-			dma_unmap_page(eth->dev,
++			dma_unmap_page(eth->dma_dev,
+ 				       dma_unmap_addr(tx_buf, dma_addr0),
+ 				       dma_unmap_len(tx_buf, dma_len0),
+ 				       DMA_TO_DEVICE);
+ 		}
+ 	} else {
+ 		if (dma_unmap_len(tx_buf, dma_len0)) {
+-			dma_unmap_page(eth->dev,
++			dma_unmap_page(eth->dma_dev,
+ 				       dma_unmap_addr(tx_buf, dma_addr0),
+ 				       dma_unmap_len(tx_buf, dma_len0),
+ 				       DMA_TO_DEVICE);
+ 		}
+ 
+ 		if (dma_unmap_len(tx_buf, dma_len1)) {
+-			dma_unmap_page(eth->dev,
++			dma_unmap_page(eth->dma_dev,
+ 				       dma_unmap_addr(tx_buf, dma_addr1),
+ 				       dma_unmap_len(tx_buf, dma_len1),
+ 				       DMA_TO_DEVICE);
+@@ -1017,9 +1020,9 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
+ 	itx_buf = mtk_desc_to_tx_buf(ring, itxd);
+ 	memset(itx_buf, 0, sizeof(*itx_buf));
+ 
+-	mapped_addr = dma_map_single(eth->dev, skb->data,
++	mapped_addr = dma_map_single(eth->dma_dev, skb->data,
+ 				     skb_headlen(skb), DMA_TO_DEVICE);
+-	if (unlikely(dma_mapping_error(eth->dev, mapped_addr)))
++	if (unlikely(dma_mapping_error(eth->dma_dev, mapped_addr)))
+ 		return -ENOMEM;
+ 
+ 	WRITE_ONCE(itxd->txd1, mapped_addr);
+@@ -1114,10 +1117,10 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev,
+ 
+ 
+ 			frag_map_size = min(frag_size, MTK_TX_DMA_BUF_LEN);
+-			mapped_addr = skb_frag_dma_map(eth->dev, frag, offset,
++			mapped_addr = skb_frag_dma_map(eth->dma_dev, frag, offset,
+ 						       frag_map_size,
+ 						       DMA_TO_DEVICE);
+-			if (unlikely(dma_mapping_error(eth->dev, mapped_addr)))
++			if (unlikely(dma_mapping_error(eth->dma_dev, mapped_addr)))
+ 				goto err_dma;
+ 
+ 			if (i == nr_frags - 1 &&
+@@ -1384,6 +1387,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
+ 		struct net_device *netdev;
+ 		unsigned int pktlen;
+ 		dma_addr_t dma_addr;
++		u32 hash, reason;
+ 		int mac;
+ 
+ 		if (eth->hwlro)
+@@ -1427,18 +1431,18 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
+ 			netdev->stats.rx_dropped++;
+ 			goto release_desc;
+ 		}
+-		dma_addr = dma_map_single(eth->dev,
++		dma_addr = dma_map_single(eth->dma_dev,
+ 					  new_data + NET_SKB_PAD +
+ 					  eth->ip_align,
+ 					  ring->buf_size,
+ 					  DMA_FROM_DEVICE);
+-		if (unlikely(dma_mapping_error(eth->dev, dma_addr))) {
++		if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr))) {
+ 			skb_free_frag(new_data);
+ 			netdev->stats.rx_dropped++;
+ 			goto release_desc;
+ 		}
+ 
+-		dma_unmap_single(eth->dev, trxd.rxd1,
++		dma_unmap_single(eth->dma_dev, trxd.rxd1,
+ 				 ring->buf_size, DMA_FROM_DEVICE);
+ 
+ 		/* receive data */
+@@ -1463,6 +1467,17 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
+ 			skb_checksum_none_assert(skb);
+ 		skb->protocol = eth_type_trans(skb, netdev);
+ 
++		hash = trxd.rxd4 & MTK_RXD4_FOE_ENTRY;
++		if (hash != MTK_RXD4_FOE_ENTRY) {
++			hash = jhash_1word(hash, 0);
++			skb_set_hash(skb, hash, PKT_HASH_TYPE_L4);
++		}
++
++		reason = FIELD_GET(MTK_RXD4_PPE_CPU_REASON, trxd.rxd4);
++		if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED)
++			mtk_ppe_check_skb(eth->ppe, skb,
++					  trxd.rxd4 & MTK_RXD4_FOE_ENTRY);
++
+ 		if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
+ 			if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+ 				if (trxd.rxd3 & RX_DMA_VTAG_V2)
+@@ -1748,7 +1763,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
+ 		goto no_tx_mem;
+ 
+ 	if (!eth->soc->has_sram)
+-		ring->dma = dma_alloc_coherent(eth->dev, MTK_DMA_SIZE * sz,
++		ring->dma = dma_alloc_coherent(eth->dma_dev, MTK_DMA_SIZE * sz,
+ 					       &ring->phys, GFP_ATOMIC);
+ 	else {
+ 		ring->dma =  eth->scratch_ring + MTK_DMA_SIZE;
+@@ -1780,7 +1795,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
+ 	 * descriptors in ring->dma_pdma.
+ 	 */
+ 	if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+-		ring->dma_pdma = dma_alloc_coherent(eth->dev, MTK_DMA_SIZE * sz,
++		ring->dma_pdma = dma_alloc_coherent(eth->dma_dev, MTK_DMA_SIZE * sz,
+ 						    &ring->phys_pdma,
+ 						    GFP_ATOMIC);
+ 		if (!ring->dma_pdma)
+@@ -1839,7 +1854,7 @@ static void mtk_tx_clean(struct mtk_eth *eth)
+ 	}
+ 
+ 	if (!eth->soc->has_sram && ring->dma) {
+-		dma_free_coherent(eth->dev,
++		dma_free_coherent(eth->dma_dev,
+ 				  MTK_DMA_SIZE * sizeof(*ring->dma),
+ 				  ring->dma,
+ 				  ring->phys);
+@@ -1847,7 +1862,7 @@ static void mtk_tx_clean(struct mtk_eth *eth)
+ 	}
+ 
+ 	if (ring->dma_pdma) {
+-		dma_free_coherent(eth->dev,
++		dma_free_coherent(eth->dma_dev,
+ 				  MTK_DMA_SIZE * sizeof(*ring->dma_pdma),
+ 				  ring->dma_pdma,
+ 				  ring->phys_pdma);
+@@ -1892,7 +1907,7 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
+ 
+ 	if ((!eth->soc->has_sram) || (eth->soc->has_sram
+ 				&& (rx_flag != MTK_RX_FLAGS_NORMAL)))
+-		ring->dma = dma_alloc_coherent(eth->dev,
++		ring->dma = dma_alloc_coherent(eth->dma_dev,
+ 					       rx_dma_size * sizeof(*ring->dma),
+ 					       &ring->phys, GFP_ATOMIC);
+ 	else {
+@@ -1907,11 +1922,11 @@ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
+ 		return -ENOMEM;
+ 
+ 	for (i = 0; i < rx_dma_size; i++) {
+-		dma_addr_t dma_addr = dma_map_single(eth->dev,
++		dma_addr_t dma_addr = dma_map_single(eth->dma_dev,
+ 				ring->data[i] + NET_SKB_PAD + eth->ip_align,
+ 				ring->buf_size,
+ 				DMA_FROM_DEVICE);
+-		if (unlikely(dma_mapping_error(eth->dev, dma_addr)))
++		if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr)))
+ 			return -ENOMEM;
+ 		ring->dma[i].rxd1 = (unsigned int)dma_addr;
+ 
+@@ -1968,7 +1983,7 @@ static void mtk_rx_clean(struct mtk_eth *eth, struct mtk_rx_ring *ring, int in_s
+ 				continue;
+ 			if (!ring->dma[i].rxd1)
+ 				continue;
+-			dma_unmap_single(eth->dev,
++			dma_unmap_single(eth->dma_dev,
+ 					 ring->dma[i].rxd1,
+ 					 ring->buf_size,
+ 					 DMA_FROM_DEVICE);
+@@ -1982,7 +1997,7 @@ static void mtk_rx_clean(struct mtk_eth *eth, struct mtk_rx_ring *ring, int in_s
+ 		return;
+ 
+ 	if (ring->dma) {
+-		dma_free_coherent(eth->dev,
++		dma_free_coherent(eth->dma_dev,
+ 				  ring->dma_size * sizeof(*ring->dma),
+ 				  ring->dma,
+ 				  ring->phys);
+@@ -2462,7 +2477,7 @@ static void mtk_dma_free(struct mtk_eth *eth)
+ 		if (eth->netdev[i])
+ 			netdev_reset_queue(eth->netdev[i]);
+ 	if ( !eth->soc->has_sram && eth->scratch_ring) {
+-		dma_free_coherent(eth->dev,
++		dma_free_coherent(eth->dma_dev,
+ 				  MTK_DMA_SIZE * sizeof(struct mtk_tx_dma),
+ 				  eth->scratch_ring,
+ 				  eth->phy_scratch_ring);
+@@ -2661,7 +2676,7 @@ static int mtk_open(struct net_device *dev)
+ 		if (err)
+ 			return err;
+ 
+-		if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
++		if (eth->soc->offload_version && mtk_ppe_start(eth->ppe) == 0)
+ 			gdm_config = MTK_GDMA_TO_PPE;
+ 
+ 		mtk_gdm_config(eth, gdm_config);
+@@ -2778,7 +2793,7 @@ static int mtk_stop(struct net_device *dev)
+ 	mtk_dma_free(eth);
+ 
+ 	if (eth->soc->offload_version)
+-		mtk_ppe_stop(&eth->ppe);
++		mtk_ppe_stop(eth->ppe);
+ 
+ 	return 0;
+ }
+@@ -2855,6 +2870,8 @@ static int mtk_napi_init(struct mtk_eth *eth)
+ 
+ static int mtk_hw_init(struct mtk_eth *eth, u32 type)
+ {
++	u32 dma_mask = ETHSYS_DMA_AG_MAP_PDMA | ETHSYS_DMA_AG_MAP_QDMA |
++		       ETHSYS_DMA_AG_MAP_PPE;
+ 	int i, ret = 0;
+ 
+ 	pr_info("[%s] reset_lock:%d, force:%d\n", __func__,
+@@ -2872,6 +2889,10 @@ static int mtk_hw_init(struct mtk_eth *eth, u32 type)
+ 			goto err_disable_pm;
+ 	}
+ 
++	if (eth->ethsys)
++		regmap_update_bits(eth->ethsys, ETHSYS_DMA_AG_MAP, dma_mask,
++				   of_dma_is_coherent(eth->dma_dev->of_node) * dma_mask);
++
+ 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628)) {
+ 		ret = device_reset(eth->dev);
+ 		if (ret) {
+@@ -3501,6 +3522,35 @@ free_netdev:
+ 	return err;
+ }
+ 
++void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev)
++{
++	struct net_device *dev, *tmp;
++	LIST_HEAD(dev_list);
++	int i;
++
++	rtnl_lock();
++
++	for (i = 0; i < MTK_MAC_COUNT; i++) {
++		dev = eth->netdev[i];
++
++		if (!dev || !(dev->flags & IFF_UP))
++			continue;
++
++		list_add_tail(&dev->close_list, &dev_list);
++	}
++
++	dev_close_many(&dev_list, false);
++
++	eth->dma_dev = dma_dev;
++
++	list_for_each_entry_safe(dev, tmp, &dev_list, close_list) {
++		list_del_init(&dev->close_list);
++		dev_open(dev, NULL);
++	}
++
++	rtnl_unlock();
++}
++
+ static int mtk_probe(struct platform_device *pdev)
+ {
+ 	struct device_node *mac_np;
+@@ -3514,6 +3564,7 @@ static int mtk_probe(struct platform_device *pdev)
+ 	eth->soc = of_device_get_match_data(&pdev->dev);
+ 
+ 	eth->dev = &pdev->dev;
++	eth->dma_dev = &pdev->dev;
+ 	eth->base = devm_platform_ioremap_resource(pdev, 0);
+ 	if (IS_ERR(eth->base))
+ 		return PTR_ERR(eth->base);
+@@ -3567,6 +3618,16 @@ static int mtk_probe(struct platform_device *pdev)
+ 		}
+ 	}
+ 
++	if (of_dma_is_coherent(pdev->dev.of_node)) {
++		struct regmap *cci;
++
++		cci = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
++						      "mediatek,cci-control");
++		/* enable CPU/bus coherency */
++		if (!IS_ERR(cci))
++			regmap_write(cci, 0, 3);
++	}
++
+ 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII)) {
+ 		eth->sgmii = devm_kzalloc(eth->dev, sizeof(*eth->sgmii),
+ 					  GFP_KERNEL);
+@@ -3589,6 +3650,22 @@ static int mtk_probe(struct platform_device *pdev)
+ 		}
+ 	}
+ 
++	for (i = 0;; i++) {
++		struct device_node *np = of_parse_phandle(pdev->dev.of_node,
++							  "mediatek,wed", i);
++		static const u32 wdma_regs[] = {
++			MTK_WDMA0_BASE,
++			MTK_WDMA1_BASE
++		};
++		void __iomem *wdma;
++
++		if (!np || i >= ARRAY_SIZE(wdma_regs))
++			break;
++
++		wdma = eth->base + wdma_regs[i];
++		mtk_wed_add_hw(np, eth, wdma, i);
++	}
++
+ 	for (i = 0; i < MTK_MAX_IRQ_NUM; i++) {
+ 		if (MTK_HAS_CAPS(eth->soc->caps, MTK_SHARED_INT) && i > 0)
+ 			eth->irq[i] = eth->irq[0];
+@@ -3692,10 +3769,11 @@ static int mtk_probe(struct platform_device *pdev)
+ 	}
+ 
+ 	if (eth->soc->offload_version) {
+-		err = mtk_ppe_init(&eth->ppe, eth->dev,
+-				   eth->base + MTK_ETH_PPE_BASE, 2);
+-		if (err)
++		eth->ppe = mtk_ppe_init(eth, eth->base + MTK_ETH_PPE_BASE, 2);
++		if (!eth->ppe) {
++			err = -ENOMEM;
+ 			goto err_free_dev;
++		}
+ 
+ 		err = mtk_eth_offload_init(eth);
+ 		if (err)
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+old mode 100755
+new mode 100644
+index 349f98503..b52378bd6
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -517,6 +517,9 @@
+ #define RX_DMA_SPORT_MASK       0x7
+ #endif
+ 
++#define MTK_WDMA0_BASE		0x2800
++#define MTK_WDMA1_BASE		0x2c00
++
+ /* QDMA descriptor txd4 */
+ #define TX_DMA_CHKSUM		(0x7 << 29)
+ #define TX_DMA_TSO		BIT(28)
+@@ -704,6 +707,12 @@
+ #define ETHSYS_FE_RST_CHK_IDLE_EN 	0x28
+ 
+ 
++/* ethernet dma channel agent map */
++#define ETHSYS_DMA_AG_MAP	0x408
++#define ETHSYS_DMA_AG_MAP_PDMA	BIT(0)
++#define ETHSYS_DMA_AG_MAP_QDMA	BIT(1)
++#define ETHSYS_DMA_AG_MAP_PPE	BIT(2)
++
+ /* SGMII subsystem config registers */
+ /* Register to auto-negotiation restart */
+ #define SGMSYS_PCS_CONTROL_1	0x0
+@@ -1209,6 +1218,7 @@ struct mtk_reset_event {
+ /* struct mtk_eth -	This is the main datasructure for holding the state
+  *			of the driver
+  * @dev:		The device pointer
++ * @dma_dev:		The device pointer used for dma mapping/alloc
+  * @base:		The mapped register i/o base
+  * @page_lock:		Make sure that register operations are atomic
+  * @tx_irq__lock:	Make sure that IRQ register operations are atomic
+@@ -1243,6 +1253,7 @@ struct mtk_reset_event {
+ 
+ struct mtk_eth {
+ 	struct device			*dev;
++	struct device			*dma_dev;
+ 	void __iomem			*base;
+ 	spinlock_t			page_lock;
+ 	spinlock_t			tx_irq_lock;
+@@ -1283,7 +1294,7 @@ struct mtk_eth {
+ 	spinlock_t			syscfg0_lock;
+ 	struct timer_list		mtk_dma_monitor_timer;
+ 
+-	struct mtk_ppe			ppe;
++	struct mtk_ppe			*ppe;
+ 	struct rhashtable		flow_table;
+ };
+ 
+@@ -1336,5 +1347,6 @@ void ethsys_reset(struct mtk_eth *eth, u32 reset_bits);
+ int mtk_eth_offload_init(struct mtk_eth *eth);
+ int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ 		     void *type_data);
++void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev);
+ 
+ #endif /* MTK_ETH_H */
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
+old mode 100644
+new mode 100755
+index 66298e223..3d75c22be
+--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
++++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
+@@ -6,9 +6,22 @@
+ #include <linux/iopoll.h>
+ #include <linux/etherdevice.h>
+ #include <linux/platform_device.h>
++#include <linux/if_ether.h>
++#include <linux/if_vlan.h>
++#include <net/dsa.h>
++#include "mtk_eth_soc.h"
+ #include "mtk_ppe.h"
+ #include "mtk_ppe_regs.h"
+ 
++static DEFINE_SPINLOCK(ppe_lock);
++
++static const struct rhashtable_params mtk_flow_l2_ht_params = {
++	.head_offset = offsetof(struct mtk_flow_entry, l2_node),
++	.key_offset = offsetof(struct mtk_flow_entry, data.bridge),
++	.key_len = offsetof(struct mtk_foe_bridge, key_end),
++	.automatic_shrinking = true,
++};
++
+ static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
+ {
+ 	writel(val, ppe->base + reg);
+@@ -41,6 +54,11 @@ static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
+ 	return ppe_m32(ppe, reg, val, 0);
+ }
+ 
++static u32 mtk_eth_timestamp(struct mtk_eth *eth)
++{
++	return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
++}
++
+ static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
+ {
+ 	int ret;
+@@ -76,13 +94,6 @@ static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
+ 	u32 hash;
+ 
+ 	switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) {
+-		case MTK_PPE_PKT_TYPE_BRIDGE:
+-			hv1 = e->bridge.src_mac_lo;
+-			hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16);
+-			hv2 = e->bridge.src_mac_hi >> 16;
+-			hv2 ^= e->bridge.dest_mac_lo;
+-			hv3 = e->bridge.dest_mac_hi;
+-			break;
+ 		case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+ 		case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+ 			hv1 = e->ipv4.orig.ports;
+@@ -122,6 +133,9 @@ mtk_foe_entry_l2(struct mtk_foe_entry *entry)
+ {
+ 	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+ 
++	if (type == MTK_PPE_PKT_TYPE_BRIDGE)
++		return &entry->bridge.l2;
++
+ 	if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
+ 		return &entry->ipv6.l2;
+ 
+@@ -133,6 +147,9 @@ mtk_foe_entry_ib2(struct mtk_foe_entry *entry)
+ {
+ 	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+ 
++	if (type == MTK_PPE_PKT_TYPE_BRIDGE)
++		return &entry->bridge.ib2;
++
+ 	if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
+ 		return &entry->ipv6.ib2;
+ 
+@@ -167,7 +184,12 @@ int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
+ 	if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
+ 		entry->ipv6.ports = ports_pad;
+ 
+-	if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
++	if (type == MTK_PPE_PKT_TYPE_BRIDGE) {
++		ether_addr_copy(entry->bridge.src_mac, src_mac);
++		ether_addr_copy(entry->bridge.dest_mac, dest_mac);
++		entry->bridge.ib2 = val;
++		l2 = &entry->bridge.l2;
++	} else if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
+ 		entry->ipv6.ib2 = val;
+ 		l2 = &entry->ipv6.l2;
+ 	} else {
+@@ -329,32 +351,167 @@ int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid)
+ 	return 0;
+ }
+ 
++int mtk_foe_entry_set_wdma(struct mtk_foe_entry *entry, int wdma_idx, int txq,
++			   int bss, int wcid)
++{
++	struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
++	u32 *ib2 = mtk_foe_entry_ib2(entry);
++
++	*ib2 &= ~MTK_FOE_IB2_PORT_MG;
++	*ib2 |= MTK_FOE_IB2_WDMA_WINFO;
++	if (wdma_idx)
++		*ib2 |= MTK_FOE_IB2_WDMA_DEVIDX;
++
++	l2->vlan2 = FIELD_PREP(MTK_FOE_VLAN2_WINFO_BSS, bss) |
++		    FIELD_PREP(MTK_FOE_VLAN2_WINFO_WCID, wcid) |
++		    FIELD_PREP(MTK_FOE_VLAN2_WINFO_RING, txq);
++
++	return 0;
++}
++
+ static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
+ {
+ 	return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
+ 	       FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
+ }
+ 
+-int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
+-			 u16 timestamp)
++static bool
++mtk_flow_entry_match(struct mtk_flow_entry *entry, struct mtk_foe_entry *data)
++{
++	int type, len;
++
++	if ((data->ib1 ^ entry->data.ib1) & MTK_FOE_IB1_UDP)
++		return false;
++
++	type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->data.ib1);
++	if (type > MTK_PPE_PKT_TYPE_IPV4_DSLITE)
++		len = offsetof(struct mtk_foe_entry, ipv6._rsv);
++	else
++		len = offsetof(struct mtk_foe_entry, ipv4.ib2);
++
++	return !memcmp(&entry->data.data, &data->data, len - 4);
++}
++
++static void
++__mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
++{
++	struct hlist_head *head;
++	struct hlist_node *tmp;
++
++	if (entry->type == MTK_FLOW_TYPE_L2) {
++		rhashtable_remove_fast(&ppe->l2_flows, &entry->l2_node,
++				       mtk_flow_l2_ht_params);
++
++		head = &entry->l2_flows;
++		hlist_for_each_entry_safe(entry, tmp, head, l2_data.list)
++			__mtk_foe_entry_clear(ppe, entry);
++		return;
++	}
++
++	hlist_del_init(&entry->list);
++	if (entry->hash != 0xffff) {
++		ppe->foe_table[entry->hash].ib1 &= ~MTK_FOE_IB1_STATE;
++		ppe->foe_table[entry->hash].ib1 |= FIELD_PREP(MTK_FOE_IB1_STATE,
++							      MTK_FOE_STATE_INVALID);
++		dma_wmb();
++	}
++	entry->hash = 0xffff;
++
++	if (entry->type != MTK_FLOW_TYPE_L2_SUBFLOW)
++		return;
++
++	hlist_del_init(&entry->l2_data.list);
++	kfree(entry);
++}
++
++static int __mtk_foe_entry_idle_time(struct mtk_ppe *ppe, u32 ib1)
++{
++	u16 timestamp;
++	u16 now;
++
++	now = mtk_eth_timestamp(ppe->eth) & MTK_FOE_IB1_BIND_TIMESTAMP;
++	timestamp = ib1 & MTK_FOE_IB1_BIND_TIMESTAMP;
++
++	if (timestamp > now)
++		return MTK_FOE_IB1_BIND_TIMESTAMP + 1 - timestamp + now;
++	else
++		return now - timestamp;
++}
++
++static void
++mtk_flow_entry_update_l2(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
+ {
++	struct mtk_flow_entry *cur;
+ 	struct mtk_foe_entry *hwe;
+-	u32 hash;
++	struct hlist_node *tmp;
++	int idle;
++
++	idle = __mtk_foe_entry_idle_time(ppe, entry->data.ib1);
++	hlist_for_each_entry_safe(cur, tmp, &entry->l2_flows, l2_data.list) {
++		int cur_idle;
++		u32 ib1;
++
++		hwe = &ppe->foe_table[cur->hash];
++		ib1 = READ_ONCE(hwe->ib1);
++
++		if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND) {
++			cur->hash = 0xffff;
++			__mtk_foe_entry_clear(ppe, cur);
++			continue;
++		}
++
++		cur_idle = __mtk_foe_entry_idle_time(ppe, ib1);
++		if (cur_idle >= idle)
++			continue;
++
++		idle = cur_idle;
++		entry->data.ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
++		entry->data.ib1 |= hwe->ib1 & MTK_FOE_IB1_BIND_TIMESTAMP;
++	}
++}
++
++static void
++mtk_flow_entry_update(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
++{
++	struct mtk_foe_entry *hwe;
++	struct mtk_foe_entry foe;
++
++	spin_lock_bh(&ppe_lock);
++
++	if (entry->type == MTK_FLOW_TYPE_L2) {
++		mtk_flow_entry_update_l2(ppe, entry);
++		goto out;
++	}
++
++	if (entry->hash == 0xffff)
++		goto out;
++
++	hwe = &ppe->foe_table[entry->hash];
++	memcpy(&foe, hwe, sizeof(foe));
++	if (!mtk_flow_entry_match(entry, &foe)) {
++		entry->hash = 0xffff;
++		goto out;
++	}
++
++	entry->data.ib1 = foe.ib1;
++
++out:
++	spin_unlock_bh(&ppe_lock);
++}
++
++static void
++__mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
++		       u16 hash)
++{
++	struct mtk_foe_entry *hwe;
++	u16 timestamp;
+ 
++	timestamp = mtk_eth_timestamp(ppe->eth);
+ 	timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP;
+ 	entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
+ 	entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp);
+ 
+-	hash = mtk_ppe_hash_entry(entry);
+ 	hwe = &ppe->foe_table[hash];
+-	if (!mtk_foe_entry_usable(hwe)) {
+-		hwe++;
+-		hash++;
+-
+-		if (!mtk_foe_entry_usable(hwe))
+-			return -ENOSPC;
+-	}
+-
+ 	memcpy(&hwe->data, &entry->data, sizeof(hwe->data));
+ 	wmb();
+ 	hwe->ib1 = entry->ib1;
+@@ -362,32 +519,197 @@ int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
+ 	dma_wmb();
+ 
+ 	mtk_ppe_cache_clear(ppe);
++}
+ 
+-	return hash;
++void mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
++{
++	spin_lock_bh(&ppe_lock);
++	__mtk_foe_entry_clear(ppe, entry);
++	spin_unlock_bh(&ppe_lock);
++}
++
++static int
++mtk_foe_entry_commit_l2(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
++{
++	entry->type = MTK_FLOW_TYPE_L2;
++
++	return rhashtable_insert_fast(&ppe->l2_flows, &entry->l2_node,
++				      mtk_flow_l2_ht_params);
++}
++
++int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
++{
++	int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->data.ib1);
++	u32 hash;
++
++	if (type == MTK_PPE_PKT_TYPE_BRIDGE)
++		return mtk_foe_entry_commit_l2(ppe, entry);
++
++	hash = mtk_ppe_hash_entry(&entry->data);
++	entry->hash = 0xffff;
++	spin_lock_bh(&ppe_lock);
++	hlist_add_head(&entry->list, &ppe->foe_flow[hash / 4]);
++	spin_unlock_bh(&ppe_lock);
++
++	return 0;
++}
++
++static void
++mtk_foe_entry_commit_subflow(struct mtk_ppe *ppe, struct mtk_flow_entry *entry,
++			     u16 hash)
++{
++	struct mtk_flow_entry *flow_info;
++	struct mtk_foe_entry foe, *hwe;
++	struct mtk_foe_mac_info *l2;
++	u32 ib1_mask = MTK_FOE_IB1_PACKET_TYPE | MTK_FOE_IB1_UDP;
++	int type;
++
++	flow_info = kzalloc(offsetof(struct mtk_flow_entry, l2_data.end),
++			    GFP_ATOMIC);
++	if (!flow_info)
++		return;
++
++	flow_info->l2_data.base_flow = entry;
++	flow_info->type = MTK_FLOW_TYPE_L2_SUBFLOW;
++	flow_info->hash = hash;
++	hlist_add_head(&flow_info->list, &ppe->foe_flow[hash / 4]);
++	hlist_add_head(&flow_info->l2_data.list, &entry->l2_flows);
++
++	hwe = &ppe->foe_table[hash];
++	memcpy(&foe, hwe, sizeof(foe));
++	foe.ib1 &= ib1_mask;
++	foe.ib1 |= entry->data.ib1 & ~ib1_mask;
++
++	l2 = mtk_foe_entry_l2(&foe);
++	memcpy(l2, &entry->data.bridge.l2, sizeof(*l2));
++
++	type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, foe.ib1);
++	if (type == MTK_PPE_PKT_TYPE_IPV4_HNAPT)
++		memcpy(&foe.ipv4.new, &foe.ipv4.orig, sizeof(foe.ipv4.new));
++	else if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T && l2->etype == ETH_P_IP)
++		l2->etype = ETH_P_IPV6;
++
++	*mtk_foe_entry_ib2(&foe) = entry->data.bridge.ib2;
++
++	__mtk_foe_entry_commit(ppe, &foe, hash);
+ }
+ 
+-int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
++void __mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash)
++{
++	struct hlist_head *head = &ppe->foe_flow[hash / 4];
++	struct mtk_foe_entry *hwe = &ppe->foe_table[hash];
++	struct mtk_flow_entry *entry;
++	struct mtk_foe_bridge key = {};
++	struct ethhdr *eh;
++	bool found = false;
++	u8 *tag;
++
++	spin_lock_bh(&ppe_lock);
++
++	if (FIELD_GET(MTK_FOE_IB1_STATE, hwe->ib1) == MTK_FOE_STATE_BIND)
++		goto out;
++
++	hlist_for_each_entry(entry, head, list) {
++		if (entry->type == MTK_FLOW_TYPE_L2_SUBFLOW) {
++			if (unlikely(FIELD_GET(MTK_FOE_IB1_STATE, hwe->ib1) ==
++				     MTK_FOE_STATE_BIND))
++				continue;
++
++			entry->hash = 0xffff;
++			__mtk_foe_entry_clear(ppe, entry);
++			continue;
++		}
++
++		if (found || !mtk_flow_entry_match(entry, hwe)) {
++			if (entry->hash != 0xffff)
++				entry->hash = 0xffff;
++			continue;
++		}
++
++		entry->hash = hash;
++		__mtk_foe_entry_commit(ppe, &entry->data, hash);
++		found = true;
++	}
++
++	if (found)
++		goto out;
++
++	if (!skb)
++		goto out;
++
++	eh = eth_hdr(skb);
++	ether_addr_copy(key.dest_mac, eh->h_dest);
++	ether_addr_copy(key.src_mac, eh->h_source);
++	tag = skb->data - 2;
++	key.vlan = 0;
++	switch (skb->protocol) {
++#if IS_ENABLED(CONFIG_NET_DSA)
++	case htons(ETH_P_XDSA):
++		if (!netdev_uses_dsa(skb->dev) ||
++		    skb->dev->dsa_ptr->tag_ops->proto != DSA_TAG_PROTO_MTK)
++			goto out;
++
++		tag += 4;
++		if (get_unaligned_be16(tag) != ETH_P_8021Q)
++			break;
++
++		fallthrough;
++#endif
++	case htons(ETH_P_8021Q):
++		key.vlan = get_unaligned_be16(tag + 2) & VLAN_VID_MASK;
++		break;
++	default:
++		break;
++	}
++
++	entry = rhashtable_lookup_fast(&ppe->l2_flows, &key, mtk_flow_l2_ht_params);
++	if (!entry)
++		goto out;
++
++	mtk_foe_entry_commit_subflow(ppe, entry, hash);
++
++out:
++	spin_unlock_bh(&ppe_lock);
++}
++
++int mtk_foe_entry_idle_time(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
++{
++	mtk_flow_entry_update(ppe, entry);
++
++	return __mtk_foe_entry_idle_time(ppe, entry->data.ib1);
++}
++
++struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base,
+ 		 int version)
+ {
++	struct device *dev = eth->dev;
+ 	struct mtk_foe_entry *foe;
++	struct mtk_ppe *ppe;
++
++	ppe = devm_kzalloc(dev, sizeof(*ppe), GFP_KERNEL);
++	if (!ppe)
++		return NULL;
++
++	rhashtable_init(&ppe->l2_flows, &mtk_flow_l2_ht_params);
+ 
+ 	/* need to allocate a separate device, since it PPE DMA access is
+ 	 * not coherent.
+ 	 */
+ 	ppe->base = base;
++	ppe->eth = eth;
+ 	ppe->dev = dev;
+ 	ppe->version = version;
+ 
+ 	foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
+ 				  &ppe->foe_phys, GFP_KERNEL);
+ 	if (!foe)
+-		return -ENOMEM;
++		return NULL;
+ 
+ 	ppe->foe_table = foe;
+ 
+ 	mtk_ppe_debugfs_init(ppe);
+ 
+-	return 0;
++	return ppe;
+ }
+ 
+ static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
+@@ -395,7 +717,7 @@ static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
+ 	static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
+ 	int i, k;
+ 
+-	memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
++	memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
+ 
+ 	if (!IS_ENABLED(CONFIG_SOC_MT7621))
+ 		return;
+@@ -443,7 +765,6 @@ int mtk_ppe_start(struct mtk_ppe *ppe)
+ 	      MTK_PPE_FLOW_CFG_IP4_NAT |
+ 	      MTK_PPE_FLOW_CFG_IP4_NAPT |
+ 	      MTK_PPE_FLOW_CFG_IP4_DSLITE |
+-	      MTK_PPE_FLOW_CFG_L2_BRIDGE |
+ 	      MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
+ 	ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
+ 
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
+index 242fb8f2a..1f5cf1c9a 100644
+--- a/drivers/net/ethernet/mediatek/mtk_ppe.h
++++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
+@@ -6,6 +6,7 @@
+ 
+ #include <linux/kernel.h>
+ #include <linux/bitfield.h>
++#include <linux/rhashtable.h>
+ 
+ #define MTK_ETH_PPE_BASE		0xc00
+ 
+@@ -48,9 +49,9 @@ enum {
+ #define MTK_FOE_IB2_DEST_PORT		GENMASK(7, 5)
+ #define MTK_FOE_IB2_MULTICAST		BIT(8)
+ 
+-#define MTK_FOE_IB2_WHNAT_QID2		GENMASK(13, 12)
+-#define MTK_FOE_IB2_WHNAT_DEVIDX	BIT(16)
+-#define MTK_FOE_IB2_WHNAT_NAT		BIT(17)
++#define MTK_FOE_IB2_WDMA_QID2		GENMASK(13, 12)
++#define MTK_FOE_IB2_WDMA_DEVIDX		BIT(16)
++#define MTK_FOE_IB2_WDMA_WINFO		BIT(17)
+ 
+ #define MTK_FOE_IB2_PORT_MG		GENMASK(17, 12)
+ 
+@@ -58,9 +59,9 @@ enum {
+ 
+ #define MTK_FOE_IB2_DSCP		GENMASK(31, 24)
+ 
+-#define MTK_FOE_VLAN2_WHNAT_BSS		GEMMASK(5, 0)
+-#define MTK_FOE_VLAN2_WHNAT_WCID	GENMASK(13, 6)
+-#define MTK_FOE_VLAN2_WHNAT_RING	GENMASK(15, 14)
++#define MTK_FOE_VLAN2_WINFO_BSS		GENMASK(5, 0)
++#define MTK_FOE_VLAN2_WINFO_WCID	GENMASK(13, 6)
++#define MTK_FOE_VLAN2_WINFO_RING	GENMASK(15, 14)
+ 
+ enum {
+ 	MTK_FOE_STATE_INVALID,
+@@ -84,19 +85,16 @@ struct mtk_foe_mac_info {
+ 	u16 src_mac_lo;
+ };
+ 
++/* software-only entry type */
+ struct mtk_foe_bridge {
+-	u32 dest_mac_hi;
++	u8 dest_mac[ETH_ALEN];
++	u8 src_mac[ETH_ALEN];
++	u16 vlan;
+ 
+-	u16 src_mac_lo;
+-	u16 dest_mac_lo;
+-
+-	u32 src_mac_hi;
++	struct {} key_end;
+ 
+ 	u32 ib2;
+ 
+-	u32 _rsv[5];
+-
+-	u32 udf_tsid;
+ 	struct mtk_foe_mac_info l2;
+ };
+ 
+@@ -235,7 +233,37 @@ enum {
+ 	MTK_PPE_CPU_REASON_INVALID			= 0x1f,
+ };
+ 
++enum {
++	MTK_FLOW_TYPE_L4,
++	MTK_FLOW_TYPE_L2,
++	MTK_FLOW_TYPE_L2_SUBFLOW,
++};
++
++struct mtk_flow_entry {
++	union {
++		struct hlist_node list;
++		struct {
++			struct rhash_head l2_node;
++			struct hlist_head l2_flows;
++		};
++	};
++	u8 type;
++	s8 wed_index;
++	u16 hash;
++	union {
++		struct mtk_foe_entry data;
++		struct {
++			struct mtk_flow_entry *base_flow;
++			struct hlist_node list;
++			struct {} end;
++		} l2_data;
++	};
++	struct rhash_head node;
++	unsigned long cookie;
++};
++
+ struct mtk_ppe {
++	struct mtk_eth *eth;
+ 	struct device *dev;
+ 	void __iomem *base;
+ 	int version;
+@@ -243,19 +271,35 @@ struct mtk_ppe {
+ 	struct mtk_foe_entry *foe_table;
+ 	dma_addr_t foe_phys;
+ 
++	u16 foe_check_time[MTK_PPE_ENTRIES];
++	struct hlist_head foe_flow[MTK_PPE_ENTRIES / 2];
++
++	struct rhashtable l2_flows;
++
+ 	void *acct_table;
+ };
+ 
+-int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
+-		 int version);
++struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, int version);
+ int mtk_ppe_start(struct mtk_ppe *ppe);
+ int mtk_ppe_stop(struct mtk_ppe *ppe);
+ 
++void __mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash);
++
+ static inline void
+-mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash)
++mtk_ppe_check_skb(struct mtk_ppe *ppe, struct sk_buff *skb, u16 hash)
+ {
+-	ppe->foe_table[hash].ib1 = 0;
+-	dma_wmb();
++	u16 now, diff;
++
++	if (!ppe)
++		return;
++
++	now = (u16)jiffies;
++	diff = now - ppe->foe_check_time[hash];
++	if (diff < HZ / 10)
++		return;
++
++	ppe->foe_check_time[hash] = now;
++	__mtk_ppe_check_skb(ppe, skb, hash);
+ }
+ 
+ static inline int
+@@ -281,8 +325,11 @@ int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
+ int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port);
+ int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid);
+ int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid);
+-int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
+-			 u16 timestamp);
++int mtk_foe_entry_set_wdma(struct mtk_foe_entry *entry, int wdma_idx, int txq,
++			   int bss, int wcid);
++int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
++void mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
++int mtk_foe_entry_idle_time(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
+ int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
+ 
+ #endif
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
+index d4b482340..a591ab1fd 100644
+--- a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
++++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
+@@ -32,7 +32,6 @@ static const char *mtk_foe_pkt_type_str(int type)
+ 	static const char * const type_str[] = {
+ 		[MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
+ 		[MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
+-		[MTK_PPE_PKT_TYPE_BRIDGE] = "L2",
+ 		[MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
+ 		[MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
+ 		[MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
+@@ -207,6 +206,9 @@ int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
+ 	struct dentry *root;
+ 
+ 	root = debugfs_create_dir("mtk_ppe", NULL);
++	if (!root)
++		return -ENOMEM;
++
+ 	debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
+ 	debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
+ 
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+index 4294f0c74..d4a012608 100644
+--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
++++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+@@ -11,6 +11,7 @@
+ #include <net/pkt_cls.h>
+ #include <net/dsa.h>
+ #include "mtk_eth_soc.h"
++#include "mtk_wed.h"
+ 
+ struct mtk_flow_data {
+ 	struct ethhdr eth;
+@@ -30,6 +31,8 @@ struct mtk_flow_data {
+ 	__be16 src_port;
+ 	__be16 dst_port;
+ 
++	u16 vlan_in;
++
+ 	struct {
+ 		u16 id;
+ 		__be16 proto;
+@@ -41,12 +44,6 @@ struct mtk_flow_data {
+ 	} pppoe;
+ };
+ 
+-struct mtk_flow_entry {
+-	struct rhash_head node;
+-	unsigned long cookie;
+-	u16 hash;
+-};
+-
+ static const struct rhashtable_params mtk_flow_ht_params = {
+ 	.head_offset = offsetof(struct mtk_flow_entry, node),
+ 	.key_offset = offsetof(struct mtk_flow_entry, cookie),
+@@ -54,12 +51,6 @@ static const struct rhashtable_params mtk_flow_ht_params = {
+ 	.automatic_shrinking = true,
+ };
+ 
+-static u32
+-mtk_eth_timestamp(struct mtk_eth *eth)
+-{
+-	return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
+-}
+-
+ static int
+ mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
+ 		       bool egress)
+@@ -94,6 +85,35 @@ mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
+ 	memcpy(dest, src, act->mangle.mask ? 2 : 4);
+ }
+ 
++static int
++mtk_flow_get_wdma_info(struct net_device *dev, const u8 *addr, struct mtk_wdma_info *info)
++{
++	struct net_device_path_ctx ctx = {
++		.dev = dev,
++	};
++	struct net_device_path path = {};
++
++	if (!IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED))
++		return -1;
++
++	if (!dev->netdev_ops->ndo_fill_forward_path)
++		return -1;
++
++	memcpy(ctx.daddr, addr, sizeof(ctx.daddr));
++	if (dev->netdev_ops->ndo_fill_forward_path(&ctx, &path))
++		return -1;
++
++	if (path.type != DEV_PATH_MTK_WDMA)
++		return -1;
++
++	info->wdma_idx = path.mtk_wdma.wdma_idx;
++	info->queue = path.mtk_wdma.queue;
++	info->bss = path.mtk_wdma.bss;
++	info->wcid = path.mtk_wdma.wcid;
++
++	return 0;
++}
++
+ 
+ static int
+ mtk_flow_mangle_ports(const struct flow_action_entry *act,
+@@ -163,10 +183,20 @@ mtk_flow_get_dsa_port(struct net_device **dev)
+ 
+ static int
+ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
+-			   struct net_device *dev)
++			   struct net_device *dev, const u8 *dest_mac,
++			   int *wed_index)
+ {
++	struct mtk_wdma_info info = {};
+ 	int pse_port, dsa_port;
+ 
++	if (mtk_flow_get_wdma_info(dev, dest_mac, &info) == 0) {
++		mtk_foe_entry_set_wdma(foe, info.wdma_idx, info.queue, info.bss,
++				       info.wcid);
++		pse_port = 3;
++		*wed_index = info.wdma_idx;
++		goto out;
++	}
++
+ 	dsa_port = mtk_flow_get_dsa_port(&dev);
+ 	if (dsa_port >= 0)
+ 		mtk_foe_entry_set_dsa(foe, dsa_port);
+@@ -178,6 +208,7 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
+ 	else
+ 		return -EOPNOTSUPP;
+ 
++out:
+ 	mtk_foe_entry_set_pse_port(foe, pse_port);
+ 
+ 	return 0;
+@@ -193,11 +224,10 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+ 	struct net_device *odev = NULL;
+ 	struct mtk_flow_entry *entry;
+ 	int offload_type = 0;
++	int wed_index = -1;
+ 	u16 addr_type = 0;
+-	u32 timestamp;
+ 	u8 l4proto = 0;
+ 	int err = 0;
+-	int hash;
+ 	int i;
+ 
+ 	if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params))
+@@ -229,9 +259,45 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+ 		return -EOPNOTSUPP;
+ 	}
+ 
++	switch (addr_type) {
++	case 0:
++		offload_type = MTK_PPE_PKT_TYPE_BRIDGE;
++		if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
++			struct flow_match_eth_addrs match;
++
++			flow_rule_match_eth_addrs(rule, &match);
++			memcpy(data.eth.h_dest, match.key->dst, ETH_ALEN);
++			memcpy(data.eth.h_source, match.key->src, ETH_ALEN);
++		} else {
++			return -EOPNOTSUPP;
++		}
++
++		if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
++			struct flow_match_vlan match;
++
++			flow_rule_match_vlan(rule, &match);
++
++			if (match.key->vlan_tpid != cpu_to_be16(ETH_P_8021Q))
++				return -EOPNOTSUPP;
++
++			data.vlan_in = match.key->vlan_id;
++		}
++		break;
++	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
++		offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
++		break;
++	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
++		offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T;
++		break;
++	default:
++		return -EOPNOTSUPP;
++	}
++
+ 	flow_action_for_each(i, act, &rule->action) {
+ 		switch (act->id) {
+ 		case FLOW_ACTION_MANGLE:
++			if (offload_type == MTK_PPE_PKT_TYPE_BRIDGE)
++				return -EOPNOTSUPP;
+ 			if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
+ 				mtk_flow_offload_mangle_eth(act, &data.eth);
+ 			break;
+@@ -263,17 +329,6 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+ 		}
+ 	}
+ 
+-	switch (addr_type) {
+-	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+-		offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
+-		break;
+-	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+-		offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T;
+-		break;
+-	default:
+-		return -EOPNOTSUPP;
+-	}
+-
+ 	if (!is_valid_ether_addr(data.eth.h_source) ||
+ 	    !is_valid_ether_addr(data.eth.h_dest))
+ 		return -EINVAL;
+@@ -287,10 +342,13 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+ 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+ 		struct flow_match_ports ports;
+ 
++		if (offload_type == MTK_PPE_PKT_TYPE_BRIDGE)
++			return -EOPNOTSUPP;
++
+ 		flow_rule_match_ports(rule, &ports);
+ 		data.src_port = ports.key->src;
+ 		data.dst_port = ports.key->dst;
+-	} else {
++	} else if (offload_type != MTK_PPE_PKT_TYPE_BRIDGE) {
+ 		return -EOPNOTSUPP;
+ 	}
+ 
+@@ -320,6 +378,9 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+ 		if (act->id != FLOW_ACTION_MANGLE)
+ 			continue;
+ 
++		if (offload_type == MTK_PPE_PKT_TYPE_BRIDGE)
++			return -EOPNOTSUPP;
++
+ 		switch (act->mangle.htype) {
+ 		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+ 		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+@@ -345,6 +406,9 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+ 			return err;
+ 	}
+ 
++	if (offload_type == MTK_PPE_PKT_TYPE_BRIDGE)
++		foe.bridge.vlan = data.vlan_in;
++
+ 	if (data.vlan.num == 1) {
+ 		if (data.vlan.proto != htons(ETH_P_8021Q))
+ 			return -EOPNOTSUPP;
+@@ -354,33 +418,38 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+ 	if (data.pppoe.num == 1)
+ 		mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
+ 
+-	err = mtk_flow_set_output_device(eth, &foe, odev);
++	err = mtk_flow_set_output_device(eth, &foe, odev, data.eth.h_dest,
++					 &wed_index);
+ 	if (err)
+ 		return err;
+ 
++	if (wed_index >= 0 && (err = mtk_wed_flow_add(wed_index)) < 0)
++		return err;
++
+ 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ 	if (!entry)
+ 		return -ENOMEM;
+ 
+ 	entry->cookie = f->cookie;
+-	timestamp = mtk_eth_timestamp(eth);
+-	hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
+-	if (hash < 0) {
+-		err = hash;
++	memcpy(&entry->data, &foe, sizeof(entry->data));
++	entry->wed_index = wed_index;
++
++	if ((err = mtk_foe_entry_commit(eth->ppe, entry)) < 0)
+ 		goto free;
+-	}
+ 
+-	entry->hash = hash;
+ 	err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
+ 				     mtk_flow_ht_params);
+ 	if (err < 0)
+-		goto clear_flow;
++		goto clear;
+ 
+ 	return 0;
+-clear_flow:
+-	mtk_foe_entry_clear(&eth->ppe, hash);
++
++clear:
++	mtk_foe_entry_clear(eth->ppe, entry);
+ free:
+ 	kfree(entry);
++	if (wed_index >= 0)
++		mtk_wed_flow_remove(wed_index);
+ 	return err;
+ }
+ 
+@@ -394,9 +463,11 @@ mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
+ 	if (!entry)
+ 		return -ENOENT;
+ 
+-	mtk_foe_entry_clear(&eth->ppe, entry->hash);
++	mtk_foe_entry_clear(eth->ppe, entry);
+ 	rhashtable_remove_fast(&eth->flow_table, &entry->node,
+ 			       mtk_flow_ht_params);
++	if (entry->wed_index >= 0)
++		mtk_wed_flow_remove(entry->wed_index);
+ 	kfree(entry);
+ 
+ 	return 0;
+@@ -406,7 +477,6 @@ static int
+ mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
+ {
+ 	struct mtk_flow_entry *entry;
+-	int timestamp;
+ 	u32 idle;
+ 
+ 	entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
+@@ -414,11 +484,7 @@ mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
+ 	if (!entry)
+ 		return -ENOENT;
+ 
+-	timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
+-	if (timestamp < 0)
+-		return -ETIMEDOUT;
+-
+-	idle = mtk_eth_timestamp(eth) - timestamp;
++	idle = mtk_foe_entry_idle_time(eth->ppe, entry);
+ 	f->stats.lastused = jiffies - idle * HZ;
+ 
+ 	return 0;
+@@ -470,7 +536,7 @@ mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
+ 	struct flow_block_cb *block_cb;
+ 	flow_setup_cb_t *cb;
+ 
+-	if (!eth->ppe.foe_table)
++	if (!eth->ppe || !eth->ppe->foe_table)
+ 		return -EOPNOTSUPP;
+ 
+ 	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+@@ -511,15 +577,18 @@ mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
+ int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ 		     void *type_data)
+ {
+-	if (type == TC_SETUP_FT)
++	switch (type) {
++	case TC_SETUP_BLOCK:
++	case TC_SETUP_FT:
+ 		return mtk_eth_setup_tc_block(dev, type_data);
+-
+-	return -EOPNOTSUPP;
++	default:
++		return -EOPNOTSUPP;
++	}
+ }
+ 
+ int mtk_eth_offload_init(struct mtk_eth *eth)
+ {
+-	if (!eth->ppe.foe_table)
++	if (!eth->ppe || !eth->ppe->foe_table)
+ 		return 0;
+ 
+ 	return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
+new file mode 100644
+index 000000000..ea1cbdf1a
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_wed.c
+@@ -0,0 +1,876 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2021 Felix Fietkau <nbd@nbd.name> */
++
++#include <linux/kernel.h>
++#include <linux/slab.h>
++#include <linux/module.h>
++#include <linux/bitfield.h>
++#include <linux/dma-mapping.h>
++#include <linux/skbuff.h>
++#include <linux/of_platform.h>
++#include <linux/of_address.h>
++#include <linux/mfd/syscon.h>
++#include <linux/debugfs.h>
++#include <linux/iopoll.h>
++#include <linux/soc/mediatek/mtk_wed.h>
++#include "mtk_eth_soc.h"
++#include "mtk_wed_regs.h"
++#include "mtk_wed.h"
++#include "mtk_ppe.h"
++
++#define MTK_PCIE_BASE(n)		(0x1a143000 + (n) * 0x2000)
++
++#define MTK_WED_PKT_SIZE		1900
++#define MTK_WED_BUF_SIZE		2048
++#define MTK_WED_BUF_PER_PAGE		(PAGE_SIZE / 2048)
++
++#define MTK_WED_TX_RING_SIZE		2048
++#define MTK_WED_WDMA_RING_SIZE		1024
++
++static struct mtk_wed_hw *hw_list[2];
++static DEFINE_MUTEX(hw_lock);
++
++static void
++wed_m32(struct mtk_wed_device *dev, u32 reg, u32 mask, u32 val)
++{
++	regmap_update_bits(dev->hw->regs, reg, mask | val, val);
++}
++
++static void
++wed_set(struct mtk_wed_device *dev, u32 reg, u32 mask)
++{
++	wed_m32(dev, reg, 0, mask); /* turn on every bit in @mask */
++}
++
++static void
++wed_clr(struct mtk_wed_device *dev, u32 reg, u32 mask)
++{
++	wed_m32(dev, reg, mask, 0); /* turn off every bit in @mask */
++}
++
++static void
++wdma_m32(struct mtk_wed_device *dev, u32 reg, u32 mask, u32 val)
++{
++	wdma_w32(dev, reg, (wdma_r32(dev, reg) & ~mask) | val);
++}
++
++static void
++wdma_set(struct mtk_wed_device *dev, u32 reg, u32 mask)
++{
++	wdma_m32(dev, reg, 0, mask);
++}
++
++static u32
++mtk_wed_read_reset(struct mtk_wed_device *dev)
++{
++	return wed_r32(dev, MTK_WED_RESET);
++}
++
++static void
++mtk_wed_reset(struct mtk_wed_device *dev, u32 mask)
++{
++	u32 status;
++
++	wed_w32(dev, MTK_WED_RESET, mask);
++	if (readx_poll_timeout(mtk_wed_read_reset, dev, status,
++			       !(status & mask), 0, 1000))
++		WARN_ON_ONCE(1);
++}
++
++static struct mtk_wed_hw *
++mtk_wed_assign(struct mtk_wed_device *dev)
++{
++	struct mtk_wed_hw *hw;
++
++	hw = hw_list[pci_domain_nr(dev->wlan.pci_dev->bus)];
++	if (!hw || hw->wed_dev)
++		return NULL;
++
++	hw->wed_dev = dev;
++	return hw;
++}
++
++static int
++mtk_wed_buffer_alloc(struct mtk_wed_device *dev)
++{
++	struct mtk_wdma_desc *desc;
++	dma_addr_t desc_phys;
++	void **page_list;
++	int token = dev->wlan.token_start;
++	int ring_size;
++	int n_pages;
++	int i, page_idx;
++
++	ring_size = dev->wlan.nbuf & ~(MTK_WED_BUF_PER_PAGE - 1);
++	n_pages = ring_size / MTK_WED_BUF_PER_PAGE;
++
++	page_list = kcalloc(n_pages, sizeof(*page_list), GFP_KERNEL);
++	if (!page_list)
++		return -ENOMEM;
++
++	dev->buf_ring.size = ring_size;
++	dev->buf_ring.pages = page_list;
++
++	desc = dma_alloc_coherent(dev->hw->dev, ring_size * sizeof(*desc),
++				  &desc_phys, GFP_KERNEL);
++	if (!desc)
++		return -ENOMEM;
++
++	dev->buf_ring.desc = desc;
++	dev->buf_ring.desc_phys = desc_phys;
++	/* a failure below leaves partial state for mtk_wed_free_buffer() */
++	for (i = 0, page_idx = 0; i < ring_size; i += MTK_WED_BUF_PER_PAGE) {
++		dma_addr_t page_phys, buf_phys;
++		struct page *page;
++		void *buf;
++		int s;
++
++		page = __dev_alloc_pages(GFP_KERNEL, 0);
++		if (!page)
++			return -ENOMEM;
++
++		page_phys = dma_map_page(dev->hw->dev, page, 0, PAGE_SIZE,
++					 DMA_BIDIRECTIONAL);
++		if (dma_mapping_error(dev->hw->dev, page_phys)) {
++			__free_page(page);
++			return -ENOMEM;
++		}
++
++		page_list[page_idx++] = page;
++		dma_sync_single_for_cpu(dev->hw->dev, page_phys, PAGE_SIZE,
++					DMA_BIDIRECTIONAL);
++
++		buf = page_to_virt(page);
++		buf_phys = page_phys;
++
++		for (s = 0; s < MTK_WED_BUF_PER_PAGE; s++) {
++			u32 txd_size;
++
++			txd_size = dev->wlan.init_buf(buf, buf_phys, token++);
++			/* descriptor words are __le32: convert on store */
++			desc->buf0 = cpu_to_le32(buf_phys);
++			desc->buf1 = cpu_to_le32(buf_phys + txd_size);
++			desc->ctrl = cpu_to_le32(
++				FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN0, txd_size) |
++				FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1,
++					   MTK_WED_BUF_SIZE - txd_size) |
++				MTK_WDMA_DESC_CTRL_LAST_SEG1);
++			desc->info = 0;
++			desc++;
++
++			buf += MTK_WED_BUF_SIZE;
++			buf_phys += MTK_WED_BUF_SIZE;
++		}
++
++		dma_sync_single_for_device(dev->hw->dev, page_phys, PAGE_SIZE,
++					   DMA_BIDIRECTIONAL);
++	}
++
++	return 0;
++}
++
++static void
++mtk_wed_free_buffer(struct mtk_wed_device *dev)
++{
++	struct mtk_wdma_desc *desc = dev->buf_ring.desc;
++	void **page_list = dev->buf_ring.pages;
++	int page_idx;
++	int i;
++
++	if (!page_list)
++		return;
++
++	if (!desc)
++		goto free_pagelist;
++
++	for (i = 0, page_idx = 0; i < dev->buf_ring.size; i += MTK_WED_BUF_PER_PAGE) {
++		void *page = page_list[page_idx++];
++
++		if (!page)
++			break;
++		/* buf0 of a page's first descriptor is the page DMA address */
++		dma_unmap_page(dev->hw->dev, le32_to_cpu(desc[i].buf0),
++			       PAGE_SIZE, DMA_BIDIRECTIONAL);
++		__free_page(page);
++	}
++
++	dma_free_coherent(dev->hw->dev, dev->buf_ring.size * sizeof(*desc),
++			  desc, dev->buf_ring.desc_phys);
++
++free_pagelist:
++	kfree(page_list);
++}
++
++static void
++mtk_wed_free_ring(struct mtk_wed_device *dev, struct mtk_wed_ring *ring)
++{
++	if (!ring->desc)
++		return;
++
++	dma_free_coherent(dev->hw->dev, ring->size * sizeof(*ring->desc),
++			  ring->desc, ring->desc_phys);
++}
++
++static void
++mtk_wed_free_tx_rings(struct mtk_wed_device *dev)
++{
++	int i;
++
++	for (i = 0; i < ARRAY_SIZE(dev->tx_ring); i++)
++		mtk_wed_free_ring(dev, &dev->tx_ring[i]);
++	for (i = 0; i < ARRAY_SIZE(dev->tx_wdma); i++)
++		mtk_wed_free_ring(dev, &dev->tx_wdma[i]);
++}
++
++static void
++mtk_wed_set_ext_int(struct mtk_wed_device *dev, bool en)
++{
++	u32 mask = MTK_WED_EXT_INT_STATUS_ERROR_MASK;
++
++	if (!dev->hw->num_flows)
++		mask &= ~MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD;
++
++	wed_w32(dev, MTK_WED_EXT_INT_MASK, en ? mask : 0);
++	wed_r32(dev, MTK_WED_EXT_INT_MASK);
++}
++
++static void
++mtk_wed_stop(struct mtk_wed_device *dev)
++{
++	regmap_write(dev->hw->mirror, dev->hw->index * 4, 0);
++	mtk_wed_set_ext_int(dev, false);
++
++	wed_clr(dev, MTK_WED_CTRL,
++		MTK_WED_CTRL_WDMA_INT_AGENT_EN |
++		MTK_WED_CTRL_WPDMA_INT_AGENT_EN |
++		MTK_WED_CTRL_WED_TX_BM_EN |
++		MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
++	wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, 0);
++	wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, 0);
++	wdma_w32(dev, MTK_WDMA_INT_MASK, 0);
++	wdma_w32(dev, MTK_WDMA_INT_GRP2, 0);
++	wed_w32(dev, MTK_WED_WPDMA_INT_MASK, 0);
++
++	wed_clr(dev, MTK_WED_GLO_CFG,
++		MTK_WED_GLO_CFG_TX_DMA_EN |
++		MTK_WED_GLO_CFG_RX_DMA_EN);
++	wed_clr(dev, MTK_WED_WPDMA_GLO_CFG,
++		MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN |
++		MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN);
++	wed_clr(dev, MTK_WED_WDMA_GLO_CFG,
++		MTK_WED_WDMA_GLO_CFG_RX_DRV_EN);
++}
++
++static void
++mtk_wed_detach(struct mtk_wed_device *dev)
++{
++	struct device_node *wlan_node = dev->wlan.pci_dev->dev.of_node;
++	struct mtk_wed_hw *hw = dev->hw;
++
++	mutex_lock(&hw_lock);
++
++	mtk_wed_stop(dev);
++
++	wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_RX);
++	wdma_w32(dev, MTK_WDMA_RESET_IDX, 0);
++
++	mtk_wed_reset(dev, MTK_WED_RESET_WED);
++
++	mtk_wed_free_buffer(dev);
++	mtk_wed_free_tx_rings(dev);
++
++	if (of_dma_is_coherent(wlan_node))
++		regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP,
++				   BIT(hw->index), BIT(hw->index));
++	/* the sibling WED may not exist: hw_list[] slots start out NULL */
++	if ((!hw_list[!hw->index] || !hw_list[!hw->index]->wed_dev) &&
++	    hw->eth->dma_dev != hw->eth->dev)
++		mtk_eth_set_dma_device(hw->eth, hw->eth->dev);
++
++	memset(dev, 0, sizeof(*dev));
++	module_put(THIS_MODULE);
++
++	hw->wed_dev = NULL;
++	mutex_unlock(&hw_lock);
++}
++
++static void
++mtk_wed_hw_init_early(struct mtk_wed_device *dev)
++{
++	u32 mask, set;
++	u32 offset;
++
++	mtk_wed_stop(dev);
++	mtk_wed_reset(dev, MTK_WED_RESET_WED);
++
++	mask = MTK_WED_WDMA_GLO_CFG_BT_SIZE |
++	       MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE |
++	       MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE;
++	set = FIELD_PREP(MTK_WED_WDMA_GLO_CFG_BT_SIZE, 2) |
++	      MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP |
++	      MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY;
++	wed_m32(dev, MTK_WED_WDMA_GLO_CFG, mask, set);
++
++	wdma_set(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_RX_INFO_PRERES);
++
++	offset = dev->hw->index ? 0x04000400 : 0;
++	wed_w32(dev, MTK_WED_WDMA_OFFSET0, 0x2a042a20 + offset);
++	wed_w32(dev, MTK_WED_WDMA_OFFSET1, 0x29002800 + offset);
++
++	wed_w32(dev, MTK_WED_PCIE_CFG_BASE, MTK_PCIE_BASE(dev->hw->index));
++	wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_phys);
++}
++
++static void
++mtk_wed_hw_init(struct mtk_wed_device *dev)
++{
++	if (dev->init_done)
++		return;
++
++	dev->init_done = true;
++	mtk_wed_set_ext_int(dev, false);
++	wed_w32(dev, MTK_WED_TX_BM_CTRL,
++		MTK_WED_TX_BM_CTRL_PAUSE |
++		FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM,
++			   dev->buf_ring.size / 128) |
++		FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM,
++			   MTK_WED_TX_RING_SIZE / 256));
++
++	wed_w32(dev, MTK_WED_TX_BM_BASE, dev->buf_ring.desc_phys);
++
++	wed_w32(dev, MTK_WED_TX_BM_TKID,
++		FIELD_PREP(MTK_WED_TX_BM_TKID_START,
++			   dev->wlan.token_start) |
++		FIELD_PREP(MTK_WED_TX_BM_TKID_END,
++			   dev->wlan.token_start + dev->wlan.nbuf - 1));
++
++	wed_w32(dev, MTK_WED_TX_BM_BUF_LEN, MTK_WED_PKT_SIZE);
++
++	wed_w32(dev, MTK_WED_TX_BM_DYN_THR,
++		FIELD_PREP(MTK_WED_TX_BM_DYN_THR_LO, 1) |
++		MTK_WED_TX_BM_DYN_THR_HI);
++
++	mtk_wed_reset(dev, MTK_WED_RESET_TX_BM);
++
++	wed_set(dev, MTK_WED_CTRL,
++		MTK_WED_CTRL_WED_TX_BM_EN |
++		MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
++
++	wed_clr(dev, MTK_WED_TX_BM_CTRL, MTK_WED_TX_BM_CTRL_PAUSE);
++}
++
++static void
++mtk_wed_ring_reset(struct mtk_wdma_desc *desc, int size)
++{
++	struct mtk_wdma_desc *end = desc + size;
++
++	for (; desc < end; desc++) {
++		desc->buf0 = 0;
++		desc->ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE);
++		desc->buf1 = 0;
++		desc->info = 0;
++	}
++}
++
++static u32
++mtk_wed_check_busy(struct mtk_wed_device *dev)
++{
++	if (wed_r32(dev, MTK_WED_GLO_CFG) & MTK_WED_GLO_CFG_TX_DMA_BUSY)
++		return true;
++
++	if (wed_r32(dev, MTK_WED_WPDMA_GLO_CFG) &
++	    MTK_WED_WPDMA_GLO_CFG_TX_DRV_BUSY)
++		return true;
++
++	if (wed_r32(dev, MTK_WED_CTRL) & MTK_WED_CTRL_WDMA_INT_AGENT_BUSY)
++		return true;
++
++	if (wed_r32(dev, MTK_WED_WDMA_GLO_CFG) &
++	    MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY)
++		return true;
++
++	if (wdma_r32(dev, MTK_WDMA_GLO_CFG) &
++	    MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY)
++		return true;
++
++	if (wed_r32(dev, MTK_WED_CTRL) &
++	    (MTK_WED_CTRL_WED_TX_BM_BUSY | MTK_WED_CTRL_WED_TX_FREE_AGENT_BUSY))
++		return true;
++
++	return false;
++}
++
++static int
++mtk_wed_poll_busy(struct mtk_wed_device *dev)
++{
++	int sleep = 15000;
++	int timeout = 100 * sleep;
++	u32 val;
++
++	return read_poll_timeout(mtk_wed_check_busy, val, !val, sleep,
++				 timeout, false, dev);
++}
++
++static void
++mtk_wed_reset_dma(struct mtk_wed_device *dev)
++{
++	bool busy = false;
++	u32 val;
++	int i;
++
++	for (i = 0; i < ARRAY_SIZE(dev->tx_ring); i++) {
++		struct mtk_wdma_desc *desc = dev->tx_ring[i].desc;
++
++		if (!desc)
++			continue;
++
++		mtk_wed_ring_reset(desc, MTK_WED_TX_RING_SIZE);
++	}
++
++	if (mtk_wed_poll_busy(dev))
++		busy = mtk_wed_check_busy(dev);
++
++	if (busy) {
++		mtk_wed_reset(dev, MTK_WED_RESET_WED_TX_DMA);
++	} else {
++		wed_w32(dev, MTK_WED_RESET_IDX,
++			MTK_WED_RESET_IDX_TX |
++			MTK_WED_RESET_IDX_RX);
++		wed_w32(dev, MTK_WED_RESET_IDX, 0);
++	}
++
++	wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_RX);
++	wdma_w32(dev, MTK_WDMA_RESET_IDX, 0);
++
++	if (busy) {
++		mtk_wed_reset(dev, MTK_WED_RESET_WDMA_INT_AGENT);
++		mtk_wed_reset(dev, MTK_WED_RESET_WDMA_RX_DRV);
++	} else {
++		wed_w32(dev, MTK_WED_WDMA_RESET_IDX,
++			MTK_WED_WDMA_RESET_IDX_RX | MTK_WED_WDMA_RESET_IDX_DRV);
++		wed_w32(dev, MTK_WED_WDMA_RESET_IDX, 0);
++
++		wed_set(dev, MTK_WED_WDMA_GLO_CFG,
++			MTK_WED_WDMA_GLO_CFG_RST_INIT_COMPLETE);
++
++		wed_clr(dev, MTK_WED_WDMA_GLO_CFG,
++			MTK_WED_WDMA_GLO_CFG_RST_INIT_COMPLETE);
++	}
++
++	for (i = 0; i < 100; i++) {
++		val = wed_r32(dev, MTK_WED_TX_BM_INTF);
++		if (FIELD_GET(MTK_WED_TX_BM_INTF_TKFIFO_FDEP, val) == 0x40)
++			break;
++	}
++
++	mtk_wed_reset(dev, MTK_WED_RESET_TX_FREE_AGENT);
++	mtk_wed_reset(dev, MTK_WED_RESET_TX_BM);
++
++	if (busy) {
++		mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_INT_AGENT);
++		mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_TX_DRV);
++		mtk_wed_reset(dev, MTK_WED_RESET_WPDMA_RX_DRV);
++	} else {
++		wed_w32(dev, MTK_WED_WPDMA_RESET_IDX,
++			MTK_WED_WPDMA_RESET_IDX_TX |
++			MTK_WED_WPDMA_RESET_IDX_RX);
++		wed_w32(dev, MTK_WED_WPDMA_RESET_IDX, 0);
++	}
++
++}
++
++static int
++mtk_wed_ring_alloc(struct mtk_wed_device *dev, struct mtk_wed_ring *ring,
++		   int size)
++{
++	ring->desc = dma_alloc_coherent(dev->hw->dev,
++					size * sizeof(*ring->desc),
++					&ring->desc_phys, GFP_KERNEL);
++	if (!ring->desc)
++		return -ENOMEM;
++
++	ring->size = size;
++	mtk_wed_ring_reset(ring->desc, size);
++
++	return 0;
++}
++
++static int
++mtk_wed_wdma_ring_setup(struct mtk_wed_device *dev, int idx, int size)
++{
++	struct mtk_wed_ring *wdma = &dev->tx_wdma[idx];
++
++	if (mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE))
++		return -ENOMEM;
++
++	wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_BASE,
++		 wdma->desc_phys);
++	wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_COUNT,
++		 size);
++	wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_CPU_IDX, 0);
++
++	wed_w32(dev, MTK_WED_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_BASE,
++		wdma->desc_phys);
++	wed_w32(dev, MTK_WED_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_COUNT,
++		size);
++
++	return 0;
++}
++
++static void
++mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask)
++{
++	u32 wdma_mask;
++	u32 val;
++	int i;
++
++	for (i = 0; i < ARRAY_SIZE(dev->tx_wdma); i++)
++		if (!dev->tx_wdma[i].desc)
++			mtk_wed_wdma_ring_setup(dev, i, 16);
++
++	wdma_mask = FIELD_PREP(MTK_WDMA_INT_MASK_RX_DONE, GENMASK(1, 0));
++
++	mtk_wed_hw_init(dev);
++
++	wed_set(dev, MTK_WED_CTRL,
++		MTK_WED_CTRL_WDMA_INT_AGENT_EN |
++		MTK_WED_CTRL_WPDMA_INT_AGENT_EN |
++		MTK_WED_CTRL_WED_TX_BM_EN |
++		MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
++
++	wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, MTK_WED_PCIE_INT_TRIGGER_STATUS);
++
++	wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER,
++		MTK_WED_WPDMA_INT_TRIGGER_RX_DONE |
++		MTK_WED_WPDMA_INT_TRIGGER_TX_DONE);
++
++	wed_set(dev, MTK_WED_WPDMA_INT_CTRL,
++		MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV);
++
++	wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, wdma_mask);
++	wed_clr(dev, MTK_WED_WDMA_INT_CTRL, wdma_mask);
++
++	wdma_w32(dev, MTK_WDMA_INT_MASK, wdma_mask);
++	wdma_w32(dev, MTK_WDMA_INT_GRP2, wdma_mask);
++
++	wed_w32(dev, MTK_WED_WPDMA_INT_MASK, irq_mask);
++	wed_w32(dev, MTK_WED_INT_MASK, irq_mask);
++
++	wed_set(dev, MTK_WED_GLO_CFG,
++		MTK_WED_GLO_CFG_TX_DMA_EN |
++		MTK_WED_GLO_CFG_RX_DMA_EN);
++	wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
++		MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN |
++		MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN);
++	wed_set(dev, MTK_WED_WDMA_GLO_CFG,
++		MTK_WED_WDMA_GLO_CFG_RX_DRV_EN);
++
++	mtk_wed_set_ext_int(dev, true);
++	val = dev->wlan.wpdma_phys |
++	      MTK_PCIE_MIRROR_MAP_EN |
++	      FIELD_PREP(MTK_PCIE_MIRROR_MAP_WED_ID, dev->hw->index);
++
++	if (dev->hw->index)
++		val |= BIT(1);
++	val |= BIT(0);
++	regmap_write(dev->hw->mirror, dev->hw->index * 4, val);
++
++	dev->running = true;
++}
++
++static int
++mtk_wed_attach(struct mtk_wed_device *dev)
++	__releases(RCU)
++{
++	struct mtk_wed_hw *hw;
++	int ret = 0;
++
++	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
++			 "mtk_wed_attach without holding the RCU read lock");
++
++	if (pci_domain_nr(dev->wlan.pci_dev->bus) > 1 ||
++	    !try_module_get(THIS_MODULE))
++		ret = -ENODEV;
++
++	rcu_read_unlock();
++
++	if (ret)
++		return ret;
++
++	mutex_lock(&hw_lock);
++	hw = mtk_wed_assign(dev);
++	if (!hw) {
++		module_put(THIS_MODULE);
++		ret = -ENODEV;
++		goto out;
++	}
++
++	dev_info(&dev->wlan.pci_dev->dev, "attaching wed device %d\n", hw->index);
++
++	dev->hw = hw;
++	dev->dev = hw->dev;
++	dev->irq = hw->irq;
++	dev->wdma_idx = hw->index;
++
++	if (hw->eth->dma_dev == hw->eth->dev &&
++	    of_dma_is_coherent(hw->eth->dev->of_node))
++		mtk_eth_set_dma_device(hw->eth, hw->dev);
++
++	ret = mtk_wed_buffer_alloc(dev);
++	if (ret) {
++		/* mtk_wed_detach() acquires hw_lock itself: drop it first */
++		mutex_unlock(&hw_lock);
++		mtk_wed_detach(dev);
++		return ret;
++	}
++
++	mtk_wed_hw_init_early(dev);
++	regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP, BIT(hw->index), 0);
++
++out:
++	mutex_unlock(&hw_lock);
++	return ret;
++}
++
++static int
++mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs)
++{
++	struct mtk_wed_ring *ring = &dev->tx_ring[idx];
++
++	/*
++	 * Tx ring redirection:
++	 * Instead of configuring the WLAN PDMA TX ring directly, the WLAN
++	 * driver allocated DMA ring gets configured into WED MTK_WED_RING_TX(n)
++	 * registers.
++	 *
++	 * WED driver posts its own DMA ring as WLAN PDMA TX and configures it
++	 * into MTK_WED_WPDMA_RING_TX(n) registers.
++	 * It gets filled with packets picked up from WED TX ring and from
++	 * WDMA RX.
++	 */
++
++	BUG_ON(idx >= ARRAY_SIZE(dev->tx_ring));
++
++	if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE))
++		return -ENOMEM;
++
++	if (mtk_wed_wdma_ring_setup(dev, idx, MTK_WED_WDMA_RING_SIZE))
++		return -ENOMEM;
++
++	ring->reg_base = MTK_WED_RING_TX(idx);
++	ring->wpdma = regs;
++
++	/* WED -> WPDMA */
++	wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_BASE, ring->desc_phys);
++	wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_COUNT, MTK_WED_TX_RING_SIZE);
++	wpdma_tx_w32(dev, idx, MTK_WED_RING_OFS_CPU_IDX, 0);
++
++	wed_w32(dev, MTK_WED_WPDMA_RING_TX(idx) + MTK_WED_RING_OFS_BASE,
++		ring->desc_phys);
++	wed_w32(dev, MTK_WED_WPDMA_RING_TX(idx) + MTK_WED_RING_OFS_COUNT,
++		MTK_WED_TX_RING_SIZE);
++	wed_w32(dev, MTK_WED_WPDMA_RING_TX(idx) + MTK_WED_RING_OFS_CPU_IDX, 0);
++
++	return 0;
++}
++
++static int
++mtk_wed_txfree_ring_setup(struct mtk_wed_device *dev, void __iomem *regs)
++{
++	struct mtk_wed_ring *ring = &dev->txfree_ring;
++	int i;
++
++	/*
++	 * For txfree event handling, the same DMA ring is shared between WED
++	 * and WLAN. The WLAN driver accesses the ring index registers through
++	 * WED
++	 */
++	ring->reg_base = MTK_WED_RING_RX(1);
++	ring->wpdma = regs;
++
++	for (i = 0; i < 12; i += 4) {
++		u32 val = readl(regs + i);
++
++		wed_w32(dev, MTK_WED_RING_RX(1) + i, val);
++		wed_w32(dev, MTK_WED_WPDMA_RING_RX(1) + i, val);
++	}
++
++	return 0;
++}
++
++static u32
++mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask)
++{
++	u32 val;
++
++	val = wed_r32(dev, MTK_WED_EXT_INT_STATUS);
++	wed_w32(dev, MTK_WED_EXT_INT_STATUS, val);
++	val &= MTK_WED_EXT_INT_STATUS_ERROR_MASK;
++	if (!dev->hw->num_flows)
++		val &= ~MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD;
++	if (val && net_ratelimit())
++		pr_err("mtk_wed%d: error status=%08x\n", dev->hw->index, val);
++
++	val = wed_r32(dev, MTK_WED_INT_STATUS);
++	val &= mask;
++	wed_w32(dev, MTK_WED_INT_STATUS, val); /* ACK */
++
++	return val;
++}
++
++static void
++mtk_wed_irq_set_mask(struct mtk_wed_device *dev, u32 mask)
++{
++	if (!dev->running)
++		return;
++
++	mtk_wed_set_ext_int(dev, !!mask);
++	wed_w32(dev, MTK_WED_INT_MASK, mask);
++}
++
++int mtk_wed_flow_add(int index)
++{
++	struct mtk_wed_hw *hw = hw_list[index];
++	int ret;
++
++	if (!hw || !hw->wed_dev)
++		return -ENODEV;
++
++	if (hw->num_flows) {
++		hw->num_flows++;
++		return 0;
++	}
++
++	mutex_lock(&hw_lock);
++	if (!hw->wed_dev) {
++		ret = -ENODEV;
++		goto out;
++	}
++
++	ret = hw->wed_dev->wlan.offload_enable(hw->wed_dev);
++	if (!ret)
++		hw->num_flows++;
++	mtk_wed_set_ext_int(hw->wed_dev, true);
++
++out:
++	mutex_unlock(&hw_lock);
++
++	return ret;
++}
++
++void mtk_wed_flow_remove(int index)
++{
++	struct mtk_wed_hw *hw = hw_list[index];
++
++	if (!hw)
++		return;
++
++	if (--hw->num_flows)
++		return;
++
++	mutex_lock(&hw_lock);
++	if (!hw->wed_dev)
++		goto out;
++
++	hw->wed_dev->wlan.offload_disable(hw->wed_dev);
++	mtk_wed_set_ext_int(hw->wed_dev, true);
++
++out:
++	mutex_unlock(&hw_lock);
++}
++
++void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
++		    void __iomem *wdma, int index)
++{
++	static const struct mtk_wed_ops wed_ops = {
++		.attach = mtk_wed_attach,
++		.tx_ring_setup = mtk_wed_tx_ring_setup,
++		.txfree_ring_setup = mtk_wed_txfree_ring_setup,
++		.start = mtk_wed_start,
++		.stop = mtk_wed_stop,
++		.reset_dma = mtk_wed_reset_dma,
++		.reg_read = wed_r32,
++		.reg_write = wed_w32,
++		.irq_get = mtk_wed_irq_get,
++		.irq_set_mask = mtk_wed_irq_set_mask,
++		.detach = mtk_wed_detach,
++	};
++	struct device_node *eth_np = eth->dev->of_node;
++	struct platform_device *pdev;
++	struct mtk_wed_hw *hw;
++	struct regmap *regs;
++	int irq;
++
++	if (!np)
++		return;
++
++	pdev = of_find_device_by_node(np);
++	if (!pdev)
++		return;
++
++	get_device(&pdev->dev);
++	irq = platform_get_irq(pdev, 0);
++	if (irq < 0)
++		return;
++
++	regs = syscon_regmap_lookup_by_phandle(np, NULL);
++	if (IS_ERR(regs))
++		return;
++
++	rcu_assign_pointer(mtk_soc_wed_ops, &wed_ops);
++	mutex_lock(&hw_lock);
++
++	if (WARN_ON(hw_list[index]))
++		goto unlock;
++
++	hw = kzalloc(sizeof(*hw), GFP_KERNEL);
++	if (!hw)
++		goto unlock;
++
++	hw->node = np;
++	hw->regs = regs;
++	hw->eth = eth;
++	hw->dev = &pdev->dev;
++	hw->wdma = wdma;
++	hw->index = index;
++	hw->irq = irq;
++	hw->mirror = syscon_regmap_lookup_by_phandle(eth_np,
++						     "mediatek,pcie-mirror");
++	hw->hifsys = syscon_regmap_lookup_by_phandle(eth_np,
++						     "mediatek,hifsys");
++	if (IS_ERR(hw->mirror) || IS_ERR(hw->hifsys)) {
++		kfree(hw);
++		goto unlock;
++	}
++	if (!index) {
++		regmap_write(hw->mirror, 0, 0);
++		regmap_write(hw->mirror, 4, 0);
++	}
++	mtk_wed_hw_add_debugfs(hw);
++	hw_list[index] = hw;
++
++unlock:
++	mutex_unlock(&hw_lock);
++}
++
++void mtk_wed_exit(void)
++{
++	int i;
++
++	rcu_assign_pointer(mtk_soc_wed_ops, NULL);
++	synchronize_rcu();
++
++	for (i = 0; i < ARRAY_SIZE(hw_list); i++) {
++		struct mtk_wed_hw *hw;
++
++		hw = hw_list[i];
++		if (!hw)
++			continue;
++
++		hw_list[i] = NULL;
++		/* populated dir: debugfs_remove() only handles empty ones on 5.4 */
++		debugfs_remove_recursive(hw->debugfs_dir);
++		put_device(hw->dev);
++		kfree(hw);
++	}
++}
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed.h b/drivers/net/ethernet/mediatek/mtk_wed.h
+new file mode 100644
+index 000000000..981ec613f
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_wed.h
+@@ -0,0 +1,135 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2021 Felix Fietkau <nbd@nbd.name> */
++
++#ifndef __MTK_WED_PRIV_H
++#define __MTK_WED_PRIV_H
++
++#include <linux/soc/mediatek/mtk_wed.h>
++#include <linux/debugfs.h>
++#include <linux/regmap.h>
++#include <linux/netdevice.h>
++
++struct mtk_eth;
++
++struct mtk_wed_hw {
++	struct device_node *node;
++	struct mtk_eth *eth;
++	struct regmap *regs;
++	struct regmap *hifsys;
++	struct device *dev;
++	void __iomem *wdma;
++	struct regmap *mirror;
++	struct dentry *debugfs_dir;
++	struct mtk_wed_device *wed_dev;
++	u32 debugfs_reg;
++	u32 num_flows;
++	char dirname[5];
++	int irq;
++	int index;
++};
++
++struct mtk_wdma_info {
++	u8 wdma_idx;
++	u8 queue;
++	u16 wcid;
++	u8 bss;
++};
++
++#ifdef CONFIG_NET_MEDIATEK_SOC_WED
++static inline void
++wed_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
++{
++	regmap_write(dev->hw->regs, reg, val);
++}
++
++static inline u32
++wed_r32(struct mtk_wed_device *dev, u32 reg)
++{
++	unsigned int val = 0; /* don't return stack garbage if the read fails */
++
++	regmap_read(dev->hw->regs, reg, &val);
++
++	return val;
++}
++
++static inline void
++wdma_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
++{
++	writel(val, dev->hw->wdma + reg);
++}
++
++static inline u32
++wdma_r32(struct mtk_wed_device *dev, u32 reg)
++{
++	return readl(dev->hw->wdma + reg);
++}
++
++static inline u32
++wpdma_tx_r32(struct mtk_wed_device *dev, int ring, u32 reg)
++{
++	if (!dev->tx_ring[ring].wpdma)
++		return 0;
++
++	return readl(dev->tx_ring[ring].wpdma + reg);
++}
++
++static inline void
++wpdma_tx_w32(struct mtk_wed_device *dev, int ring, u32 reg, u32 val)
++{
++	if (!dev->tx_ring[ring].wpdma)
++		return;
++
++	writel(val, dev->tx_ring[ring].wpdma + reg);
++}
++
++static inline u32
++wpdma_txfree_r32(struct mtk_wed_device *dev, u32 reg)
++{
++	if (!dev->txfree_ring.wpdma)
++		return 0;
++
++	return readl(dev->txfree_ring.wpdma + reg);
++}
++
++static inline void
++wpdma_txfree_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
++{
++	if (!dev->txfree_ring.wpdma)
++		return;
++
++	writel(val, dev->txfree_ring.wpdma + reg);
++}
++
++void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
++		    void __iomem *wdma, int index);
++void mtk_wed_exit(void);
++int mtk_wed_flow_add(int index);
++void mtk_wed_flow_remove(int index);
++#else
++static inline void
++mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
++	       void __iomem *wdma, int index)
++{
++}
++static inline void
++mtk_wed_exit(void)
++{
++}
++static inline int mtk_wed_flow_add(int index)
++{
++	return -EINVAL;
++}
++static inline void mtk_wed_flow_remove(int index)
++{
++}
++#endif
++
++#ifdef CONFIG_DEBUG_FS
++void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw);
++#else
++static inline void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw)
++{
++}
++#endif
++
++#endif
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
+new file mode 100644
+index 000000000..a81d3fd1a
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
+@@ -0,0 +1,175 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2021 Felix Fietkau <nbd@nbd.name> */
++
++#include <linux/seq_file.h>
++#include "mtk_wed.h"
++#include "mtk_wed_regs.h"
++
++struct reg_dump {
++	const char *name;
++	u16 offset;
++	u8 type;
++	u8 base;
++};
++
++enum {
++	DUMP_TYPE_STRING,
++	DUMP_TYPE_WED,
++	DUMP_TYPE_WDMA,
++	DUMP_TYPE_WPDMA_TX,
++	DUMP_TYPE_WPDMA_TXFREE,
++};
++
++#define DUMP_STR(_str) { _str, 0, DUMP_TYPE_STRING }
++#define DUMP_REG(_reg, ...) { #_reg, MTK_##_reg, __VA_ARGS__ }
++#define DUMP_RING(_prefix, _base, ...)				\
++	{ _prefix " BASE", _base, __VA_ARGS__ },		\
++	{ _prefix " CNT",  _base + 0x4, __VA_ARGS__ },	\
++	{ _prefix " CIDX", _base + 0x8, __VA_ARGS__ },	\
++	{ _prefix " DIDX", _base + 0xc, __VA_ARGS__ }
++
++#define DUMP_WED(_reg) DUMP_REG(_reg, DUMP_TYPE_WED)
++#define DUMP_WED_RING(_base) DUMP_RING(#_base, MTK_##_base, DUMP_TYPE_WED)
++
++#define DUMP_WDMA(_reg) DUMP_REG(_reg, DUMP_TYPE_WDMA)
++#define DUMP_WDMA_RING(_base) DUMP_RING(#_base, MTK_##_base, DUMP_TYPE_WDMA)
++
++#define DUMP_WPDMA_TX_RING(_n) DUMP_RING("WPDMA_TX" #_n, 0, DUMP_TYPE_WPDMA_TX, _n)
++#define DUMP_WPDMA_TXFREE_RING DUMP_RING("WPDMA_RX1", 0, DUMP_TYPE_WPDMA_TXFREE)
++
++static void
++print_reg_val(struct seq_file *s, const char *name, u32 val)
++{
++	seq_printf(s, "%-32s %08x\n", name, val);
++}
++
++static void
++dump_wed_regs(struct seq_file *s, struct mtk_wed_device *dev,
++	      const struct reg_dump *regs, int n_regs)
++{
++	const struct reg_dump *cur;
++	u32 val;
++
++	for (cur = regs; cur < &regs[n_regs]; cur++) {
++		switch (cur->type) {
++		case DUMP_TYPE_STRING:
++			seq_printf(s, "%s======== %s:\n",
++				   cur > regs ? "\n" : "",
++				   cur->name);
++			continue;
++		case DUMP_TYPE_WED:
++			val = wed_r32(dev, cur->offset);
++			break;
++		case DUMP_TYPE_WDMA:
++			val = wdma_r32(dev, cur->offset);
++			break;
++		case DUMP_TYPE_WPDMA_TX:
++			val = wpdma_tx_r32(dev, cur->base, cur->offset);
++			break;
++		case DUMP_TYPE_WPDMA_TXFREE:
++			val = wpdma_txfree_r32(dev, cur->offset);
++			break;
++		}
++		print_reg_val(s, cur->name, val);
++	}
++}
++
++
++static int
++wed_txinfo_show(struct seq_file *s, void *data)
++{
++	static const struct reg_dump regs[] = {
++		DUMP_STR("WED TX"),
++		DUMP_WED(WED_TX_MIB(0)),
++		DUMP_WED_RING(WED_RING_TX(0)),
++
++		DUMP_WED(WED_TX_MIB(1)),
++		DUMP_WED_RING(WED_RING_TX(1)),
++
++		DUMP_STR("WPDMA TX"),
++		DUMP_WED(WED_WPDMA_TX_MIB(0)),
++		DUMP_WED_RING(WED_WPDMA_RING_TX(0)),
++		DUMP_WED(WED_WPDMA_TX_COHERENT_MIB(0)),
++
++		DUMP_WED(WED_WPDMA_TX_MIB(1)),
++		DUMP_WED_RING(WED_WPDMA_RING_TX(1)),
++		DUMP_WED(WED_WPDMA_TX_COHERENT_MIB(1)),
++
++		DUMP_STR("WPDMA TX"),
++		DUMP_WPDMA_TX_RING(0),
++		DUMP_WPDMA_TX_RING(1),
++
++		DUMP_STR("WED WDMA RX"),
++		DUMP_WED(WED_WDMA_RX_MIB(0)),
++		DUMP_WED_RING(WED_WDMA_RING_RX(0)),
++		DUMP_WED(WED_WDMA_RX_THRES(0)),
++		DUMP_WED(WED_WDMA_RX_RECYCLE_MIB(0)),
++		DUMP_WED(WED_WDMA_RX_PROCESSED_MIB(0)),
++
++		DUMP_WED(WED_WDMA_RX_MIB(1)),
++		DUMP_WED_RING(WED_WDMA_RING_RX(1)),
++		DUMP_WED(WED_WDMA_RX_THRES(1)),
++		DUMP_WED(WED_WDMA_RX_RECYCLE_MIB(1)),
++		DUMP_WED(WED_WDMA_RX_PROCESSED_MIB(1)),
++
++		DUMP_STR("WDMA RX"),
++		DUMP_WDMA(WDMA_GLO_CFG),
++		DUMP_WDMA_RING(WDMA_RING_RX(0)),
++		DUMP_WDMA_RING(WDMA_RING_RX(1)),
++	};
++	struct mtk_wed_hw *hw = s->private;
++	struct mtk_wed_device *dev = hw->wed_dev;
++
++	if (!dev)
++		return 0;
++
++	dump_wed_regs(s, dev, regs, ARRAY_SIZE(regs));
++
++	return 0;
++}
++DEFINE_SHOW_ATTRIBUTE(wed_txinfo);
++
++
++static int
++mtk_wed_reg_set(void *data, u64 val)
++{
++	struct mtk_wed_hw *hw = data;
++
++	/* val is narrowed to regmap_write()'s unsigned int argument */
++	/* report regmap failures to the writer, as mtk_wed_reg_get() does */
++	return regmap_write(hw->regs, hw->debugfs_reg, val);
++}
++
++static int
++mtk_wed_reg_get(void *data, u64 *val)
++{
++	struct mtk_wed_hw *hw = data;
++	unsigned int regval;
++	int ret;
++
++	ret = regmap_read(hw->regs, hw->debugfs_reg, &regval);
++	if (ret)
++		return ret;
++
++	*val = regval;
++
++	return 0;
++}
++
++DEFINE_DEBUGFS_ATTRIBUTE(fops_regval, mtk_wed_reg_get, mtk_wed_reg_set,
++             "0x%08llx\n");
++
++void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw)
++{
++	struct dentry *dir;
++
++	snprintf(hw->dirname, sizeof(hw->dirname), "wed%d", hw->index);
++	dir = debugfs_create_dir(hw->dirname, NULL);
++	if (IS_ERR(dir)) /* failure is reported as ERR_PTR, never NULL */
++		return;
++
++	hw->debugfs_dir = dir;
++	debugfs_create_u32("regidx", 0600, dir, &hw->debugfs_reg);
++	debugfs_create_file_unsafe("regval", 0600, dir, hw, &fops_regval);
++	debugfs_create_file_unsafe("txinfo", 0400, dir, hw, &wed_txinfo_fops);
++}
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed_ops.c b/drivers/net/ethernet/mediatek/mtk_wed_ops.c
+new file mode 100644
+index 000000000..a5d9d8a5b
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_wed_ops.c
+@@ -0,0 +1,8 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
++
++#include <linux/kernel.h>
++#include <linux/soc/mediatek/mtk_wed.h>
++
++const struct mtk_wed_ops __rcu *mtk_soc_wed_ops;
++EXPORT_SYMBOL_GPL(mtk_soc_wed_ops);
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed_regs.h b/drivers/net/ethernet/mediatek/mtk_wed_regs.h
+new file mode 100644
+index 000000000..0a0465ea5
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_wed_regs.h
+@@ -0,0 +1,251 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
++
++#ifndef __MTK_WED_REGS_H
++#define __MTK_WED_REGS_H
++
++#define MTK_WDMA_DESC_CTRL_LEN1			GENMASK(14, 0)
++#define MTK_WDMA_DESC_CTRL_LAST_SEG1		BIT(15)
++#define MTK_WDMA_DESC_CTRL_BURST		BIT(16)
++#define MTK_WDMA_DESC_CTRL_LEN0			GENMASK(29, 16)
++#define MTK_WDMA_DESC_CTRL_LAST_SEG0		BIT(30)
++#define MTK_WDMA_DESC_CTRL_DMA_DONE		BIT(31)
++
++struct mtk_wdma_desc {
++	__le32 buf0;
++	__le32 ctrl;
++	__le32 buf1;
++	__le32 info;
++} __packed __aligned(4);
++
++#define MTK_WED_RESET					0x008
++#define MTK_WED_RESET_TX_BM				BIT(0)
++#define MTK_WED_RESET_TX_FREE_AGENT			BIT(4)
++#define MTK_WED_RESET_WPDMA_TX_DRV			BIT(8)
++#define MTK_WED_RESET_WPDMA_RX_DRV			BIT(9)
++#define MTK_WED_RESET_WPDMA_INT_AGENT			BIT(11)
++#define MTK_WED_RESET_WED_TX_DMA			BIT(12)
++#define MTK_WED_RESET_WDMA_RX_DRV			BIT(17)
++#define MTK_WED_RESET_WDMA_INT_AGENT			BIT(19)
++#define MTK_WED_RESET_WED				BIT(31)
++
++#define MTK_WED_CTRL					0x00c
++#define MTK_WED_CTRL_WPDMA_INT_AGENT_EN			BIT(0)
++#define MTK_WED_CTRL_WPDMA_INT_AGENT_BUSY		BIT(1)
++#define MTK_WED_CTRL_WDMA_INT_AGENT_EN			BIT(2)
++#define MTK_WED_CTRL_WDMA_INT_AGENT_BUSY		BIT(3)
++#define MTK_WED_CTRL_WED_TX_BM_EN			BIT(8)
++#define MTK_WED_CTRL_WED_TX_BM_BUSY			BIT(9)
++#define MTK_WED_CTRL_WED_TX_FREE_AGENT_EN		BIT(10)
++#define MTK_WED_CTRL_WED_TX_FREE_AGENT_BUSY		BIT(11)
++#define MTK_WED_CTRL_RESERVE_EN				BIT(12)
++#define MTK_WED_CTRL_RESERVE_BUSY			BIT(13)
++#define MTK_WED_CTRL_FINAL_DIDX_READ			BIT(24)
++#define MTK_WED_CTRL_MIB_READ_CLEAR			BIT(28)
++
++#define MTK_WED_EXT_INT_STATUS				0x020
++#define MTK_WED_EXT_INT_STATUS_TF_LEN_ERR		BIT(0)
++#define MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD		BIT(1)
++#define MTK_WED_EXT_INT_STATUS_TKID_TITO_INVALID	BIT(4)
++#define MTK_WED_EXT_INT_STATUS_TX_FBUF_LO_TH		BIT(8)
++#define MTK_WED_EXT_INT_STATUS_TX_FBUF_HI_TH		BIT(9)
++#define MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH		BIT(12)
++#define MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH		BIT(13)
++#define MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR	BIT(16)
++#define MTK_WED_EXT_INT_STATUS_RX_DRV_W_RESP_ERR	BIT(17)
++#define MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT		BIT(18)
++#define MTK_WED_EXT_INT_STATUS_RX_DRV_INIT_WDMA_EN	BIT(19)
++#define MTK_WED_EXT_INT_STATUS_RX_DRV_BM_DMAD_COHERENT	BIT(20)
++#define MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR	BIT(21)
++#define MTK_WED_EXT_INT_STATUS_TX_DRV_W_RESP_ERR	BIT(22)
++#define MTK_WED_EXT_INT_STATUS_RX_DRV_DMA_RECYCLE	BIT(24)
++#define MTK_WED_EXT_INT_STATUS_ERROR_MASK		(MTK_WED_EXT_INT_STATUS_TF_LEN_ERR | \
++							 MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD | \
++							 MTK_WED_EXT_INT_STATUS_TKID_TITO_INVALID | \
++							 MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR | \
++							 MTK_WED_EXT_INT_STATUS_RX_DRV_W_RESP_ERR | \
++							 MTK_WED_EXT_INT_STATUS_RX_DRV_INIT_WDMA_EN | \
++							 MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR | \
++							 MTK_WED_EXT_INT_STATUS_TX_DRV_W_RESP_ERR)
++
++#define MTK_WED_EXT_INT_MASK				0x028
++
++#define MTK_WED_STATUS					0x060
++#define MTK_WED_STATUS_TX				GENMASK(15, 8)
++
++#define MTK_WED_TX_BM_CTRL				0x080
++#define MTK_WED_TX_BM_CTRL_VLD_GRP_NUM			GENMASK(6, 0)
++#define MTK_WED_TX_BM_CTRL_RSV_GRP_NUM			GENMASK(22, 16)
++#define MTK_WED_TX_BM_CTRL_PAUSE			BIT(28)
++
++#define MTK_WED_TX_BM_BASE				0x084
++
++#define MTK_WED_TX_BM_TKID				0x088
++#define MTK_WED_TX_BM_TKID_START			GENMASK(15, 0)
++#define MTK_WED_TX_BM_TKID_END				GENMASK(31, 16)
++
++#define MTK_WED_TX_BM_BUF_LEN				0x08c
++
++#define MTK_WED_TX_BM_INTF				0x09c
++#define MTK_WED_TX_BM_INTF_TKID				GENMASK(15, 0)
++#define MTK_WED_TX_BM_INTF_TKFIFO_FDEP			GENMASK(23, 16)
++#define MTK_WED_TX_BM_INTF_TKID_VALID			BIT(28)
++#define MTK_WED_TX_BM_INTF_TKID_READ			BIT(29)
++
++#define MTK_WED_TX_BM_DYN_THR				0x0a0
++#define MTK_WED_TX_BM_DYN_THR_LO			GENMASK(6, 0)
++#define MTK_WED_TX_BM_DYN_THR_HI			GENMASK(22, 16)
++
++#define MTK_WED_INT_STATUS				0x200
++#define MTK_WED_INT_MASK				0x204
++
++#define MTK_WED_GLO_CFG					0x208
++#define MTK_WED_GLO_CFG_TX_DMA_EN			BIT(0)
++#define MTK_WED_GLO_CFG_TX_DMA_BUSY			BIT(1)
++#define MTK_WED_GLO_CFG_RX_DMA_EN			BIT(2)
++#define MTK_WED_GLO_CFG_RX_DMA_BUSY			BIT(3)
++#define MTK_WED_GLO_CFG_RX_BT_SIZE			GENMASK(5, 4)
++#define MTK_WED_GLO_CFG_TX_WB_DDONE			BIT(6)
++#define MTK_WED_GLO_CFG_BIG_ENDIAN			BIT(7)
++#define MTK_WED_GLO_CFG_DIS_BT_SIZE_ALIGN		BIT(8)
++#define MTK_WED_GLO_CFG_TX_BT_SIZE_LO			BIT(9)
++#define MTK_WED_GLO_CFG_MULTI_DMA_EN			GENMASK(11, 10)
++#define MTK_WED_GLO_CFG_FIFO_LITTLE_ENDIAN		BIT(12)
++#define MTK_WED_GLO_CFG_MI_DEPTH_RD			GENMASK(21, 13)
++#define MTK_WED_GLO_CFG_TX_BT_SIZE_HI			GENMASK(23, 22)
++#define MTK_WED_GLO_CFG_SW_RESET			BIT(24)
++#define MTK_WED_GLO_CFG_FIRST_TOKEN_ONLY		BIT(26)
++#define MTK_WED_GLO_CFG_OMIT_RX_INFO			BIT(27)
++#define MTK_WED_GLO_CFG_OMIT_TX_INFO			BIT(28)
++#define MTK_WED_GLO_CFG_BYTE_SWAP			BIT(29)
++#define MTK_WED_GLO_CFG_RX_2B_OFFSET			BIT(31)
++
++#define MTK_WED_RESET_IDX				0x20c
++#define MTK_WED_RESET_IDX_TX				GENMASK(3, 0)
++#define MTK_WED_RESET_IDX_RX				GENMASK(17, 16)
++
++#define MTK_WED_TX_MIB(_n)				(0x2a0 + (_n) * 4)
++
++#define MTK_WED_RING_TX(_n)				(0x300 + (_n) * 0x10)
++
++#define MTK_WED_RING_RX(_n)				(0x400 + (_n) * 0x10)
++
++#define MTK_WED_WPDMA_INT_TRIGGER			0x504
++#define MTK_WED_WPDMA_INT_TRIGGER_RX_DONE		BIT(1)
++#define MTK_WED_WPDMA_INT_TRIGGER_TX_DONE		GENMASK(5, 4)
++
++#define MTK_WED_WPDMA_GLO_CFG				0x508
++#define MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN			BIT(0)
++#define MTK_WED_WPDMA_GLO_CFG_TX_DRV_BUSY		BIT(1)
++#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN			BIT(2)
++#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_BUSY		BIT(3)
++#define MTK_WED_WPDMA_GLO_CFG_RX_BT_SIZE		GENMASK(5, 4)
++#define MTK_WED_WPDMA_GLO_CFG_TX_WB_DDONE		BIT(6)
++#define MTK_WED_WPDMA_GLO_CFG_BIG_ENDIAN		BIT(7)
++#define MTK_WED_WPDMA_GLO_CFG_DIS_BT_SIZE_ALIGN		BIT(8)
++#define MTK_WED_WPDMA_GLO_CFG_TX_BT_SIZE_LO		BIT(9)
++#define MTK_WED_WPDMA_GLO_CFG_MULTI_DMA_EN		GENMASK(11, 10)
++#define MTK_WED_WPDMA_GLO_CFG_FIFO_LITTLE_ENDIAN	BIT(12)
++#define MTK_WED_WPDMA_GLO_CFG_MI_DEPTH_RD		GENMASK(21, 13)
++#define MTK_WED_WPDMA_GLO_CFG_TX_BT_SIZE_HI		GENMASK(23, 22)
++#define MTK_WED_WPDMA_GLO_CFG_SW_RESET			BIT(24)
++#define MTK_WED_WPDMA_GLO_CFG_FIRST_TOKEN_ONLY		BIT(26)
++#define MTK_WED_WPDMA_GLO_CFG_OMIT_RX_INFO		BIT(27)
++#define MTK_WED_WPDMA_GLO_CFG_OMIT_TX_INFO		BIT(28)
++#define MTK_WED_WPDMA_GLO_CFG_BYTE_SWAP			BIT(29)
++#define MTK_WED_WPDMA_GLO_CFG_RX_2B_OFFSET		BIT(31)
++
++#define MTK_WED_WPDMA_RESET_IDX				0x50c
++#define MTK_WED_WPDMA_RESET_IDX_TX			GENMASK(3, 0)
++#define MTK_WED_WPDMA_RESET_IDX_RX			GENMASK(17, 16)
++
++#define MTK_WED_WPDMA_INT_CTRL				0x520
++#define MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV		BIT(21)
++
++#define MTK_WED_WPDMA_INT_MASK				0x524
++
++#define MTK_WED_PCIE_CFG_BASE				0x560
++
++#define MTK_WED_PCIE_INT_TRIGGER			0x570
++#define MTK_WED_PCIE_INT_TRIGGER_STATUS			BIT(16)
++
++#define MTK_WED_WPDMA_CFG_BASE				0x580
++
++#define MTK_WED_WPDMA_TX_MIB(_n)			(0x5a0 + (_n) * 4)
++#define MTK_WED_WPDMA_TX_COHERENT_MIB(_n)		(0x5d0 + (_n) * 4)
++
++#define MTK_WED_WPDMA_RING_TX(_n)			(0x600 + (_n) * 0x10)
++#define MTK_WED_WPDMA_RING_RX(_n)			(0x700 + (_n) * 0x10)
++#define MTK_WED_WDMA_RING_RX(_n)			(0x900 + (_n) * 0x10)
++#define MTK_WED_WDMA_RX_THRES(_n)			(0x940 + (_n) * 0x4)
++
++#define MTK_WED_WDMA_GLO_CFG				0xa04
++#define MTK_WED_WDMA_GLO_CFG_TX_DRV_EN			BIT(0)
++#define MTK_WED_WDMA_GLO_CFG_RX_DRV_EN			BIT(2)
++#define MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY		BIT(3)
++#define MTK_WED_WDMA_GLO_CFG_BT_SIZE			GENMASK(5, 4)
++#define MTK_WED_WDMA_GLO_CFG_TX_WB_DDONE		BIT(6)
++#define MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE	BIT(13)
++#define MTK_WED_WDMA_GLO_CFG_WCOMPLETE_SEL		BIT(16)
++#define MTK_WED_WDMA_GLO_CFG_INIT_PHASE_RXDMA_BYPASS	BIT(17)
++#define MTK_WED_WDMA_GLO_CFG_INIT_PHASE_BYPASS		BIT(18)
++#define MTK_WED_WDMA_GLO_CFG_FSM_RETURN_IDLE		BIT(19)
++#define MTK_WED_WDMA_GLO_CFG_WAIT_COHERENT		BIT(20)
++#define MTK_WED_WDMA_GLO_CFG_AXI_W_AFTER_AW		BIT(21)
++#define MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY_SINGLE_W	BIT(22)
++#define MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY		BIT(23)
++#define MTK_WED_WDMA_GLO_CFG_DYNAMIC_SKIP_DMAD_PREP	BIT(24)
++#define MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE	BIT(25)
++#define MTK_WED_WDMA_GLO_CFG_RST_INIT_COMPLETE		BIT(26)
++#define MTK_WED_WDMA_GLO_CFG_RXDRV_CLKGATE_BYPASS	BIT(30)
++
++#define MTK_WED_WDMA_RESET_IDX				0xa08
++#define MTK_WED_WDMA_RESET_IDX_RX			GENMASK(17, 16)
++#define MTK_WED_WDMA_RESET_IDX_DRV			GENMASK(25, 24)
++
++#define MTK_WED_WDMA_INT_TRIGGER			0xa28
++#define MTK_WED_WDMA_INT_TRIGGER_RX_DONE		GENMASK(17, 16)
++
++#define MTK_WED_WDMA_INT_CTRL				0xa2c
++#define MTK_WED_WDMA_INT_CTRL_POLL_SRC_SEL		GENMASK(17, 16)
++
++#define MTK_WED_WDMA_OFFSET0				0xaa4
++#define MTK_WED_WDMA_OFFSET1				0xaa8
++
++#define MTK_WED_WDMA_RX_MIB(_n)				(0xae0 + (_n) * 4)
++#define MTK_WED_WDMA_RX_RECYCLE_MIB(_n)			(0xae8 + (_n) * 4)
++#define MTK_WED_WDMA_RX_PROCESSED_MIB(_n)		(0xaf0 + (_n) * 4)
++
++#define MTK_WED_RING_OFS_BASE				0x00
++#define MTK_WED_RING_OFS_COUNT				0x04
++#define MTK_WED_RING_OFS_CPU_IDX			0x08
++#define MTK_WED_RING_OFS_DMA_IDX			0x0c
++
++#define MTK_WDMA_RING_RX(_n)				(0x100 + (_n) * 0x10)
++
++#define MTK_WDMA_GLO_CFG				0x204
++#define MTK_WDMA_GLO_CFG_RX_INFO_PRERES			GENMASK(28, 26)
++
++#define MTK_WDMA_RESET_IDX				0x208
++#define MTK_WDMA_RESET_IDX_TX				GENMASK(3, 0)
++#define MTK_WDMA_RESET_IDX_RX				GENMASK(17, 16)
++
++#define MTK_WDMA_INT_MASK				0x228
++#define MTK_WDMA_INT_MASK_TX_DONE			GENMASK(3, 0)
++#define MTK_WDMA_INT_MASK_RX_DONE			GENMASK(17, 16)
++#define MTK_WDMA_INT_MASK_TX_DELAY			BIT(28)
++#define MTK_WDMA_INT_MASK_TX_COHERENT			BIT(29)
++#define MTK_WDMA_INT_MASK_RX_DELAY			BIT(30)
++#define MTK_WDMA_INT_MASK_RX_COHERENT			BIT(31)
++
++#define MTK_WDMA_INT_GRP1				0x250
++#define MTK_WDMA_INT_GRP2				0x254
++
++#define MTK_PCIE_MIRROR_MAP(n)				((n) ? 0x4 : 0x0)
++#define MTK_PCIE_MIRROR_MAP_EN				BIT(0)
++#define MTK_PCIE_MIRROR_MAP_WED_ID			BIT(1)
++
++/* DMA channel mapping */
++#define HIFSYS_DMA_AG_MAP				0x008
++
++#endif
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 9f64504ac..35998b1a7 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -835,6 +835,7 @@ enum net_device_path_type {
+ 	DEV_PATH_BRIDGE,
+ 	DEV_PATH_PPPOE,
+ 	DEV_PATH_DSA,
++	DEV_PATH_MTK_WDMA,
+ };
+ 
+ struct net_device_path {
+@@ -860,6 +861,12 @@ struct net_device_path {
+ 			int port;
+ 			u16 proto;
+ 		} dsa;
++		struct {
++			u8 wdma_idx;
++			u8 queue;
++			u16 wcid;
++			u8 bss;
++		} mtk_wdma;
+ 	};
+ };
+ 
+diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h
+new file mode 100644
+index 000000000..7e00cca06
+--- /dev/null
++++ b/include/linux/soc/mediatek/mtk_wed.h
+@@ -0,0 +1,131 @@
++#ifndef __MTK_WED_H
++#define __MTK_WED_H
++
++#include <linux/kernel.h>
++#include <linux/rcupdate.h>
++#include <linux/regmap.h>
++#include <linux/pci.h>
++
++#define MTK_WED_TX_QUEUES		2
++
++struct mtk_wed_hw;
++struct mtk_wdma_desc;
++
++struct mtk_wed_ring {
++	struct mtk_wdma_desc *desc;
++	dma_addr_t desc_phys;
++	int size;
++
++	u32 reg_base;
++	void __iomem *wpdma;
++};
++
++struct mtk_wed_device {
++#ifdef CONFIG_NET_MEDIATEK_SOC_WED
++	const struct mtk_wed_ops *ops;
++	struct device *dev;
++	struct mtk_wed_hw *hw;
++	bool init_done, running;
++	int wdma_idx;
++	int irq;
++
++	struct mtk_wed_ring tx_ring[MTK_WED_TX_QUEUES];
++	struct mtk_wed_ring txfree_ring;
++	struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES];
++
++	struct {
++		int size;
++		void **pages;
++		struct mtk_wdma_desc *desc;
++		dma_addr_t desc_phys;
++	} buf_ring;
++
++	/* filled by driver: */
++	struct {
++		struct pci_dev *pci_dev;
++
++		u32 wpdma_phys;
++
++		u16 token_start;
++		unsigned int nbuf;
++
++		u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id);
++		int (*offload_enable)(struct mtk_wed_device *wed);
++		void (*offload_disable)(struct mtk_wed_device *wed);
++	} wlan;
++#endif
++};
++
++struct mtk_wed_ops {
++	int (*attach)(struct mtk_wed_device *dev);
++	int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring,
++			     void __iomem *regs);
++	int (*txfree_ring_setup)(struct mtk_wed_device *dev,
++				 void __iomem *regs);
++	void (*detach)(struct mtk_wed_device *dev);
++
++	void (*stop)(struct mtk_wed_device *dev);
++	void (*start)(struct mtk_wed_device *dev, u32 irq_mask);
++	void (*reset_dma)(struct mtk_wed_device *dev);
++
++	u32 (*reg_read)(struct mtk_wed_device *dev, u32 reg);
++	void (*reg_write)(struct mtk_wed_device *dev, u32 reg, u32 val);
++
++	u32 (*irq_get)(struct mtk_wed_device *dev, u32 mask);
++	void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask);
++};
++
++extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops;
++
++static inline int
++mtk_wed_device_attach(struct mtk_wed_device *dev)
++{
++	int ret = -ENODEV;
++
++#ifdef CONFIG_NET_MEDIATEK_SOC_WED
++	rcu_read_lock();
++	dev->ops = rcu_dereference(mtk_soc_wed_ops);
++	if (dev->ops)
++		ret = dev->ops->attach(dev);
++	else
++		rcu_read_unlock();
++
++	if (ret)
++		dev->ops = NULL;
++#endif
++
++	return ret;
++}
++
++#ifdef CONFIG_NET_MEDIATEK_SOC_WED
++#define mtk_wed_device_active(_dev) !!(_dev)->ops
++#define mtk_wed_device_detach(_dev) (_dev)->ops->detach(_dev)
++#define mtk_wed_device_start(_dev, _mask) (_dev)->ops->start(_dev, _mask)
++#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) \
++	(_dev)->ops->tx_ring_setup(_dev, _ring, _regs)
++#define mtk_wed_device_txfree_ring_setup(_dev, _regs) \
++	(_dev)->ops->txfree_ring_setup(_dev, _regs)
++#define mtk_wed_device_reg_read(_dev, _reg) \
++	(_dev)->ops->reg_read(_dev, _reg)
++#define mtk_wed_device_reg_write(_dev, _reg, _val) \
++	(_dev)->ops->reg_write(_dev, _reg, _val)
++#define mtk_wed_device_irq_get(_dev, _mask) \
++	(_dev)->ops->irq_get(_dev, _mask)
++#define mtk_wed_device_irq_set_mask(_dev, _mask) \
++	(_dev)->ops->irq_set_mask(_dev, _mask)
++#else
++static inline bool mtk_wed_device_active(struct mtk_wed_device *dev)
++{
++	return false;
++}
++#define mtk_wed_device_detach(_dev) do {} while (0)
++#define mtk_wed_device_start(_dev, _mask) do {} while (0)
++#define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) -ENODEV
++#define mtk_wed_device_txfree_ring_setup(_dev, _regs) -ENODEV
++#define mtk_wed_device_reg_read(_dev, _reg) 0
++#define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0)
++#define mtk_wed_device_irq_get(_dev, _mask) 0
++#define mtk_wed_device_irq_set_mask(_dev, _mask) do {} while (0)
++#endif
++
++#endif
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 4f0edb218..031ac7c6f 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -675,6 +675,10 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
+ 		if (WARN_ON_ONCE(last_dev == ctx.dev))
+ 			return -1;
+ 	}
++
++	if (!ctx.dev)
++		return ret;
++
+ 	path = dev_fwd_path(stack);
+ 	if (!path)
+ 		return -1;
+-- 
+2.18.0
+
diff --git a/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9994-ethernet-update-ppe-from-mt7622-to-mt7986.patch b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9994-ethernet-update-ppe-from-mt7622-to-mt7986.patch
new file mode 100755
index 0000000..c2564ba
--- /dev/null
+++ b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9994-ethernet-update-ppe-from-mt7622-to-mt7986.patch
@@ -0,0 +1,344 @@
+From d86af0076cbf7d99bdb4f28115159643b79ad3fa Mon Sep 17 00:00:00 2001
+From: Sujuan Chen <sujuan.chen@mediatek.com>
+Date: Wed, 18 May 2022 11:08:15 +0800
+Subject: [PATCH 5/8] 9994-ethernet-update-ppe-from-mt7622-to-mt7986
+
+Signed-off-by: Sujuan Chen <sujuan.chen@mediatek.com>
+---
+ drivers/net/ethernet/mediatek/mtk_eth_soc.c   | 14 +++-
+ drivers/net/ethernet/mediatek/mtk_eth_soc.h   |  7 +-
+ drivers/net/ethernet/mediatek/mtk_ppe.c       | 24 ++++---
+ drivers/net/ethernet/mediatek/mtk_ppe.h       | 69 ++++++++++---------
+ .../net/ethernet/mediatek/mtk_ppe_offload.c   |  7 +-
+ drivers/net/ethernet/mediatek/mtk_ppe_regs.h  | 10 +++
+ 6 files changed, 86 insertions(+), 45 deletions(-)
+
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+index 2121335a1..01fc1e5c0 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -1467,16 +1467,27 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
+ 			skb_checksum_none_assert(skb);
+ 		skb->protocol = eth_type_trans(skb, netdev);
+ 
+-		hash = trxd.rxd4 & MTK_RXD4_FOE_ENTRY;
++#if defined(CONFIG_MEDIATEK_NETSYS_V2)
++			hash = trxd.rxd5 & MTK_RXD5_FOE_ENTRY_V2;
++#else
++			hash = trxd.rxd4 & MTK_RXD4_FOE_ENTRY;
++#endif
+ 		if (hash != MTK_RXD4_FOE_ENTRY) {
+ 			hash = jhash_1word(hash, 0);
+ 			skb_set_hash(skb, hash, PKT_HASH_TYPE_L4);
+ 		}
+ 
++#if defined(CONFIG_MEDIATEK_NETSYS_V2)
++		reason = FIELD_GET(MTK_RXD5_PPE_CPU_REASON_V2, trxd.rxd5);
++		if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED)
++			mtk_ppe_check_skb(eth->ppe, skb,
++					  trxd.rxd5 & MTK_RXD5_FOE_ENTRY_V2);
++#else
+ 		reason = FIELD_GET(MTK_RXD4_PPE_CPU_REASON, trxd.rxd4);
+ 		if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED)
+ 			mtk_ppe_check_skb(eth->ppe, skb,
+ 					  trxd.rxd4 & MTK_RXD4_FOE_ENTRY);
++#endif
+ 
+ 		if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
+ 			if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
+@@ -3926,6 +3937,7 @@ static const struct mtk_soc_data mt7986_data = {
+ 	.required_clks = MT7986_CLKS_BITMAP,
+ 	.required_pctl = false,
+ 	.has_sram = true,
++	.offload_version = 2,
+ };
+ 
+ static const struct mtk_soc_data mt7981_data = {
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+index b52378bd6..fce1a7172 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -110,7 +110,7 @@
+ #define MTK_GDMA_TCS_EN		BIT(21)
+ #define MTK_GDMA_UCS_EN		BIT(20)
+ #define MTK_GDMA_TO_PDMA	0x0
+-#define MTK_GDMA_TO_PPE		0x4444
++#define MTK_GDMA_TO_PPE		0x3333
+ #define MTK_GDMA_DROP_ALL	0x7777
+ 
+ /* Unicast Filter MAC Address Register - Low */
+@@ -560,6 +560,11 @@
+ #define MTK_RXD4_SRC_PORT	GENMASK(21, 19)
+ #define MTK_RXD4_ALG		GENMASK(31, 22)
+ 
++/* QDMA descriptor rxd5 */
++#define MTK_RXD5_FOE_ENTRY_V2	GENMASK(14, 0)
++#define MTK_RXD5_PPE_CPU_REASON_V2	GENMASK(22, 18)
++#define MTK_RXD5_SRC_PORT_V2	GENMASK(29, 26)
++
+ /* QDMA descriptor rxd4 */
+ #define RX_DMA_L4_VALID		BIT(24)
+ #define RX_DMA_L4_VALID_PDMA	BIT(30)		/* when PDMA is used */
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
+index 3d75c22be..d46e91178 100755
+--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
++++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
+@@ -122,7 +122,7 @@ static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
+ 	hash = (hash >> 24) | ((hash & 0xffffff) << 8);
+ 	hash ^= hv1 ^ hv2 ^ hv3;
+ 	hash ^= hash >> 16;
+-	hash <<= 1;
++	hash <<= 2;
+ 	hash &= MTK_PPE_ENTRIES - 1;
+ 
+ 	return hash;
+@@ -171,8 +171,7 @@ int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
+ 	      MTK_FOE_IB1_BIND_CACHE;
+ 	entry->ib1 = val;
+ 
+-	val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
+-	      FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) |
++	val = FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0xf) |
+ 	      FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port);
+ 
+ 	if (is_multicast_ether_addr(dest_mac))
+@@ -359,12 +358,10 @@ int mtk_foe_entry_set_wdma(struct mtk_foe_entry *entry, int wdma_idx, int txq,
+ 
+ 	*ib2 &= ~MTK_FOE_IB2_PORT_MG;
+ 	*ib2 |= MTK_FOE_IB2_WDMA_WINFO;
+-	if (wdma_idx)
+-		*ib2 |= MTK_FOE_IB2_WDMA_DEVIDX;
++	*ib2 |=  FIELD_PREP(MTK_FOE_IB2_RX_IDX, txq);
+ 
+-	l2->vlan2 = FIELD_PREP(MTK_FOE_VLAN2_WINFO_BSS, bss) |
+-		    FIELD_PREP(MTK_FOE_VLAN2_WINFO_WCID, wcid) |
+-		    FIELD_PREP(MTK_FOE_VLAN2_WINFO_RING, txq);
++	l2->winfo = FIELD_PREP(MTK_FOE_WINFO_WCID, wcid) |
++		    FIELD_PREP(MTK_FOE_WINFO_BSS, bss);
+ 
+ 	return 0;
+ }
+@@ -741,6 +738,7 @@ int mtk_ppe_start(struct mtk_ppe *ppe)
+ 	      MTK_PPE_TB_CFG_AGE_TCP |
+ 	      MTK_PPE_TB_CFG_AGE_UDP |
+ 	      MTK_PPE_TB_CFG_AGE_TCP_FIN |
++	      MTK_PPE_TB_CFG_INFO_SEL |
+ 	      FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS,
+ 			 MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) |
+ 	      FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
+@@ -757,7 +755,9 @@ int mtk_ppe_start(struct mtk_ppe *ppe)
+ 
+ 	mtk_ppe_cache_enable(ppe, true);
+ 
+-	val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG |
++	val = MTK_PPE_MD_TOAP_BYP_CRSN0 |
++	      MTK_PPE_MD_TOAP_BYP_CRSN1 |
++	      MTK_PPE_MD_TOAP_BYP_CRSN2 |
+ 	      MTK_PPE_FLOW_CFG_IP4_UDP_FRAG |
+ 	      MTK_PPE_FLOW_CFG_IP6_3T_ROUTE |
+ 	      MTK_PPE_FLOW_CFG_IP6_5T_ROUTE |
+@@ -765,7 +765,8 @@ int mtk_ppe_start(struct mtk_ppe *ppe)
+ 	      MTK_PPE_FLOW_CFG_IP4_NAT |
+ 	      MTK_PPE_FLOW_CFG_IP4_NAPT |
+ 	      MTK_PPE_FLOW_CFG_IP4_DSLITE |
+-	      MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
++	      MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY |
++	      MTK_PPE_FLOW_CFG_IP4_NAT_FRAG ;
+ 	ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
+ 
+ 	val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) |
+@@ -800,6 +801,9 @@ int mtk_ppe_start(struct mtk_ppe *ppe)
+ 
+ 	ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
+ 
++	ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT1, 0xcb777);
++	ppe_w32(ppe, MTK_PPE_SBW_CTRL, 0x7f);
++
+ 	return 0;
+ }
+ 
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
+index 1f5cf1c9a..a76f4b0ac 100644
+--- a/drivers/net/ethernet/mediatek/mtk_ppe.h
++++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
+@@ -8,7 +8,7 @@
+ #include <linux/bitfield.h>
+ #include <linux/rhashtable.h>
+ 
+-#define MTK_ETH_PPE_BASE		0xc00
++#define MTK_ETH_PPE_BASE		0x2000
+ 
+ #define MTK_PPE_ENTRIES_SHIFT		3
+ #define MTK_PPE_ENTRIES			(1024 << MTK_PPE_ENTRIES_SHIFT)
+@@ -16,20 +16,23 @@
+ #define MTK_PPE_WAIT_TIMEOUT_US		1000000
+ 
+ #define MTK_FOE_IB1_UNBIND_TIMESTAMP	GENMASK(7, 0)
+-#define MTK_FOE_IB1_UNBIND_PACKETS	GENMASK(23, 8)
+-#define MTK_FOE_IB1_UNBIND_PREBIND	BIT(24)
+-
+-#define MTK_FOE_IB1_BIND_TIMESTAMP	GENMASK(14, 0)
+-#define MTK_FOE_IB1_BIND_KEEPALIVE	BIT(15)
+-#define MTK_FOE_IB1_BIND_VLAN_LAYER	GENMASK(18, 16)
+-#define MTK_FOE_IB1_BIND_PPPOE		BIT(19)
+-#define MTK_FOE_IB1_BIND_VLAN_TAG	BIT(20)
+-#define MTK_FOE_IB1_BIND_PKT_SAMPLE	BIT(21)
+-#define MTK_FOE_IB1_BIND_CACHE		BIT(22)
+-#define MTK_FOE_IB1_BIND_TUNNEL_DECAP	BIT(23)
+-#define MTK_FOE_IB1_BIND_TTL		BIT(24)
+-
+-#define MTK_FOE_IB1_PACKET_TYPE		GENMASK(27, 25)
++#define MTK_FOE_IB1_UNBIND_SRC_PORT	GENMASK(11, 8)
++#define MTK_FOE_IB1_UNBIND_PACKETS	GENMASK(19, 12)
++#define MTK_FOE_IB1_UNBIND_PREBIND	BIT(22)
++#define MTK_FOE_IB1_UNBIND_PACKET_TYPE	GENMASK(27, 23)
++#define MTK_FOE_IB1_BIND_TIMESTAMP	GENMASK(7, 0)
++#define MTK_FOE_IB1_BIND_SRC_PORT	GENMASK(11, 8)
++#define MTK_FOE_IB1_BIND_MC		BIT(12)
++#define MTK_FOE_IB1_BIND_KEEPALIVE	BIT(13)
++#define MTK_FOE_IB1_BIND_VLAN_LAYER	GENMASK(16, 14)
++#define MTK_FOE_IB1_BIND_PPPOE		BIT(17)
++#define MTK_FOE_IB1_BIND_VLAN_TAG	BIT(18)
++#define MTK_FOE_IB1_BIND_PKT_SAMPLE	BIT(19)
++#define MTK_FOE_IB1_BIND_CACHE		BIT(20)
++#define MTK_FOE_IB1_BIND_TUNNEL_DECAP	BIT(21)
++#define MTK_FOE_IB1_BIND_TTL		BIT(22)
++#define MTK_FOE_IB1_PACKET_TYPE		GENMASK(27, 23)
++
+ #define MTK_FOE_IB1_STATE		GENMASK(29, 28)
+ #define MTK_FOE_IB1_UDP			BIT(30)
+ #define MTK_FOE_IB1_STATIC		BIT(31)
+@@ -44,24 +47,19 @@ enum {
+ 	MTK_PPE_PKT_TYPE_IPV6_6RD = 7,
+ };
+ 
+-#define MTK_FOE_IB2_QID			GENMASK(3, 0)
+-#define MTK_FOE_IB2_PSE_QOS		BIT(4)
+-#define MTK_FOE_IB2_DEST_PORT		GENMASK(7, 5)
+-#define MTK_FOE_IB2_MULTICAST		BIT(8)
+-
+-#define MTK_FOE_IB2_WDMA_QID2		GENMASK(13, 12)
+-#define MTK_FOE_IB2_WDMA_DEVIDX		BIT(16)
+-#define MTK_FOE_IB2_WDMA_WINFO		BIT(17)
+-
+-#define MTK_FOE_IB2_PORT_MG		GENMASK(17, 12)
+-
+-#define MTK_FOE_IB2_PORT_AG		GENMASK(23, 18)
+-
++#define MTK_FOE_IB2_QID			GENMASK(6, 0)
++#define MTK_FOE_IB2_PORT_MG		BIT(7)
++#define MTK_FOE_IB2_PSE_QOS		BIT(8)
++#define MTK_FOE_IB2_DEST_PORT		GENMASK(12, 9)
++#define MTK_FOE_IB2_MULTICAST		BIT(13)
++#define MTK_FOE_IB2_MIB_CNT		BIT(15)
++#define MTK_FOE_IB2_RX_IDX		GENMASK(18, 17)
++#define MTK_FOE_IB2_WDMA_WINFO		BIT(19)
++#define MTK_FOE_IB2_PORT_AG		GENMASK(23, 20)
+ #define MTK_FOE_IB2_DSCP		GENMASK(31, 24)
+ 
+-#define MTK_FOE_VLAN2_WINFO_BSS		GENMASK(5, 0)
+-#define MTK_FOE_VLAN2_WINFO_WCID	GENMASK(13, 6)
+-#define MTK_FOE_VLAN2_WINFO_RING	GENMASK(15, 14)
++#define MTK_FOE_WINFO_BSS		GENMASK(5, 0)
++#define MTK_FOE_WINFO_WCID		GENMASK(15, 6)
+ 
+ enum {
+ 	MTK_FOE_STATE_INVALID,
+@@ -83,6 +81,9 @@ struct mtk_foe_mac_info {
+ 
+ 	u16 pppoe_id;
+ 	u16 src_mac_lo;
++
++	u16 minfo;
++	u16 winfo;
+ };
+ 
+ /* software-only entry type */
+@@ -96,6 +97,10 @@ struct mtk_foe_bridge {
+ 	u32 ib2;
+ 
+ 	struct mtk_foe_mac_info l2;
++	u32 new_sip;
++	u32 new_dip;
++	u16 new_dport;
++	u16 new_sport;
+ };
+ 
+ struct mtk_ipv4_tuple {
+@@ -200,7 +205,7 @@ struct mtk_foe_entry {
+ 		struct mtk_foe_ipv4_dslite dslite;
+ 		struct mtk_foe_ipv6 ipv6;
+ 		struct mtk_foe_ipv6_6rd ipv6_6rd;
+-		u32 data[19];
++		u32 data[23];
+ 	};
+ };
+ 
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+index d4a012608..5a4201447 100644
+--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
++++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+@@ -192,7 +192,12 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
+ 	if (mtk_flow_get_wdma_info(dev, dest_mac, &info) == 0) {
+ 		mtk_foe_entry_set_wdma(foe, info.wdma_idx, info.queue, info.bss,
+ 				       info.wcid);
+-		pse_port = 3;
++		if (info.wdma_idx == 0)
++			pse_port = 8;
++		else if (info.wdma_idx == 1)
++			pse_port = 9;
++		else
++			return -EOPNOTSUPP;
+ 		*wed_index = info.wdma_idx;
+ 		goto out;
+ 	}
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
+index 0c45ea090..d319f1861 100644
+--- a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
++++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
+@@ -21,6 +21,9 @@
+ #define MTK_PPE_GLO_CFG_BUSY			BIT(31)
+ 
+ #define MTK_PPE_FLOW_CFG			0x204
++#define MTK_PPE_MD_TOAP_BYP_CRSN0		BIT(1)
++#define MTK_PPE_MD_TOAP_BYP_CRSN1		BIT(2)
++#define MTK_PPE_MD_TOAP_BYP_CRSN2		BIT(3)
+ #define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG		BIT(6)
+ #define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG		BIT(7)
+ #define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE		BIT(8)
+@@ -35,6 +38,8 @@
+ #define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL	BIT(18)
+ #define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY	BIT(19)
+ #define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY	BIT(20)
++#define MTK_PPE_FLOW_CFG_IPV4_MAPE_EN		BIT(21)
++#define MTK_PPE_FLOW_CFG_IPV4_MAPT_EN		BIT(22)
+ 
+ #define MTK_PPE_IP_PROTO_CHK			0x208
+ #define MTK_PPE_IP_PROTO_CHK_IPV4		GENMASK(15, 0)
+@@ -54,6 +59,7 @@
+ #define MTK_PPE_TB_CFG_HASH_MODE		GENMASK(15, 14)
+ #define MTK_PPE_TB_CFG_SCAN_MODE		GENMASK(17, 16)
+ #define MTK_PPE_TB_CFG_HASH_DEBUG		GENMASK(19, 18)
++#define MTK_PPE_TB_CFG_INFO_SEL			BIT(20)
+ 
+ enum {
+ 	MTK_PPE_SCAN_MODE_DISABLED,
+@@ -111,6 +117,8 @@ enum {
+ 
+ #define MTK_PPE_DEFAULT_CPU_PORT		0x248
+ #define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n)	(GENMASK(2, 0) << ((_n) * 4))
++#define MTK_PPE_DEFAULT_CPU_PORT1		0x24C
++#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n)	(GENMASK(2, 0) << ((_n) * 4))
+ 
+ #define MTK_PPE_MTU_DROP			0x308
+ 
+@@ -141,4 +149,6 @@ enum {
+ #define MTK_PPE_MIB_CACHE_CTL_EN		BIT(0)
+ #define MTK_PPE_MIB_CACHE_CTL_FLUSH		BIT(2)
+ 
++#define MTK_PPE_SBW_CTRL			0x374
++
+ #endif
+-- 
+2.18.0
+
diff --git a/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9995-flow-offload-add-mkhnat-dual-ppe-new-v2.patch b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9995-flow-offload-add-mkhnat-dual-ppe-new-v2.patch
new file mode 100755
index 0000000..0eb1117
--- /dev/null
+++ b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9995-flow-offload-add-mkhnat-dual-ppe-new-v2.patch
@@ -0,0 +1,474 @@
+From a59cb5c770a694cb34ab179ec59e91ba5c39908b Mon Sep 17 00:00:00 2001
+From: Bo Jiao <Bo.Jiao@mediatek.com>
+Date: Mon, 27 Jun 2022 14:48:35 +0800
+Subject: [PATCH 6/8] 9995-flow-offload-add-mkhnat-dual-ppe-new-v2
+
+---
+ arch/arm64/boot/dts/mediatek/mt7986a.dtsi     |  1 +
+ drivers/net/ethernet/mediatek/mtk_eth_soc.c   | 67 ++++++++++++++-----
+ drivers/net/ethernet/mediatek/mtk_eth_soc.h   | 10 ++-
+ drivers/net/ethernet/mediatek/mtk_ppe.c       |  5 +-
+ drivers/net/ethernet/mediatek/mtk_ppe.h       |  7 +-
+ .../net/ethernet/mediatek/mtk_ppe_debugfs.c   | 27 ++++++--
+ .../net/ethernet/mediatek/mtk_ppe_offload.c   | 45 ++++++++++---
+ include/linux/netdevice.h                     |  4 ++
+ 8 files changed, 125 insertions(+), 41 deletions(-)
+ mode change 100644 => 100755 drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+
+diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
+index 7f78de6b9..381136c21 100644
+--- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
++++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
+@@ -479,6 +479,7 @@
+                 mediatek,ethsys = <&ethsys>;
+ 		mediatek,sgmiisys = <&sgmiisys0>, <&sgmiisys1>;
+ 		mediatek,wed = <&wed0>, <&wed1>;
++                mtketh-ppe-num = <2>;
+                 #reset-cells = <1>;
+                 #address-cells = <1>;
+                 #size-cells = <0>;
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+index 01fc1e5c0..3f67bebfe 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -1379,6 +1379,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
+ 	u8 *data, *new_data;
+ 	struct mtk_rx_dma *rxd, trxd;
+ 	int done = 0;
++	int i;
+ 
+ 	if (unlikely(!ring))
+ 		goto rx_done;
+@@ -1479,14 +1480,20 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget,
+ 
+ #if defined(CONFIG_MEDIATEK_NETSYS_V2)
+ 		reason = FIELD_GET(MTK_RXD5_PPE_CPU_REASON_V2, trxd.rxd5);
+-		if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED)
+-			mtk_ppe_check_skb(eth->ppe, skb,
+-					  trxd.rxd5 & MTK_RXD5_FOE_ENTRY_V2);
++		if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED) {
++			for (i = 0; i < eth->ppe_num; i++) {
++				mtk_ppe_check_skb(eth->ppe[i], skb,
++						  trxd.rxd5 & MTK_RXD5_FOE_ENTRY_V2);
++			}
++		}
+ #else
+ 		reason = FIELD_GET(MTK_RXD4_PPE_CPU_REASON, trxd.rxd4);
+-		if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED)
+-			mtk_ppe_check_skb(eth->ppe, skb,
+-					  trxd.rxd4 & MTK_RXD4_FOE_ENTRY);
++		if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED) {
++			for (i = 0; i < eth->ppe_num; i++) {
++				mtk_ppe_check_skb(eth->ppe[i], skb,
++						  trxd.rxd4 & MTK_RXD4_FOE_ENTRY);
++			}
++		}
+ #endif
+ 
+ 		if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) {
+@@ -2687,8 +2694,12 @@ static int mtk_open(struct net_device *dev)
+ 		if (err)
+ 			return err;
+ 
+-		if (eth->soc->offload_version && mtk_ppe_start(eth->ppe) == 0)
+-			gdm_config = MTK_GDMA_TO_PPE;
++		if (eth->soc->offload_version) {
++			gdm_config = MTK_GDMA_TO_PPE0;
++
++			for (i = 0; i < eth->ppe_num; i++)
++				mtk_ppe_start(eth->ppe[i]);
++		}
+ 
+ 		mtk_gdm_config(eth, gdm_config);
+ 
+@@ -2803,8 +2814,10 @@ static int mtk_stop(struct net_device *dev)
+ 
+ 	mtk_dma_free(eth);
+ 
+-	if (eth->soc->offload_version)
+-		mtk_ppe_stop(eth->ppe);
++	if (eth->soc->offload_version) {
++		for (i = 0; i < eth->ppe_num; i++)
++			mtk_ppe_stop(eth->ppe[i]);
++	}
+ 
+ 	return 0;
+ }
+@@ -3780,15 +3793,35 @@ static int mtk_probe(struct platform_device *pdev)
+ 	}
+ 
+ 	if (eth->soc->offload_version) {
+-		eth->ppe = mtk_ppe_init(eth, eth->base + MTK_ETH_PPE_BASE, 2);
+-		if (!eth->ppe) {
+-			err = -ENOMEM;
+-			goto err_free_dev;
++		unsigned int val;
++
++		err = of_property_read_u32_index(pdev->dev.of_node, "mtketh-ppe-num", 0, &val);
++		if (err < 0)
++			val = 1;	/* property absent: assume a single PPE */
++
++		/* check the raw u32: narrowing into the u8 ppe_num first could hide a bad value */
++		if (!val || val > MTK_MAX_PPE_NUM) {
++			dev_warn(&pdev->dev,
++				 "%u is not a valid ppe num, please check mtketh-ppe-num in dts !\n", val);
++			val = val ? MTK_MAX_PPE_NUM : 1;
+ 		}
+ 
+-		err = mtk_eth_offload_init(eth);
+-		if (err)
+-			goto err_free_dev;
++		eth->ppe_num = val;
++		dev_info(&pdev->dev, "ppe num = %d\n", eth->ppe_num);
++
++		for (i = 0; i < eth->ppe_num; i++) {
++			eth->ppe[i] = mtk_ppe_init(eth,
++					   eth->base + MTK_ETH_PPE_BASE + i * 0x400, 2, i);
++			if (!eth->ppe[i]) {
++				err = -ENOMEM;
++				goto err_free_dev;
++			}
++		}
++		/* flow_table is shared by all PPEs, so it is initialised once rather than per PPE */
++		err = mtk_eth_offload_init(eth, 0);
++		if (err)
++			goto err_free_dev;
++		mtk_ppe_debugfs_init(eth);
+ 	}
+ 
+ 	for (i = 0; i < MTK_MAX_DEVS; i++) {
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+index fce1a7172..b4de7c0c6 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -110,7 +110,8 @@
+ #define MTK_GDMA_TCS_EN		BIT(21)
+ #define MTK_GDMA_UCS_EN		BIT(20)
+ #define MTK_GDMA_TO_PDMA	0x0
+-#define MTK_GDMA_TO_PPE		0x3333
++#define MTK_GDMA_TO_PPE0	0x3333
++#define MTK_GDMA_TO_PPE1	0x4444
+ #define MTK_GDMA_DROP_ALL	0x7777
+ 
+ /* Unicast Filter MAC Address Register - Low */
+@@ -1299,7 +1300,8 @@ struct mtk_eth {
+ 	spinlock_t			syscfg0_lock;
+ 	struct timer_list		mtk_dma_monitor_timer;
+ 
+-	struct mtk_ppe			*ppe;
++	u8				ppe_num;
++	struct mtk_ppe			*ppe[MTK_MAX_PPE_NUM];
+ 	struct rhashtable		flow_table;
+ };
+ 
+@@ -1349,9 +1351,11 @@ int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
+ void mtk_gdm_config(struct mtk_eth *eth, u32 config);
+ void ethsys_reset(struct mtk_eth *eth, u32 reset_bits);
+ 
+-int mtk_eth_offload_init(struct mtk_eth *eth);
++int mtk_eth_offload_init(struct mtk_eth *eth, int id);
+ int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ 		     void *type_data);
+ void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev);
+ 
++int mtk_ppe_debugfs_init(struct mtk_eth *eth);
++
+ #endif /* MTK_ETH_H */
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
+index d46e91178..3d6ff30ba 100755
+--- a/drivers/net/ethernet/mediatek/mtk_ppe.c
++++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
+@@ -677,7 +677,7 @@ int mtk_foe_entry_idle_time(struct mtk_ppe *ppe, struct mtk_flow_entry *entry)
+ }
+ 
+ struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base,
+-		 int version)
++		 int version, int id)
+ {
+ 	struct device *dev = eth->dev;
+ 	struct mtk_foe_entry *foe;
+@@ -696,6 +696,7 @@ struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base,
+ 	ppe->eth = eth;
+ 	ppe->dev = dev;
+ 	ppe->version = version;
++	ppe->id = id;
+ 
+ 	foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
+ 				  &ppe->foe_phys, GFP_KERNEL);
+@@ -704,8 +705,6 @@ struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base,
+ 
+ 	ppe->foe_table = foe;
+ 
+-	mtk_ppe_debugfs_init(ppe);
+-
+ 	return ppe;
+ }
+ 
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
+index a76f4b0ac..21cc55145 100644
+--- a/drivers/net/ethernet/mediatek/mtk_ppe.h
++++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
+@@ -8,6 +8,8 @@
+ #include <linux/bitfield.h>
+ #include <linux/rhashtable.h>
+ 
++#define MTK_MAX_PPE_NUM			2
++
+ #define MTK_ETH_PPE_BASE		0x2000
+ 
+ #define MTK_PPE_ENTRIES_SHIFT		3
+@@ -253,6 +255,7 @@ struct mtk_flow_entry {
+ 		};
+ 	};
+ 	u8 type;
++	s8 ppe_index;
+ 	s8 wed_index;
+ 	u16 hash;
+ 	union {
+@@ -272,6 +275,7 @@ struct mtk_ppe {
+ 	struct device *dev;
+ 	void __iomem *base;
+ 	int version;
++	int id;
+ 
+ 	struct mtk_foe_entry *foe_table;
+ 	dma_addr_t foe_phys;
+@@ -284,7 +288,7 @@ struct mtk_ppe {
+ 	void *acct_table;
+ };
+ 
+-struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, int version);
++struct mtk_ppe *mtk_ppe_init(struct mtk_eth *eth, void __iomem *base, int version, int id);
+ int mtk_ppe_start(struct mtk_ppe *ppe);
+ int mtk_ppe_stop(struct mtk_ppe *ppe);
+ 
+@@ -335,6 +339,5 @@ int mtk_foe_entry_set_wdma(struct mtk_foe_entry *entry, int wdma_idx, int txq,
+ int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
+ void mtk_foe_entry_clear(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
+ int mtk_foe_entry_idle_time(struct mtk_ppe *ppe, struct mtk_flow_entry *entry);
+-int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
+ 
+ #endif
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
+index a591ab1fd..f4ebe5944 100644
+--- a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
++++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
+@@ -73,9 +73,8 @@ mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai)
+ }
+ 
+ static int
+-mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
++mtk_ppe_debugfs_foe_show(struct seq_file *m, struct mtk_ppe *ppe, bool bind)
+ {
+-	struct mtk_ppe *ppe = m->private;
+ 	int i;
+ 
+ 	for (i = 0; i < MTK_PPE_ENTRIES; i++) {
+@@ -122,6 +121,8 @@ mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
+ 			break;
+ 		}
+ 
++		seq_printf(m, " ppe=%d", ppe->id);
++
+ 		seq_printf(m, " orig=");
+ 		mtk_print_addr_info(m, &ai);
+ 
+@@ -164,13 +165,25 @@ mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
+ static int
+ mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private)
+ {
+-	return mtk_ppe_debugfs_foe_show(m, private, false);
++	struct mtk_eth *eth = m->private;
++	int i;
++
++	for (i = 0; i < eth->ppe_num; i++)
++		mtk_ppe_debugfs_foe_show(m, eth->ppe[i], false);
++
++	return 0;
+ }
+ 
+ static int
+ mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private)
+ {
+-	return mtk_ppe_debugfs_foe_show(m, private, true);
++	struct mtk_eth *eth = m->private;
++	int i;
++
++	for (i = 0; i < eth->ppe_num; i++)
++		mtk_ppe_debugfs_foe_show(m, eth->ppe[i], true);
++
++	return 0;
+ }
+ 
+ static int
+@@ -187,7 +200,7 @@ mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file)
+ 			   inode->i_private);
+ }
+ 
+-int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
++int mtk_ppe_debugfs_init(struct mtk_eth *eth)
+ {
+ 	static const struct file_operations fops_all = {
+ 		.open = mtk_ppe_debugfs_foe_open_all,
+@@ -209,8 +222,8 @@ int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
+ 	if (!root)
+ 		return -ENOMEM;
+ 
+-	debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
+-	debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
++	debugfs_create_file("entries", S_IRUGO, root, eth, &fops_all);
++	debugfs_create_file("bind", S_IRUGO, root, eth, &fops_bind);
+ 
+ 	return 0;
+ }
+diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+old mode 100644
+new mode 100755
+index 5a4201447..2f7d76d3b
+--- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
++++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
+@@ -226,8 +226,10 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+ 	struct flow_action_entry *act;
+ 	struct mtk_flow_data data = {};
+ 	struct mtk_foe_entry foe;
+-	struct net_device *odev = NULL;
++	struct net_device *idev = NULL, *odev = NULL;
+ 	struct mtk_flow_entry *entry;
++	struct net_device_path_ctx ctx = {};
++	struct net_device_path path = {};
+ 	int offload_type = 0;
+ 	int wed_index = -1;
+ 	u16 addr_type = 0;
+@@ -242,6 +244,10 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+ 		struct flow_match_meta match;
+ 
+ 		flow_rule_match_meta(rule, &match);
++		idev = __dev_get_by_index(&init_net, match.key->ingress_ifindex);
++
++		if (!idev)
++			pr_info("[%s] idev doesn't exist !\n", __func__);
+ 	} else {
+ 		return -EOPNOTSUPP;
+ 	}
+@@ -435,11 +441,25 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+ 	if (!entry)
+ 		return -ENOMEM;
+ 
++	i = 0;
++	if (idev && idev->netdev_ops->ndo_fill_receive_path) {
++		ctx.dev = idev;
++		idev->netdev_ops->ndo_fill_receive_path(&ctx, &path);
++		i = path.mtk_wdma.wdma_idx;
++		if (i >= eth->ppe_num) {
++			if (printk_ratelimit())
++				pr_info("[%s] PPE%d doesn't exist, please check mtketh-ppe-num in dts !\n", __func__, i);
++			err = -EINVAL;	/* exit via free: so the allocated entry is not leaked */
++			goto free;
++		}
++	}
++
+ 	entry->cookie = f->cookie;
+ 	memcpy(&entry->data, &foe, sizeof(entry->data));
++	entry->ppe_index = i;
+ 	entry->wed_index = wed_index;
+ 
+-	if (mtk_foe_entry_commit(eth->ppe, entry) < 0)
++	if (mtk_foe_entry_commit(eth->ppe[i], entry) < 0)
+ 		goto free;
+ 
+ 	err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
+@@ -450,7 +470,7 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+ 	return 0;
+ 
+ clear:
+-	mtk_foe_entry_clear(eth->ppe, entry);
++	mtk_foe_entry_clear(eth->ppe[i], entry);
+ free:
+ 	kfree(entry);
+ 	if (wed_index >= 0)
+@@ -462,13 +482,15 @@ static int
+ mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
+ {
+ 	struct mtk_flow_entry *entry;
++	int i;
+ 
+ 	entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
+ 				  mtk_flow_ht_params);
+ 	if (!entry)
+ 		return -ENOENT;
+ 
+-	mtk_foe_entry_clear(eth->ppe, entry);
++	i = entry->ppe_index;
++	mtk_foe_entry_clear(eth->ppe[i], entry);
+ 	rhashtable_remove_fast(&eth->flow_table, &entry->node,
+ 			       mtk_flow_ht_params);
+ 	if (entry->wed_index >= 0)
+@@ -483,13 +505,15 @@ mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
+ {
+ 	struct mtk_flow_entry *entry;
+ 	u32 idle;
++	int i;
+ 
+ 	entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
+ 				  mtk_flow_ht_params);
+ 	if (!entry)
+ 		return -ENOENT;
+ 
+-	idle = mtk_foe_entry_idle_time(eth->ppe, entry);
++	i = entry->ppe_index;
++	idle = mtk_foe_entry_idle_time(eth->ppe[i], entry);
+ 	f->stats.lastused = jiffies - idle * HZ;
+ 
+ 	return 0;
+@@ -540,9 +564,12 @@ mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
+ 	static LIST_HEAD(block_cb_list);
+ 	struct flow_block_cb *block_cb;
+ 	flow_setup_cb_t *cb;
++	int i;
+ 
+-	if (!eth->ppe || !eth->ppe->foe_table)
+-		return -EOPNOTSUPP;
++	for (i = 0; i < eth->ppe_num; i++) {
++		if (!eth->ppe[i] || !eth->ppe[i]->foe_table)
++			return -EOPNOTSUPP;
++	}
+ 
+ 	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ 		return -EOPNOTSUPP;
+@@ -591,9 +618,9 @@ int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ 	}
+ }
+ 
+-int mtk_eth_offload_init(struct mtk_eth *eth)
++int mtk_eth_offload_init(struct mtk_eth *eth, int id)
+ {
+-	if (!eth->ppe || !eth->ppe->foe_table)
++	if (!eth->ppe[id] || !eth->ppe[id]->foe_table)
+ 		return 0;
+ 
+ 	return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 35998b1a7..0ada2461b 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1302,6 +1302,8 @@ struct tlsdev_ops;
+  *	rtnl_lock is not held.
+  * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
+  *     Get the forwarding path to reach the real device from the HW destination address
++ * int (*ndo_fill_receive_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
++ *     Get the receiving path to reach the real device from the HW source address
+  */
+ struct net_device_ops {
+ 	int			(*ndo_init)(struct net_device *dev);
+@@ -1501,6 +1503,8 @@ struct net_device_ops {
+ 	struct devlink_port *	(*ndo_get_devlink_port)(struct net_device *dev);
+ 	int                     (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
+                                                          struct net_device_path *path);
++	int                     (*ndo_fill_receive_path)(struct net_device_path_ctx *ctx,
++							 struct net_device_path *path);
+ };
+ 
+ /**
+-- 
+2.18.0
+
diff --git a/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9996-add-wed-tx-support-for-mt7986.patch b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9996-add-wed-tx-support-for-mt7986.patch
new file mode 100755
index 0000000..8f3e2ca
--- /dev/null
+++ b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9996-add-wed-tx-support-for-mt7986.patch
@@ -0,0 +1,1275 @@
+From c6b43d63c3d4229b5f15cb7391192494b07e0fa7 Mon Sep 17 00:00:00 2001
+From: Bo Jiao <Bo.Jiao@mediatek.com>
+Date: Mon, 27 Jun 2022 14:53:54 +0800
+Subject: [PATCH 7/8] 9996-add-wed-tx-support-for-mt7986
+
+---
+ arch/arm64/boot/dts/mediatek/mt7986a.dtsi     |   2 +
+ arch/arm64/boot/dts/mediatek/mt7986b.dtsi     |   2 +
+ drivers/net/ethernet/mediatek/mtk_eth_soc.c   |   8 +-
+ drivers/net/ethernet/mediatek/mtk_eth_soc.h   |   5 +
+ drivers/net/ethernet/mediatek/mtk_wed.c       | 502 +++++++++++++-----
+ drivers/net/ethernet/mediatek/mtk_wed.h       |  18 +-
+ .../net/ethernet/mediatek/mtk_wed_debugfs.c   |   3 +
+ drivers/net/ethernet/mediatek/mtk_wed_regs.h  | 127 ++++-
+ include/linux/soc/mediatek/mtk_wed.h          |  29 +-
+ 9 files changed, 546 insertions(+), 150 deletions(-)
+
+diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
+index 381136c21..644255b35 100644
+--- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
++++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
+@@ -64,6 +64,7 @@
+ 		reg = <0 0x15010000 0 0x1000>;
+ 		interrupt-parent = <&gic>;
+ 		interrupts = <GIC_SPI 205 IRQ_TYPE_LEVEL_HIGH>;
++		mediatek,wed_pcie = <&wed_pcie>;
+ 	};
+ 
+ 	wed1: wed@15011000 {
+@@ -72,6 +73,7 @@
+ 		reg = <0 0x15011000 0 0x1000>;
+ 		interrupt-parent = <&gic>;
+ 		interrupts = <GIC_SPI 206 IRQ_TYPE_LEVEL_HIGH>;
++		mediatek,wed_pcie = <&wed_pcie>;
+ 	};
+ 
+ 	ap2woccif: ap2woccif@151A5000 {
+diff --git a/arch/arm64/boot/dts/mediatek/mt7986b.dtsi b/arch/arm64/boot/dts/mediatek/mt7986b.dtsi
+index 0e5f116a2..67bf86f6a 100644
+--- a/arch/arm64/boot/dts/mediatek/mt7986b.dtsi
++++ b/arch/arm64/boot/dts/mediatek/mt7986b.dtsi
+@@ -64,6 +64,7 @@
+ 		reg = <0 0x15010000 0 0x1000>;
+ 		interrupt-parent = <&gic>;
+ 		interrupts = <GIC_SPI 205 IRQ_TYPE_LEVEL_HIGH>;
++		mediatek,wed_pcie = <&wed_pcie>;
+ 	};
+ 
+ 	wed1: wed@15011000 {
+@@ -72,6 +73,7 @@
+ 		reg = <0 0x15011000 0 0x1000>;
+ 		interrupt-parent = <&gic>;
+ 		interrupts = <GIC_SPI 206 IRQ_TYPE_LEVEL_HIGH>;
++		mediatek,wed_pcie = <&wed_pcie>;
+ 	};
+ 
+ 	ap2woccif: ap2woccif@151A5000 {
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+index 3f67bebfe..ac021e2ed 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -3579,6 +3579,7 @@ static int mtk_probe(struct platform_device *pdev)
+ {
+ 	struct device_node *mac_np;
+ 	struct mtk_eth *eth;
++	struct resource *res = NULL;	/* only looked up on SoCs with SRAM */
+ 	int err, i;
+ 
+ 	eth = devm_kzalloc(&pdev->dev, sizeof(*eth), GFP_KERNEL);
+@@ -3594,7 +3595,6 @@ static int mtk_probe(struct platform_device *pdev)
+ 		return PTR_ERR(eth->base);
+ 
+ 	if(eth->soc->has_sram) {
+-		struct resource *res;
+ 		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ 		if (unlikely(!res))
+ 			return -EINVAL;
+@@ -3682,12 +3682,16 @@ static int mtk_probe(struct platform_device *pdev)
+ 			MTK_WDMA1_BASE
+ 		};
+ 		void __iomem *wdma;
++		u32 wdma_phy = 0;	/* stays 0 when no MEM resource is known */
+ 
+ 		if (!np || i >= ARRAY_SIZE(wdma_regs))
+ 			break;
+ 
+ 		wdma = eth->base + wdma_regs[i];
+-		mtk_wed_add_hw(np, eth, wdma, i);
++		if (res)
++			wdma_phy = res->start + wdma_regs[i];
++
++		mtk_wed_add_hw(np, eth, wdma, wdma_phy, i);
+ 	}
+ 
+ 	for (i = 0; i < MTK_MAX_IRQ_NUM; i++) {
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+index b4de7c0c6..4a69bd0cf 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -518,8 +518,13 @@
+ #define RX_DMA_SPORT_MASK       0x7
+ #endif
+ 
++#if defined(CONFIG_MEDIATEK_NETSYS_V2)
++#define MTK_WDMA0_BASE		0x4800
++#define MTK_WDMA1_BASE		0x4c00
++#else
+ #define MTK_WDMA0_BASE		0x2800
+ #define MTK_WDMA1_BASE		0x2c00
++#endif
+ 
+ /* QDMA descriptor txd4 */
+ #define TX_DMA_CHKSUM		(0x7 << 29)
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
+index ea1cbdf1a..48b0353bb 100644
+--- a/drivers/net/ethernet/mediatek/mtk_wed.c
++++ b/drivers/net/ethernet/mediatek/mtk_wed.c
+@@ -18,15 +18,6 @@
+ #include "mtk_wed.h"
+ #include "mtk_ppe.h"
+ 
+-#define MTK_PCIE_BASE(n)		(0x1a143000 + (n) * 0x2000)
+-
+-#define MTK_WED_PKT_SIZE		1900
+-#define MTK_WED_BUF_SIZE		2048
+-#define MTK_WED_BUF_PER_PAGE		(PAGE_SIZE / 2048)
+-
+-#define MTK_WED_TX_RING_SIZE		2048
+-#define MTK_WED_WDMA_RING_SIZE		1024
+-
+ static struct mtk_wed_hw *hw_list[2];
+ static DEFINE_MUTEX(hw_lock);
+ 
+@@ -80,14 +71,19 @@ mtk_wed_reset(struct mtk_wed_device *dev, u32 mask)
+ static struct mtk_wed_hw *
+ mtk_wed_assign(struct mtk_wed_device *dev)
+ {
+-	struct mtk_wed_hw *hw;
++	int i;
++
++	for (i = 0; i < ARRAY_SIZE(hw_list); i++) {
++		struct mtk_wed_hw *hw = hw_list[i];
++
++		if (!hw || hw->wed_dev)
++			continue;
+ 
+-	hw = hw_list[pci_domain_nr(dev->wlan.pci_dev->bus)];
+-	if (!hw || hw->wed_dev)
+-		return NULL;
++		hw->wed_dev = dev;
++		return hw;
++	}
+ 
+-	hw->wed_dev = dev;
+-	return hw;
++	return NULL;
+ }
+ 
+ static int
+@@ -96,19 +92,27 @@ mtk_wed_buffer_alloc(struct mtk_wed_device *dev)
+ 	struct mtk_wdma_desc *desc;
+ 	dma_addr_t desc_phys;
+ 	void **page_list;
++	u32 last_seg = MTK_WDMA_DESC_CTRL_LAST_SEG1;
+ 	int token = dev->wlan.token_start;
+-	int ring_size;
+-	int n_pages;
+-	int i, page_idx;
++	int ring_size, n_pages, page_idx;
++	int i;
++
++
++	if (dev->ver == MTK_WED_V1) {
++		ring_size = dev->wlan.nbuf & ~(MTK_WED_BUF_PER_PAGE - 1);
++	} else {
++		ring_size = MTK_WED_VLD_GROUP_SIZE * MTK_WED_PER_GROUP_PKT +
++			    MTK_WED_WDMA_RING_SIZE * 2;
++		last_seg = MTK_WDMA_DESC_CTRL_LAST_SEG0;
++	}
+ 
+-	ring_size = dev->wlan.nbuf & ~(MTK_WED_BUF_PER_PAGE - 1);
+ 	n_pages = ring_size / MTK_WED_BUF_PER_PAGE;
+ 
+ 	page_list = kcalloc(n_pages, sizeof(*page_list), GFP_KERNEL);
+ 	if (!page_list)
+ 		return -ENOMEM;
+ 
+-	dev->buf_ring.size = ring_size;
++	dev->buf_ring.size = dev->wlan.nbuf & ~(MTK_WED_BUF_PER_PAGE - 1);
+ 	dev->buf_ring.pages = page_list;
+ 
+ 	desc = dma_alloc_coherent(dev->hw->dev, ring_size * sizeof(*desc),
+@@ -154,7 +158,7 @@ mtk_wed_buffer_alloc(struct mtk_wed_device *dev)
+ 						txd_size) |
+ 				     FIELD_PREP(MTK_WDMA_DESC_CTRL_LEN1,
+ 						MTK_WED_BUF_SIZE - txd_size) |
+-				     MTK_WDMA_DESC_CTRL_LAST_SEG1;
++						last_seg;
+ 			desc->info = 0;
+ 			desc++;
+ 
+@@ -202,12 +206,12 @@ free_pagelist:
+ }
+ 
+ static void
+-mtk_wed_free_ring(struct mtk_wed_device *dev, struct mtk_wed_ring *ring)
++mtk_wed_free_ring(struct mtk_wed_device *dev, struct mtk_wed_ring *ring, int scale)
+ {
+ 	if (!ring->desc)
+ 		return;
+ 
+-	dma_free_coherent(dev->hw->dev, ring->size * sizeof(*ring->desc),
++	dma_free_coherent(dev->hw->dev, ring->size * sizeof(*ring->desc) * scale,
+ 			  ring->desc, ring->desc_phys);
+ }
+ 
+@@ -217,9 +221,69 @@ mtk_wed_free_tx_rings(struct mtk_wed_device *dev)
+ 	int i;
+ 
+ 	for (i = 0; i < ARRAY_SIZE(dev->tx_ring); i++)
+-		mtk_wed_free_ring(dev, &dev->tx_ring[i]);
++		mtk_wed_free_ring(dev, &dev->tx_ring[i], 1);
+ 	for (i = 0; i < ARRAY_SIZE(dev->tx_wdma); i++)
+-		mtk_wed_free_ring(dev, &dev->tx_wdma[i]);
++		mtk_wed_free_ring(dev, &dev->tx_wdma[i], dev->ver);
++}
++
++static void
++mtk_wed_set_int(struct mtk_wed_device *dev, u32 irq_mask)
++{
++	u32 wdma_mask;
++
++	wdma_mask = FIELD_PREP(MTK_WDMA_INT_MASK_RX_DONE, GENMASK(1, 0));
++
++	/* set the enable bits in the WED control register */
++	wed_set(dev, MTK_WED_CTRL,
++		MTK_WED_CTRL_WDMA_INT_AGENT_EN |
++		MTK_WED_CTRL_WPDMA_INT_AGENT_EN |
++		MTK_WED_CTRL_WED_TX_BM_EN |
++		MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
++
++	if (dev->ver == MTK_WED_V1) {
++		wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER,
++			MTK_WED_PCIE_INT_TRIGGER_STATUS);
++
++		wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER,
++			MTK_WED_WPDMA_INT_TRIGGER_RX_DONE |
++			MTK_WED_WPDMA_INT_TRIGGER_TX_DONE);
++
++		wed_set(dev, MTK_WED_WPDMA_INT_CTRL,
++			MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV);
++	} else {
++		/* initial tx interrupt trigger */
++		wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_TX,
++			MTK_WED_WPDMA_INT_CTRL_TX0_DONE_EN |
++			MTK_WED_WPDMA_INT_CTRL_TX0_DONE_CLR |
++			MTK_WED_WPDMA_INT_CTRL_TX1_DONE_EN |
++			MTK_WED_WPDMA_INT_CTRL_TX1_DONE_CLR |
++			FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_TX0_DONE_TRIG,
++				   dev->wlan.tx_tbit[0]) |
++			FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_TX1_DONE_TRIG,
++				   dev->wlan.tx_tbit[1]));
++
++		/* initial txfree interrupt trigger */
++		wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_TX_FREE,
++			MTK_WED_WPDMA_INT_CTRL_TX_FREE_DONE_EN |
++			MTK_WED_WPDMA_INT_CTRL_TX_FREE_DONE_CLR |
++			FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_TX_FREE_DONE_TRIG,
++				    dev->wlan.txfree_tbit));
++	}
++	/* initial wdma interrupt agent */
++	wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, wdma_mask);
++	if (dev->ver == MTK_WED_V1) {
++		wed_clr(dev, MTK_WED_WDMA_INT_CTRL, wdma_mask);
++	} else {
++		wed_w32(dev, MTK_WED_WDMA_INT_CLR, wdma_mask);
++		wed_set(dev, MTK_WED_WDMA_INT_CTRL,
++			FIELD_PREP(MTK_WED_WDMA_INT_POLL_SRC_SEL,dev->wdma_idx));
++
++	}
++
++	wdma_w32(dev, MTK_WDMA_INT_MASK, wdma_mask);
++	wdma_w32(dev, MTK_WDMA_INT_GRP2, wdma_mask);
++	wed_w32(dev, MTK_WED_WPDMA_INT_MASK, irq_mask);
++	wed_w32(dev, MTK_WED_INT_MASK, irq_mask);
++}
+ 
+ static void
+@@ -234,10 +298,95 @@ mtk_wed_set_ext_int(struct mtk_wed_device *dev, bool en)
+ 	wed_r32(dev, MTK_WED_EXT_INT_MASK);
+ }
+ 
++static void
++mtk_wed_set_512_support(struct mtk_wed_device *dev, bool en)
++{
++	if (en) {
++		wed_w32(dev, MTK_WED_TXDP_CTRL, MTK_WED_TXDP_DW9_OVERWR);
++		wed_w32(dev, MTK_WED_TXP_DW1,
++			FIELD_PREP(MTK_WED_WPDMA_WRITE_TXP, 0x0103));
++	} else {
++		wed_w32(dev, MTK_WED_TXP_DW1,
++			FIELD_PREP(MTK_WED_WPDMA_WRITE_TXP, 0x0100));
++		wed_clr(dev, MTK_WED_TXDP_CTRL, MTK_WED_TXDP_DW9_OVERWR);
++	}
++}
++
++static void
++mtk_wed_dma_enable(struct mtk_wed_device *dev)
++{
++	wed_set(dev, MTK_WED_WPDMA_INT_CTRL,
++		MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV);
++
++	wed_set(dev, MTK_WED_GLO_CFG,
++		MTK_WED_GLO_CFG_TX_DMA_EN |
++		MTK_WED_GLO_CFG_RX_DMA_EN);
++	wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
++		MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN |
++		MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN);
++	wed_set(dev, MTK_WED_WDMA_GLO_CFG,
++		MTK_WED_WDMA_GLO_CFG_RX_DRV_EN);
++
++	wdma_set(dev, MTK_WDMA_GLO_CFG,
++		 MTK_WDMA_GLO_CFG_TX_DMA_EN |
++		 MTK_WDMA_GLO_CFG_RX_INFO1_PRERES |
++		 MTK_WDMA_GLO_CFG_RX_INFO2_PRERES);
++
++	if (dev->ver == MTK_WED_V1) {
++		wdma_set(dev, MTK_WDMA_GLO_CFG,
++			 MTK_WDMA_GLO_CFG_RX_INFO3_PRERES);
++	} else {
++		wed_set(dev, MTK_WED_WPDMA_CTRL,
++			MTK_WED_WPDMA_CTRL_SDL1_FIXED);
++
++		wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
++			MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_PKT_PROC |
++			MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_CRX_SYNC);
++
++		wed_clr(dev, MTK_WED_WPDMA_GLO_CFG,
++			MTK_WED_WPDMA_GLO_CFG_TX_TKID_KEEP |
++			MTK_WED_WPDMA_GLO_CFG_TX_DMAD_DW3_PREV);
++	}
++}
++
++static void
++mtk_wed_dma_disable(struct mtk_wed_device *dev)
++{
++	wed_clr(dev, MTK_WED_WPDMA_GLO_CFG,
++		MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN |
++		MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN);
++
++	wed_clr(dev, MTK_WED_WDMA_GLO_CFG,
++		MTK_WED_WDMA_GLO_CFG_RX_DRV_EN);
++
++	wed_clr(dev, MTK_WED_GLO_CFG,
++		MTK_WED_GLO_CFG_TX_DMA_EN |
++		MTK_WED_GLO_CFG_RX_DMA_EN);
++
++	wdma_m32(dev, MTK_WDMA_GLO_CFG,
++		 MTK_WDMA_GLO_CFG_TX_DMA_EN |
++		 MTK_WDMA_GLO_CFG_RX_INFO1_PRERES |
++		 MTK_WDMA_GLO_CFG_RX_INFO2_PRERES, 0);
++
++	if (dev->ver == MTK_WED_V1) {
++		regmap_write(dev->hw->mirror, dev->hw->index * 4, 0);
++		wdma_m32(dev, MTK_WDMA_GLO_CFG,
++			 MTK_WDMA_GLO_CFG_RX_INFO3_PRERES, 0);
++	} else {
++		wed_clr(dev, MTK_WED_WPDMA_GLO_CFG,
++			MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_PKT_PROC |
++			MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_CRX_SYNC);
++	}
++}
++
+ static void
+ mtk_wed_stop(struct mtk_wed_device *dev)
+ {
+-	regmap_write(dev->hw->mirror, dev->hw->index * 4, 0);
++	mtk_wed_dma_disable(dev);
++
++	if (dev->ver > MTK_WED_V1)
++		mtk_wed_set_512_support(dev, false);
++
+ 	mtk_wed_set_ext_int(dev, false);
+ 
+ 	wed_clr(dev, MTK_WED_CTRL,
+@@ -245,26 +394,18 @@ mtk_wed_stop(struct mtk_wed_device *dev)
+ 		MTK_WED_CTRL_WPDMA_INT_AGENT_EN |
+ 		MTK_WED_CTRL_WED_TX_BM_EN |
+ 		MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
++
+ 	wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, 0);
+ 	wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, 0);
+ 	wdma_w32(dev, MTK_WDMA_INT_MASK, 0);
+ 	wdma_w32(dev, MTK_WDMA_INT_GRP2, 0);
+ 	wed_w32(dev, MTK_WED_WPDMA_INT_MASK, 0);
+-
+-	wed_clr(dev, MTK_WED_GLO_CFG,
+-		MTK_WED_GLO_CFG_TX_DMA_EN |
+-		MTK_WED_GLO_CFG_RX_DMA_EN);
+-	wed_clr(dev, MTK_WED_WPDMA_GLO_CFG,
+-		MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN |
+-		MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN);
+-	wed_clr(dev, MTK_WED_WDMA_GLO_CFG,
+-		MTK_WED_WDMA_GLO_CFG_RX_DRV_EN);
+ }
+ 
+ static void
+ mtk_wed_detach(struct mtk_wed_device *dev)
+ {
+-	struct device_node *wlan_node = dev->wlan.pci_dev->dev.of_node;
++	struct device_node *wlan_node;
+ 	struct mtk_wed_hw *hw = dev->hw;
+ 
+ 	mutex_lock(&hw_lock);
+@@ -279,9 +420,12 @@ mtk_wed_detach(struct mtk_wed_device *dev)
+ 	mtk_wed_free_buffer(dev);
+ 	mtk_wed_free_tx_rings(dev);
+ 
+-	if (of_dma_is_coherent(wlan_node))
+-		regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP,
+-				   BIT(hw->index), BIT(hw->index));
++	if (dev->wlan.bus_type == MTK_BUS_TYPE_PCIE) {
++		wlan_node = dev->wlan.pci_dev->dev.of_node;
++		if (of_dma_is_coherent(wlan_node))
++			regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP,
++					   BIT(hw->index), BIT(hw->index));
++	}
+ 
+ 	if (!hw_list[!hw->index]->wed_dev &&
+ 	    hw->eth->dma_dev != hw->eth->dev)
+@@ -294,15 +438,87 @@ mtk_wed_detach(struct mtk_wed_device *dev)
+ 	mutex_unlock(&hw_lock);
+ }
+ 
++static void
++mtk_wed_bus_init(struct mtk_wed_device *dev)
++{
++#define PCIE_BASE_ADDR0 0x11280000
++
++	if (dev->wlan.bus_type == MTK_BUS_TYPE_PCIE) {
++		struct device_node *node;
++		void __iomem *base_addr;
++
++		node = of_parse_phandle(dev->hw->node, "mediatek,wed_pcie", 0);
++		if (!node) {
++			pr_err("%s: no wed_pcie node\n", __func__);
++			return;
++		}
++		base_addr = of_iomap(node, 0);
++		of_node_put(node);	/* of_parse_phandle() took a reference */
++		if (!base_addr) {
++			pr_err("%s: failed to map wed_pcie\n", __func__);
++			return;
++		}
++		writel(readl(base_addr) | BIT(0), base_addr);
++		iounmap(base_addr);	/* one-shot write: mapping no longer needed */
++
++		wed_w32(dev, MTK_WED_PCIE_INT_CTRL,
++			FIELD_PREP(MTK_WED_PCIE_INT_CTRL_POLL_EN, 2));
++
++		/* pcie interrupt control: pola/source selection */
++		wed_set(dev, MTK_WED_PCIE_INT_CTRL,
++			MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA |
++			FIELD_PREP(MTK_WED_PCIE_INT_CTRL_SRC_SEL, 1));
++		wed_r32(dev, MTK_WED_PCIE_INT_CTRL);
++
++		wed_r32(dev, MTK_WED_PCIE_CFG_INTM);
++		wed_r32(dev, MTK_WED_PCIE_CFG_BASE);
++		wed_w32(dev, MTK_WED_PCIE_CFG_INTM, PCIE_BASE_ADDR0 | 0x180);
++		wed_w32(dev, MTK_WED_PCIE_CFG_BASE, PCIE_BASE_ADDR0 | 0x184);
++
++		wed_r32(dev, MTK_WED_PCIE_CFG_INTM);
++		wed_r32(dev, MTK_WED_PCIE_CFG_BASE);
++
++		/* pcie interrupt status trigger register */
++		wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, BIT(24));
++		wed_r32(dev, MTK_WED_PCIE_INT_TRIGGER);
++
++		/* pola setting */
++		wed_r32(dev, MTK_WED_PCIE_INT_CTRL);
++		wed_set(dev, MTK_WED_PCIE_INT_CTRL,
++			MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA);
++	} else if (dev->wlan.bus_type == MTK_BUS_TYPE_AXI) {
++		wed_set(dev, MTK_WED_WPDMA_INT_CTRL,
++			MTK_WED_WPDMA_INT_CTRL_SIG_SRC |
++			FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_SRC_SEL, 0));
++	}
++}
++
++static void
++mtk_wed_set_wpdma(struct mtk_wed_device *dev)
++{
++	if (dev->ver > MTK_WED_V1) {
++		wed_w32(dev, MTK_WED_WPDMA_CFG_BASE,  dev->wlan.wpdma_int);
++		wed_w32(dev, MTK_WED_WPDMA_CFG_INT_MASK,  dev->wlan.wpdma_mask);
++		wed_w32(dev, MTK_WED_WPDMA_CFG_TX,  dev->wlan.wpdma_tx);
++		wed_w32(dev, MTK_WED_WPDMA_CFG_TX_FREE,  dev->wlan.wpdma_txfree);
++	} else {
++		wed_w32(dev, MTK_WED_WPDMA_CFG_BASE,  dev->wlan.wpdma_phys);
++	}
++}
++
+ static void
+ mtk_wed_hw_init_early(struct mtk_wed_device *dev)
+ {
+ 	u32 mask, set;
+-	u32 offset;
+ 
+ 	mtk_wed_stop(dev);
+ 	mtk_wed_reset(dev, MTK_WED_RESET_WED);
+ 
++	if (dev->ver > MTK_WED_V1)
++		mtk_wed_bus_init(dev);
++
++	mtk_wed_set_wpdma(dev);
++
+ 	mask = MTK_WED_WDMA_GLO_CFG_BT_SIZE |
+ 	       MTK_WED_WDMA_GLO_CFG_DYNAMIC_DMAD_RECYCLE |
+ 	       MTK_WED_WDMA_GLO_CFG_RX_DIS_FSM_AUTO_IDLE;
+@@ -311,30 +527,54 @@ mtk_wed_hw_init_early(struct mtk_wed_device *dev)
+ 	      MTK_WED_WDMA_GLO_CFG_IDLE_DMAD_SUPPLY;
+ 	wed_m32(dev, MTK_WED_WDMA_GLO_CFG, mask, set);
+ 
+-	wdma_set(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_RX_INFO_PRERES);
++	if (dev->ver == MTK_WED_V1) {
++		u32 offset;
++		offset = dev->hw->index ? 0x04000400 : 0;
++		wed_w32(dev, MTK_WED_WDMA_OFFSET0, 0x2a042a20 + offset);
++		wed_w32(dev, MTK_WED_WDMA_OFFSET1, 0x29002800 + offset);
+ 
+-	offset = dev->hw->index ? 0x04000400 : 0;
+-	wed_w32(dev, MTK_WED_WDMA_OFFSET0, 0x2a042a20 + offset);
+-	wed_w32(dev, MTK_WED_WDMA_OFFSET1, 0x29002800 + offset);
++		wed_w32(dev, MTK_WED_PCIE_CFG_BASE, MTK_PCIE_BASE(dev->hw->index));
++	} else {
++		wed_w32(dev, MTK_WED_WDMA_CFG_BASE, dev->hw->wdma_phy);
++		wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_ETH_DMAD_FMT);
++		wed_w32(dev, MTK_WED_WDMA_OFFSET0,
++			FIELD_PREP(MTK_WED_WDMA_OFST0_GLO_INTS,
++				   MTK_WDMA_INT_STATUS) |
++			FIELD_PREP(MTK_WED_WDMA_OFST0_GLO_CFG,
++				   MTK_WDMA_GLO_CFG));
++
++		wed_w32(dev, MTK_WED_WDMA_OFFSET1,
++			FIELD_PREP(MTK_WED_WDMA_OFST1_TX_CTRL,
++				   MTK_WDMA_RING_TX(0)) |
++			FIELD_PREP(MTK_WED_WDMA_OFST1_RX_CTRL,
++				   MTK_WDMA_RING_RX(0)));
++	}
+ 
+-	wed_w32(dev, MTK_WED_PCIE_CFG_BASE, MTK_PCIE_BASE(dev->hw->index));
+-	wed_w32(dev, MTK_WED_WPDMA_CFG_BASE, dev->wlan.wpdma_phys);
+ }
+ 
+ static void
+ mtk_wed_hw_init(struct mtk_wed_device *dev)
+ {
++	int size = dev->buf_ring.size;
++	int rev_size = MTK_WED_TX_RING_SIZE / 2;
++	int thr = 1;
++
+ 	if (dev->init_done)
+ 		return;
+ 
+ 	dev->init_done = true;
+ 	mtk_wed_set_ext_int(dev, false);
++
++	if (dev->ver > MTK_WED_V1) {
++		size = MTK_WED_WDMA_RING_SIZE * 2 + dev->buf_ring.size;
++		rev_size = size;
++		thr = 0;
++	}
++
+ 	wed_w32(dev, MTK_WED_TX_BM_CTRL,
+ 		MTK_WED_TX_BM_CTRL_PAUSE |
+-		FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM,
+-			   dev->buf_ring.size / 128) |
+-		FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM,
+-			   MTK_WED_TX_RING_SIZE / 256));
++		FIELD_PREP(MTK_WED_TX_BM_CTRL_VLD_GRP_NUM, size / 128) |
++		FIELD_PREP(MTK_WED_TX_BM_CTRL_RSV_GRP_NUM, rev_size / 128));
+ 
+ 	wed_w32(dev, MTK_WED_TX_BM_BASE, dev->buf_ring.desc_phys);
+ 
+@@ -347,28 +587,38 @@ mtk_wed_hw_init(struct mtk_wed_device *dev)
+ 	wed_w32(dev, MTK_WED_TX_BM_BUF_LEN, MTK_WED_PKT_SIZE);
+ 
+ 	wed_w32(dev, MTK_WED_TX_BM_DYN_THR,
+-		FIELD_PREP(MTK_WED_TX_BM_DYN_THR_LO, 1) |
++		FIELD_PREP(MTK_WED_TX_BM_DYN_THR_LO, thr) |
+ 		MTK_WED_TX_BM_DYN_THR_HI);
+ 
++	if (dev->ver > MTK_WED_V1) {
++		wed_w32(dev, MTK_WED_TX_TKID_CTRL,
++			MTK_WED_TX_TKID_CTRL_PAUSE |
++			FIELD_PREP(MTK_WED_TX_TKID_CTRL_VLD_GRP_NUM,
++				   dev->buf_ring.size / 128) |
++			FIELD_PREP(MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM,
++				   dev->buf_ring.size / 128));
++		wed_w32(dev, MTK_WED_TX_TKID_DYN_THR,
++			FIELD_PREP(MTK_WED_TX_TKID_DYN_THR_LO, 0) |
++			MTK_WED_TX_TKID_DYN_THR_HI);
++	}
+ 	mtk_wed_reset(dev, MTK_WED_RESET_TX_BM);
+ 
+-	wed_set(dev, MTK_WED_CTRL,
+-		MTK_WED_CTRL_WED_TX_BM_EN |
+-		MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
+-
+ 	wed_clr(dev, MTK_WED_TX_BM_CTRL, MTK_WED_TX_BM_CTRL_PAUSE);
++	if (dev->ver > MTK_WED_V1)
++		wed_clr(dev, MTK_WED_TX_TKID_CTRL, MTK_WED_TX_TKID_CTRL_PAUSE);
+ }
+ 
+ static void
+-mtk_wed_ring_reset(struct mtk_wdma_desc *desc, int size)
++mtk_wed_ring_reset(struct mtk_wdma_desc *desc, int size, int scale)
+ {
+ 	int i;
+ 
+ 	for (i = 0; i < size; i++) {
+-		desc[i].buf0 = 0;
+-		desc[i].ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE);
+-		desc[i].buf1 = 0;
+-		desc[i].info = 0;
++		desc->buf0 = 0;
++		desc->ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE);
++		desc->buf1 = 0;
++		desc->info = 0;
++		desc += scale;
+ 	}
+ }
+ 
+@@ -424,7 +674,7 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev)
+ 		if (!desc)
+ 			continue;
+ 
+-		mtk_wed_ring_reset(desc, MTK_WED_TX_RING_SIZE);
++		mtk_wed_ring_reset(desc, MTK_WED_TX_RING_SIZE, 1); /* tx rings use scale 1 */
+ 	}
+ 
+ 	if (mtk_wed_poll_busy(dev))
+@@ -481,16 +731,16 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev)
+ 
+ static int
+ mtk_wed_ring_alloc(struct mtk_wed_device *dev, struct mtk_wed_ring *ring,
+-		   int size)
++		   int size, int scale)
+ {
+ 	ring->desc = dma_alloc_coherent(dev->hw->dev,
+-					size * sizeof(*ring->desc),
++					size * sizeof(*ring->desc) * scale,
+ 					&ring->desc_phys, GFP_KERNEL);
+ 	if (!ring->desc)
+ 		return -ENOMEM;
+ 
+ 	ring->size = size;
+-	mtk_wed_ring_reset(ring->desc, size);
++	mtk_wed_ring_reset(ring->desc, size, scale);
+ 
+ 	return 0;
+ }
+@@ -500,7 +750,7 @@ mtk_wed_wdma_ring_setup(struct mtk_wed_device *dev, int idx, int size)
+ {
+ 	struct mtk_wed_ring *wdma = &dev->tx_wdma[idx];
+ 
+-	if (mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE))
++	if (mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE, dev->ver))
+ 		return -ENOMEM;
+ 
+ 	wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_BASE,
+@@ -521,60 +771,36 @@ static void
+ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask)
+ {
+ 	u32 wdma_mask;
+-	u32 val;
+ 	int i;
+ 
+ 	for (i = 0; i < ARRAY_SIZE(dev->tx_wdma); i++)
+ 		if (!dev->tx_wdma[i].desc)
+ 			mtk_wed_wdma_ring_setup(dev, i, 16);
+ 
+-	wdma_mask = FIELD_PREP(MTK_WDMA_INT_MASK_RX_DONE, GENMASK(1, 0));
+ 
+ 	mtk_wed_hw_init(dev);
+ 
+-	wed_set(dev, MTK_WED_CTRL,
+-		MTK_WED_CTRL_WDMA_INT_AGENT_EN |
+-		MTK_WED_CTRL_WPDMA_INT_AGENT_EN |
+-		MTK_WED_CTRL_WED_TX_BM_EN |
+-		MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
+-
+-	wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, MTK_WED_PCIE_INT_TRIGGER_STATUS);
++	mtk_wed_set_int(dev, irq_mask);
+ 
+-	wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER,
+-		MTK_WED_WPDMA_INT_TRIGGER_RX_DONE |
+-		MTK_WED_WPDMA_INT_TRIGGER_TX_DONE);
+-
+-	wed_set(dev, MTK_WED_WPDMA_INT_CTRL,
+-		MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV);
+-
+-	wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, wdma_mask);
+-	wed_clr(dev, MTK_WED_WDMA_INT_CTRL, wdma_mask);
+-
+-	wdma_w32(dev, MTK_WDMA_INT_MASK, wdma_mask);
+-	wdma_w32(dev, MTK_WDMA_INT_GRP2, wdma_mask);
+ 
+-	wed_w32(dev, MTK_WED_WPDMA_INT_MASK, irq_mask);
+-	wed_w32(dev, MTK_WED_INT_MASK, irq_mask);
++	mtk_wed_set_ext_int(dev, true);
+ 
+-	wed_set(dev, MTK_WED_GLO_CFG,
+-		MTK_WED_GLO_CFG_TX_DMA_EN |
+-		MTK_WED_GLO_CFG_RX_DMA_EN);
+-	wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
+-		MTK_WED_WPDMA_GLO_CFG_TX_DRV_EN |
+-		MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN);
+-	wed_set(dev, MTK_WED_WDMA_GLO_CFG,
+-		MTK_WED_WDMA_GLO_CFG_RX_DRV_EN);
++	if (dev->ver == MTK_WED_V1) {
++		u32 val;
+ 
+-	mtk_wed_set_ext_int(dev, true);
+-	val = dev->wlan.wpdma_phys |
+-	      MTK_PCIE_MIRROR_MAP_EN |
+-	      FIELD_PREP(MTK_PCIE_MIRROR_MAP_WED_ID, dev->hw->index);
++		val = dev->wlan.wpdma_phys |
++		      MTK_PCIE_MIRROR_MAP_EN |
++		      FIELD_PREP(MTK_PCIE_MIRROR_MAP_WED_ID, dev->hw->index);
+ 
+-	if (dev->hw->index)
+-		val |= BIT(1);
+-	val |= BIT(0);
+-	regmap_write(dev->hw->mirror, dev->hw->index * 4, val);
++		if (dev->hw->index)
++			val |= BIT(1);
++		val |= BIT(0);
++		regmap_write(dev->hw->mirror, dev->hw->index * 4, val);
++	} else {
++		mtk_wed_set_512_support(dev, true);
++	}
+ 
++	mtk_wed_dma_enable(dev);
+ 	dev->running = true;
+ }
+ 
+@@ -588,15 +814,15 @@ mtk_wed_attach(struct mtk_wed_device *dev)
+ 	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
+ 			 "mtk_wed_attach without holding the RCU read lock");
+ 
+-	if (pci_domain_nr(dev->wlan.pci_dev->bus) > 1 ||
+-	    !try_module_get(THIS_MODULE))
++	/* on failure, still fall through so the RCU read lock is dropped */
++	if (!try_module_get(THIS_MODULE))
+ 		ret = -ENODEV;
+ 
+ 	rcu_read_unlock();
+ 
+ 	if (ret)
+ 		return ret;
+ 
+ 	mutex_lock(&hw_lock);
+ 
+ 	hw = mtk_wed_assign(dev);
+@@ -606,8 +828,6 @@ mtk_wed_attach(struct mtk_wed_device *dev)
+ 		goto out;
+ 	}
+ 
+-	dev_info(&dev->wlan.pci_dev->dev, "attaching wed device %d\n", hw->index);
+-
+ 	dev->hw = hw;
+ 	dev->dev = hw->dev;
+ 	dev->irq = hw->irq;
+@@ -617,6 +837,9 @@ mtk_wed_attach(struct mtk_wed_device *dev)
+ 	    of_dma_is_coherent(hw->eth->dev->of_node))
+ 		mtk_eth_set_dma_device(hw->eth, hw->dev);
+ 
++	dev->ver = FIELD_GET(MTK_WED_REV_ID_MAJOR,
++			    wed_r32(dev, MTK_WED_REV_ID));
++
+ 	ret = mtk_wed_buffer_alloc(dev);
+ 	if (ret) {
+ 		mtk_wed_detach(dev);
+@@ -624,7 +847,10 @@ mtk_wed_attach(struct mtk_wed_device *dev)
+ 	}
+ 
+ 	mtk_wed_hw_init_early(dev);
+-	regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP, BIT(hw->index), 0);
++
++	if (dev->ver == MTK_WED_V1)
++		regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP,
++				   BIT(hw->index), 0);
+ 
+ out:
+ 	mutex_unlock(&hw_lock);
+@@ -651,7 +877,7 @@ mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs)
+ 
+ 	BUG_ON(idx > ARRAY_SIZE(dev->tx_ring));
+ 
+-	if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE))
++	if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE, 1))
+ 		return -ENOMEM;
+ 
+ 	if (mtk_wed_wdma_ring_setup(dev, idx, MTK_WED_WDMA_RING_SIZE))
+@@ -678,21 +904,24 @@ static int
+ mtk_wed_txfree_ring_setup(struct mtk_wed_device *dev, void __iomem *regs)
+ {
+ 	struct mtk_wed_ring *ring = &dev->txfree_ring;
+-	int i;
++	int i, idx = 1;
++
++	if(dev->ver > MTK_WED_V1)
++		idx = 0;
+ 
+ 	/*
+ 	 * For txfree event handling, the same DMA ring is shared between WED
+ 	 * and WLAN. The WLAN driver accesses the ring index registers through
+ 	 * WED
+ 	 */
+-	ring->reg_base = MTK_WED_RING_RX(1);
++	ring->reg_base = MTK_WED_RING_RX(idx);
+ 	ring->wpdma = regs;
+ 
+ 	for (i = 0; i < 12; i += 4) {
+ 		u32 val = readl(regs + i);
+ 
+-		wed_w32(dev, MTK_WED_RING_RX(1) + i, val);
+-		wed_w32(dev, MTK_WED_WPDMA_RING_RX(1) + i, val);
++		wed_w32(dev, MTK_WED_RING_RX(idx) + i, val);
++		wed_w32(dev, MTK_WED_WPDMA_RING_RX(idx) + i, val);
+ 	}
+ 
+ 	return 0;
+@@ -706,10 +935,8 @@ mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask)
+ 	val = wed_r32(dev, MTK_WED_EXT_INT_STATUS);
+ 	wed_w32(dev, MTK_WED_EXT_INT_STATUS, val);
+ 	val &= MTK_WED_EXT_INT_STATUS_ERROR_MASK;
+-	if (!dev->hw->num_flows)
+-		val &= ~MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD;
+-	if (val && net_ratelimit())
+-		pr_err("mtk_wed%d: error status=%08x\n", dev->hw->index, val);
++	WARN_RATELIMIT(val, "mtk_wed%d: error status=%08x\n",
++		       dev->hw->index, val);
+ 
+ 	val = wed_r32(dev, MTK_WED_INT_STATUS);
+ 	val &= mask;
+@@ -780,7 +1007,8 @@ out:
+ }
+ 
+ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
+-		    void __iomem *wdma, int index)
++			void __iomem *wdma, u32 wdma_phy, int index)
++
+ {
+ 	static const struct mtk_wed_ops wed_ops = {
+ 		.attach = mtk_wed_attach,
+@@ -830,21 +1058,27 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
+ 	hw->eth = eth;
+ 	hw->dev = &pdev->dev;
+ 	hw->wdma = wdma;
++	hw->wdma_phy = wdma_phy;
+ 	hw->index = index;
+ 	hw->irq = irq;
+-	hw->mirror = syscon_regmap_lookup_by_phandle(eth_np,
+-						     "mediatek,pcie-mirror");
+-	hw->hifsys = syscon_regmap_lookup_by_phandle(eth_np,
+-						     "mediatek,hifsys");
+-	if (IS_ERR(hw->mirror) || IS_ERR(hw->hifsys)) {
+-		kfree(hw);
+-		goto unlock;
+-	}
+ 
+-	if (!index) {
+-		regmap_write(hw->mirror, 0, 0);
+-		regmap_write(hw->mirror, 4, 0);
++	if (!MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2)) {
++		hw->mirror = syscon_regmap_lookup_by_phandle(eth_np,
++							     "mediatek,pcie-mirror");
++		hw->hifsys = syscon_regmap_lookup_by_phandle(eth_np,
++							     "mediatek,hifsys");
++
++		if (IS_ERR(hw->mirror) || IS_ERR(hw->hifsys)) {
++			kfree(hw);
++			goto unlock;
++		}
++
++		if (!index) {
++			regmap_write(hw->mirror, 0, 0);
++			regmap_write(hw->mirror, 4, 0);
++		}
+ 	}
++
+ 	mtk_wed_hw_add_debugfs(hw);
+ 
+ 	hw_list[index] = hw;
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed.h b/drivers/net/ethernet/mediatek/mtk_wed.h
+index 981ec613f..9b17b7405 100644
+--- a/drivers/net/ethernet/mediatek/mtk_wed.h
++++ b/drivers/net/ethernet/mediatek/mtk_wed.h
+@@ -8,6 +8,19 @@
+ #include <linux/debugfs.h>
+ #include <linux/regmap.h>
+ #include <linux/netdevice.h>
++#define MTK_PCIE_BASE(n)		(0x1a143000 + (n) * 0x2000)
++
++#define MTK_WED_PKT_SIZE		1900
++#define MTK_WED_BUF_SIZE		2048
++#define MTK_WED_BUF_PER_PAGE		(PAGE_SIZE / 2048)
++
++#define MTK_WED_TX_RING_SIZE		2048
++#define MTK_WED_WDMA_RING_SIZE		512
++#define MTK_WED_MAX_GROUP_SIZE		0x100
++#define MTK_WED_VLD_GROUP_SIZE		0x40
++#define MTK_WED_PER_GROUP_PKT		128
++
++#define MTK_WED_FBUF_SIZE		128
+ 
+ struct mtk_eth;
+ 
+@@ -23,6 +36,7 @@ struct mtk_wed_hw {
+ 	struct mtk_wed_device *wed_dev;
+ 	u32 debugfs_reg;
+ 	u32 num_flows;
++	u32 wdma_phy;
+ 	char dirname[5];
+ 	int irq;
+ 	int index;
+@@ -101,14 +115,14 @@ wpdma_txfree_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
+ }
+ 
+ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
+-		    void __iomem *wdma, int index);
++		    void __iomem *wdma, u32 wdma_phy, int index);
+ void mtk_wed_exit(void);
+ int mtk_wed_flow_add(int index);
+ void mtk_wed_flow_remove(int index);
+ #else
+ static inline void
+ mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
+-	       void __iomem *wdma, int index)
++	       void __iomem *wdma, u32 wdma_phy, int index)
+ {
+ }
+ static inline void
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
+index a81d3fd1a..f420f187e 100644
+--- a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
++++ b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
+@@ -116,6 +116,9 @@ wed_txinfo_show(struct seq_file *s, void *data)
+ 		DUMP_WDMA(WDMA_GLO_CFG),
+ 		DUMP_WDMA_RING(WDMA_RING_RX(0)),
+ 		DUMP_WDMA_RING(WDMA_RING_RX(1)),
++
++		DUMP_STR("TX FREE"),
++		DUMP_WED(WED_RX_MIB(0)),
+ 	};
+ 	struct mtk_wed_hw *hw = s->private;
+ 	struct mtk_wed_device *dev = hw->wed_dev;
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed_regs.h b/drivers/net/ethernet/mediatek/mtk_wed_regs.h
+index 0a0465ea5..69f136ed4 100644
+--- a/drivers/net/ethernet/mediatek/mtk_wed_regs.h
++++ b/drivers/net/ethernet/mediatek/mtk_wed_regs.h
+@@ -4,9 +4,15 @@
+ #ifndef __MTK_WED_REGS_H
+ #define __MTK_WED_REGS_H
+ 
++#if defined(CONFIG_MEDIATEK_NETSYS_V2)
++#define MTK_WDMA_DESC_CTRL_LEN1			GENMASK(13, 0)
++#define MTK_WDMA_DESC_CTRL_LAST_SEG1		BIT(14)
++#define MTK_WDMA_DESC_CTRL_BURST		BIT(15)
++#else
+ #define MTK_WDMA_DESC_CTRL_LEN1			GENMASK(14, 0)
+ #define MTK_WDMA_DESC_CTRL_LAST_SEG1		BIT(15)
+ #define MTK_WDMA_DESC_CTRL_BURST		BIT(16)
++#endif
+ #define MTK_WDMA_DESC_CTRL_LEN0			GENMASK(29, 16)
+ #define MTK_WDMA_DESC_CTRL_LAST_SEG0		BIT(30)
+ #define MTK_WDMA_DESC_CTRL_DMA_DONE		BIT(31)
+@@ -18,6 +24,10 @@ struct mtk_wdma_desc {
+ 	__le32 info;
+ } __packed __aligned(4);
+ 
++#define MTK_WED_REV_ID					0x004
++#define MTK_WED_REV_ID_MAJOR				GENMASK(31, 28)
++#define MTK_WED_REV_ID_MINOR				GENMASK(27, 16)
++
+ #define MTK_WED_RESET					0x008
+ #define MTK_WED_RESET_TX_BM				BIT(0)
+ #define MTK_WED_RESET_TX_FREE_AGENT			BIT(4)
+@@ -41,6 +51,7 @@ struct mtk_wdma_desc {
+ #define MTK_WED_CTRL_RESERVE_EN				BIT(12)
+ #define MTK_WED_CTRL_RESERVE_BUSY			BIT(13)
+ #define MTK_WED_CTRL_FINAL_DIDX_READ			BIT(24)
++#define MTK_WED_CTRL_ETH_DMAD_FMT			BIT(25)
+ #define MTK_WED_CTRL_MIB_READ_CLEAR			BIT(28)
+ 
+ #define MTK_WED_EXT_INT_STATUS				0x020
+@@ -49,6 +60,10 @@ struct mtk_wdma_desc {
+ #define MTK_WED_EXT_INT_STATUS_TKID_TITO_INVALID	BIT(4)
+ #define MTK_WED_EXT_INT_STATUS_TX_FBUF_LO_TH		BIT(8)
+ #define MTK_WED_EXT_INT_STATUS_TX_FBUF_HI_TH		BIT(9)
++#if defined(CONFIG_MEDIATEK_NETSYS_V2)
++#define MTK_WED_EXT_INT_STATUS_TX_TKID_LO_TH		BIT(10)
++#define MTK_WED_EXT_INT_STATUS_TX_TKID_HI_TH		BIT(11)
++#endif
+ #define MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH		BIT(12)
+ #define MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH		BIT(13)
+ #define MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR	BIT(16)
+@@ -57,16 +72,24 @@ struct mtk_wdma_desc {
+ #define MTK_WED_EXT_INT_STATUS_RX_DRV_INIT_WDMA_EN	BIT(19)
+ #define MTK_WED_EXT_INT_STATUS_RX_DRV_BM_DMAD_COHERENT	BIT(20)
+ #define MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR	BIT(21)
+-#define MTK_WED_EXT_INT_STATUS_TX_DRV_W_RESP_ERR	BIT(22)
++#define MTK_WED_EXT_INT_STATUS_TX_DMA_R_RESP_ERR	BIT(22)
++#define MTK_WED_EXT_INT_STATUS_TX_DMA_W_RESP_ERR	BIT(23)
+ #define MTK_WED_EXT_INT_STATUS_RX_DRV_DMA_RECYCLE	BIT(24)
++#define MTK_WED_EXT_INT_STATUS_RX_DRV_GET_BM_DMAD_SKIP	BIT(25)
++#define MTK_WED_EXT_INT_STATUS_WPDMA_RX_D_DRV_ERR	BIT(26)
++#define MTK_WED_EXT_INT_STATUS_WPDMA_MID_RDY		BIT(27)
++
+ #define MTK_WED_EXT_INT_STATUS_ERROR_MASK		(MTK_WED_EXT_INT_STATUS_TF_LEN_ERR | \
+ 							 MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD | \
+ 							 MTK_WED_EXT_INT_STATUS_TKID_TITO_INVALID | \
++							 MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH | \
++							 MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH | \
+ 							 MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR | \
+ 							 MTK_WED_EXT_INT_STATUS_RX_DRV_W_RESP_ERR | \
++							 MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT | \
+ 							 MTK_WED_EXT_INT_STATUS_RX_DRV_INIT_WDMA_EN | \
+-							 MTK_WED_EXT_INT_STATUS_TX_DRV_R_RESP_ERR | \
+-							 MTK_WED_EXT_INT_STATUS_TX_DRV_W_RESP_ERR)
++							 MTK_WED_EXT_INT_STATUS_TX_DMA_R_RESP_ERR | \
++							 MTK_WED_EXT_INT_STATUS_TX_DMA_W_RESP_ERR)
+ 
+ #define MTK_WED_EXT_INT_MASK				0x028
+ 
+@@ -80,10 +103,6 @@ struct mtk_wdma_desc {
+ 
+ #define MTK_WED_TX_BM_BASE				0x084
+ 
+-#define MTK_WED_TX_BM_TKID				0x088
+-#define MTK_WED_TX_BM_TKID_START			GENMASK(15, 0)
+-#define MTK_WED_TX_BM_TKID_END				GENMASK(31, 16)
+-
+ #define MTK_WED_TX_BM_BUF_LEN				0x08c
+ 
+ #define MTK_WED_TX_BM_INTF				0x09c
+@@ -93,9 +112,38 @@ struct mtk_wdma_desc {
+ #define MTK_WED_TX_BM_INTF_TKID_READ			BIT(29)
+ 
+ #define MTK_WED_TX_BM_DYN_THR				0x0a0
++#if defined(CONFIG_MEDIATEK_NETSYS_V2)
++#define MTK_WED_TX_BM_DYN_THR_LO			GENMASK(8, 0)
++#define MTK_WED_TX_BM_DYN_THR_HI			GENMASK(24, 16)
++
++#define MTK_WED_TX_BM_TKID				0x0c8
++#define MTK_WED_TX_BM_TKID_START			GENMASK(15, 0)
++#define MTK_WED_TX_BM_TKID_END				GENMASK(31, 16)
++#else
+ #define MTK_WED_TX_BM_DYN_THR_LO			GENMASK(6, 0)
+ #define MTK_WED_TX_BM_DYN_THR_HI			GENMASK(22, 16)
+ 
++#define MTK_WED_TX_BM_TKID				0x088
++#define MTK_WED_TX_BM_TKID_START			GENMASK(15, 0)
++#define MTK_WED_TX_BM_TKID_END				GENMASK(31, 16)
++#endif
++
++#define MTK_WED_TX_TKID_CTRL				0x0c0
++#define MTK_WED_TX_TKID_CTRL_VLD_GRP_NUM		GENMASK(6, 0)
++#define MTK_WED_TX_TKID_CTRL_RSV_GRP_NUM		GENMASK(22, 16)
++#define MTK_WED_TX_TKID_CTRL_PAUSE			BIT(28)
++
++#define MTK_WED_TX_TKID_DYN_THR				0x0e0
++#define MTK_WED_TX_TKID_DYN_THR_LO			GENMASK(6, 0)
++#define MTK_WED_TX_TKID_DYN_THR_HI			GENMASK(22, 16)
++
++#define MTK_WED_TXP_DW0					0x120
++#define MTK_WED_TXP_DW1					0x124
++#define MTK_WED_WPDMA_WRITE_TXP				GENMASK(31, 16)
++#define MTK_WED_TXDP_CTRL				0x130
++#define MTK_WED_TXDP_DW9_OVERWR				BIT(9)
++#define MTK_WED_RX_BM_TKID_MIB				0x1cc
++
+ #define MTK_WED_INT_STATUS				0x200
+ #define MTK_WED_INT_MASK				0x204
+ 
+@@ -125,6 +173,7 @@ struct mtk_wdma_desc {
+ #define MTK_WED_RESET_IDX_RX				GENMASK(17, 16)
+ 
+ #define MTK_WED_TX_MIB(_n)				(0x2a0 + (_n) * 4)
++#define MTK_WED_RX_MIB(_n)				(0x2e0 + (_n) * 4)
+ 
+ #define MTK_WED_RING_TX(_n)				(0x300 + (_n) * 0x10)
+ 
+@@ -139,6 +188,19 @@ struct mtk_wdma_desc {
+ #define MTK_WED_WPDMA_GLO_CFG_TX_DRV_BUSY		BIT(1)
+ #define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EN			BIT(2)
+ #define MTK_WED_WPDMA_GLO_CFG_RX_DRV_BUSY		BIT(3)
++/* CONFIG_MEDIATEK_NETSYS_V2 */
++#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_PKT_PROC	BIT(4)
++#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_R1_PKT_PROC	BIT(5)
++#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_CRX_SYNC	BIT(6)
++#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_R1_CRX_SYNC	BIT(7)
++#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_EVENT_PKT_FMT_VER	GENMASK(18, 16)
++#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_UNSUPPORT_FMT	BIT(19)
++#define MTK_WED_WPDMA_GLO_CFG_RX_DRV_UEVENT_PKT_FMT_CHK BIT(20)
++#define MTK_WED_WPDMA_GLO_CFG_RX_DDONE2_WR		BIT(21)
++#define MTK_WED_WPDMA_GLO_CFG_TX_TKID_KEEP		BIT(24)
++#define MTK_WED_WPDMA_GLO_CFG_TX_DMAD_DW3_PREV		BIT(28)
++
++/* CONFIG_MEDIATEK_NETSYS_V1 */
+ #define MTK_WED_WPDMA_GLO_CFG_RX_BT_SIZE		GENMASK(5, 4)
+ #define MTK_WED_WPDMA_GLO_CFG_TX_WB_DDONE		BIT(6)
+ #define MTK_WED_WPDMA_GLO_CFG_BIG_ENDIAN		BIT(7)
+@@ -152,24 +214,54 @@ struct mtk_wdma_desc {
+ #define MTK_WED_WPDMA_GLO_CFG_FIRST_TOKEN_ONLY		BIT(26)
+ #define MTK_WED_WPDMA_GLO_CFG_OMIT_RX_INFO		BIT(27)
+ #define MTK_WED_WPDMA_GLO_CFG_OMIT_TX_INFO		BIT(28)
++
+ #define MTK_WED_WPDMA_GLO_CFG_BYTE_SWAP			BIT(29)
++#define MTK_WED_WPDMA_GLO_CFG_TX_DDONE_CHK		BIT(30)
+ #define MTK_WED_WPDMA_GLO_CFG_RX_2B_OFFSET		BIT(31)
+ 
+ #define MTK_WED_WPDMA_RESET_IDX				0x50c
+ #define MTK_WED_WPDMA_RESET_IDX_TX			GENMASK(3, 0)
+ #define MTK_WED_WPDMA_RESET_IDX_RX			GENMASK(17, 16)
+ 
++#define MTK_WED_WPDMA_CTRL				0x518
++#define MTK_WED_WPDMA_CTRL_SDL1_FIXED			BIT(31)
++
+ #define MTK_WED_WPDMA_INT_CTRL				0x520
+ #define MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV		BIT(21)
++#define MTK_WED_WPDMA_INT_CTRL_SIG_SRC			BIT(22)
++#define MTK_WED_WPDMA_INT_CTRL_SRC_SEL			GENMASK(17, 16)
+ 
+ #define MTK_WED_WPDMA_INT_MASK				0x524
+ 
+-#define MTK_WED_PCIE_CFG_BASE				0x560
++#define MTK_WED_WPDMA_INT_CTRL_TX			0x530
++#define MTK_WED_WPDMA_INT_CTRL_TX0_DONE_EN 		BIT(0)
++#define MTK_WED_WPDMA_INT_CTRL_TX0_DONE_CLR 		BIT(1)
++#define MTK_WED_WPDMA_INT_CTRL_TX0_DONE_TRIG		GENMASK(6, 2)
++#define MTK_WED_WPDMA_INT_CTRL_TX1_DONE_EN		BIT(8)
++#define MTK_WED_WPDMA_INT_CTRL_TX1_DONE_CLR		BIT(9)
++#define MTK_WED_WPDMA_INT_CTRL_TX1_DONE_TRIG		GENMASK(14, 10)
++
++#define MTK_WED_WPDMA_INT_CTRL_RX			0x534
++
++#define MTK_WED_WPDMA_INT_CTRL_TX_FREE			0x538
++#define MTK_WED_WPDMA_INT_CTRL_TX_FREE_DONE_EN		BIT(0)
++#define MTK_WED_WPDMA_INT_CTRL_TX_FREE_DONE_CLR		BIT(1)
++#define MTK_WED_WPDMA_INT_CTRL_TX_FREE_DONE_TRIG	GENMASK(6, 2)
+ 
++#define MTK_WED_PCIE_CFG_BASE				0x560
++#define MTK_WED_PCIE_CFG_INTM				0x564
++#define MTK_WED_PCIE_CFG_MSIS				0x568
+ #define MTK_WED_PCIE_INT_TRIGGER			0x570
+ #define MTK_WED_PCIE_INT_TRIGGER_STATUS			BIT(16)
+ 
++#define MTK_WED_PCIE_INT_CTRL				0x57c
++#define MTK_WED_PCIE_INT_CTRL_MSK_EN_POLA		BIT(20)
++#define MTK_WED_PCIE_INT_CTRL_SRC_SEL			GENMASK(17, 16)
++#define MTK_WED_PCIE_INT_CTRL_POLL_EN 			GENMASK(13, 12)
+ #define MTK_WED_WPDMA_CFG_BASE				0x580
++#define MTK_WED_WPDMA_CFG_INT_MASK			0x584
++#define MTK_WED_WPDMA_CFG_TX				0x588
++#define MTK_WED_WPDMA_CFG_TX_FREE			0x58c
+ 
+ #define MTK_WED_WPDMA_TX_MIB(_n)			(0x5a0 + (_n) * 4)
+ #define MTK_WED_WPDMA_TX_COHERENT_MIB(_n)		(0x5d0 + (_n) * 4)
+@@ -203,14 +295,22 @@ struct mtk_wdma_desc {
+ #define MTK_WED_WDMA_RESET_IDX_RX			GENMASK(17, 16)
+ #define MTK_WED_WDMA_RESET_IDX_DRV			GENMASK(25, 24)
+ 
++#define MTK_WED_WDMA_INT_CLR				0xa24
++#define MTK_WED_WDMA_INT_CLR_RX_DONE			GENMASK(17, 16)
++
+ #define MTK_WED_WDMA_INT_TRIGGER			0xa28
+ #define MTK_WED_WDMA_INT_TRIGGER_RX_DONE		GENMASK(17, 16)
+ 
+ #define MTK_WED_WDMA_INT_CTRL				0xa2c
+-#define MTK_WED_WDMA_INT_CTRL_POLL_SRC_SEL		GENMASK(17, 16)
++#define MTK_WED_WDMA_INT_POLL_SRC_SEL			GENMASK(17, 16)
+ 
++#define MTK_WED_WDMA_CFG_BASE				0xaa0
+ #define MTK_WED_WDMA_OFFSET0				0xaa4
+ #define MTK_WED_WDMA_OFFSET1				0xaa8
++#define MTK_WED_WDMA_OFST0_GLO_INTS			GENMASK(15, 0)
++#define MTK_WED_WDMA_OFST0_GLO_CFG			GENMASK(31, 16)
++#define MTK_WED_WDMA_OFST1_TX_CTRL			GENMASK(15, 0)
++#define MTK_WED_WDMA_OFST1_RX_CTRL			GENMASK(31, 16)
+ 
+ #define MTK_WED_WDMA_RX_MIB(_n)				(0xae0 + (_n) * 4)
+ #define MTK_WED_WDMA_RX_RECYCLE_MIB(_n)			(0xae8 + (_n) * 4)
+@@ -221,14 +321,21 @@ struct mtk_wdma_desc {
+ #define MTK_WED_RING_OFS_CPU_IDX			0x08
+ #define MTK_WED_RING_OFS_DMA_IDX			0x0c
+ 
++#define MTK_WDMA_RING_TX(_n)				(0x000 + (_n) * 0x10)
+ #define MTK_WDMA_RING_RX(_n)				(0x100 + (_n) * 0x10)
+ 
+ #define MTK_WDMA_GLO_CFG				0x204
+-#define MTK_WDMA_GLO_CFG_RX_INFO_PRERES			GENMASK(28, 26)
++#define MTK_WDMA_GLO_CFG_TX_DMA_EN			BIT(0)
++#define MTK_WDMA_GLO_CFG_RX_DMA_EN			BIT(2)
++#define MTK_WDMA_GLO_CFG_RX_INFO3_PRERES		BIT(26)
++#define MTK_WDMA_GLO_CFG_RX_INFO2_PRERES		BIT(27)
++#define MTK_WDMA_GLO_CFG_RX_INFO1_PRERES		BIT(28)
++
+ 
+ #define MTK_WDMA_RESET_IDX				0x208
+ #define MTK_WDMA_RESET_IDX_TX				GENMASK(3, 0)
+ #define MTK_WDMA_RESET_IDX_RX				GENMASK(17, 16)
++#define MTK_WDMA_INT_STATUS				0x220
+ 
+ #define MTK_WDMA_INT_MASK				0x228
+ #define MTK_WDMA_INT_MASK_TX_DONE			GENMASK(3, 0)
+diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h
+index 7e00cca06..24742604b 100644
+--- a/include/linux/soc/mediatek/mtk_wed.h
++++ b/include/linux/soc/mediatek/mtk_wed.h
+@@ -8,6 +8,19 @@
+ 
+ #define MTK_WED_TX_QUEUES		2
+ 
++enum {
++	MTK_NO_WED,
++	MTK_WED_V1,
++	MTK_WED_V2,
++	MTK_WED_VMAX
++};
++
++enum {
++	MTK_BUS_TYPE_PCIE,
++	MTK_BUS_TYPE_AXI,
++	MTK_BUS_TYPE_MAX
++};
++
+ struct mtk_wed_hw;
+ struct mtk_wdma_desc;
+ 
+@@ -28,6 +41,7 @@ struct mtk_wed_device {
+ 	bool init_done, running;
+ 	int wdma_idx;
+ 	int irq;
++	u8 ver;
+ 
+ 	struct mtk_wed_ring tx_ring[MTK_WED_TX_QUEUES];
+ 	struct mtk_wed_ring txfree_ring;
+@@ -43,8 +57,19 @@ struct mtk_wed_device {
+ 	/* filled by driver: */
+ 	struct {
+ 		struct pci_dev *pci_dev;
+-
+-		u32 wpdma_phys;
++		void __iomem *base;
++		u32 bus_type;
++
++		union {
++			u32 wpdma_phys;
++			u32 wpdma_int;
++		};
++		u32 wpdma_mask;
++		u32 wpdma_tx;
++		u32 wpdma_txfree;
++
++		u8 tx_tbit[MTK_WED_TX_QUEUES];
++		u8 txfree_tbit;
+ 
+ 		u16 token_start;
+ 		unsigned int nbuf;
+-- 
+2.18.0
+
diff --git a/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9997-add-wed-rx-support-for-mt7896.patch b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9997-add-wed-rx-support-for-mt7896.patch
new file mode 100755
index 0000000..c435e05
--- /dev/null
+++ b/autobuild_mac80211_release/target/linux/mediatek/patches-5.4/9997-add-wed-rx-support-for-mt7896.patch
@@ -0,0 +1,3416 @@
+From bc8244ada5c668374813f7f9b73d990bf2695aaf Mon Sep 17 00:00:00 2001
+From: Sujuan Chen <sujuan.chen@mediatek.com>
+Date: Wed, 15 Jun 2022 14:38:54 +0800
+Subject: [PATCH 8/8] 9997-add-wed-rx-support-for-mt7896
+
+Signed-off-by: Sujuan Chen <sujuan.chen@mediatek.com>
+---
+ arch/arm64/boot/dts/mediatek/mt7986a.dtsi     |  42 +-
+ arch/arm64/boot/dts/mediatek/mt7986b.dtsi     |  42 +-
+ drivers/net/ethernet/mediatek/Makefile        |   2 +-
+ drivers/net/ethernet/mediatek/mtk_wed.c       | 544 +++++++++++++++--
+ drivers/net/ethernet/mediatek/mtk_wed.h       |  50 ++
+ drivers/net/ethernet/mediatek/mtk_wed_ccif.c  | 121 ++++
+ drivers/net/ethernet/mediatek/mtk_wed_ccif.h  |  45 ++
+ .../net/ethernet/mediatek/mtk_wed_debugfs.c   |  90 +++
+ drivers/net/ethernet/mediatek/mtk_wed_mcu.c   | 561 ++++++++++++++++++
+ drivers/net/ethernet/mediatek/mtk_wed_mcu.h   | 125 ++++
+ drivers/net/ethernet/mediatek/mtk_wed_regs.h  | 145 ++++-
+ drivers/net/ethernet/mediatek/mtk_wed_wo.c    | 548 +++++++++++++++++
+ drivers/net/ethernet/mediatek/mtk_wed_wo.h    | 334 +++++++++++
+ include/linux/soc/mediatek/mtk_wed.h          |  63 +-
+ 14 files changed, 2643 insertions(+), 69 deletions(-)
+ mode change 100644 => 100755 drivers/net/ethernet/mediatek/mtk_wed.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_wed_ccif.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_wed_ccif.h
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_wed_mcu.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_wed_mcu.h
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_wed_wo.c
+ create mode 100644 drivers/net/ethernet/mediatek/mtk_wed_wo.h
+
+diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
+index 644255b35..ddcc0b809 100644
+--- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
++++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi
+@@ -65,6 +65,12 @@
+ 		interrupt-parent = <&gic>;
+ 		interrupts = <GIC_SPI 205 IRQ_TYPE_LEVEL_HIGH>;
+ 		mediatek,wed_pcie = <&wed_pcie>;
++		mediatek,ap2woccif = <&ap2woccif0>;
++		mediatek,wocpu_ilm = <&wocpu0_ilm>;
++		mediatek,wocpu_dlm = <&wocpu0_dlm>;
++		mediatek,wocpu_boot = <&cpu_boot>;
++		mediatek,wocpu_emi = <&wocpu0_emi>;
++		mediatek,wocpu_data = <&wocpu_data>;
+ 	};
+ 
+ 	wed1: wed@15011000 {
+@@ -74,15 +80,26 @@
+ 		interrupt-parent = <&gic>;
+ 		interrupts = <GIC_SPI 206 IRQ_TYPE_LEVEL_HIGH>;
+ 		mediatek,wed_pcie = <&wed_pcie>;
++		mediatek,ap2woccif = <&ap2woccif1>;
++		mediatek,wocpu_ilm = <&wocpu1_ilm>;
++		mediatek,wocpu_dlm = <&wocpu1_dlm>;
++		mediatek,wocpu_boot = <&cpu_boot>;
++		mediatek,wocpu_emi = <&wocpu1_emi>;
++		mediatek,wocpu_data = <&wocpu_data>;
+ 	};
+ 
+-	ap2woccif: ap2woccif@151A5000 {
+-		compatible = "mediatek,ap2woccif";
+-		reg = <0 0x151A5000 0 0x1000>,
+-		      <0 0x151AD000 0 0x1000>;
++	ap2woccif0: ap2woccif@151A5000 {
++		compatible = "mediatek,ap2woccif", "syscon";
++		reg = <0 0x151A5000 0 0x1000>;
+ 		interrupt-parent = <&gic>;
+-		interrupts = <GIC_SPI 211 IRQ_TYPE_LEVEL_HIGH>,
+-			     <GIC_SPI 212 IRQ_TYPE_LEVEL_HIGH>;
++		interrupts = <GIC_SPI 211 IRQ_TYPE_LEVEL_HIGH>;
++        };
++
++	ap2woccif1: ap2woccif@151AD000 {
++		compatible = "mediatek,ap2woccif", "syscon";
++		reg = <0 0x151AD000 0 0x1000>;
++		interrupt-parent = <&gic>;
++		interrupts = <GIC_SPI 212 IRQ_TYPE_LEVEL_HIGH>;
+         };
+ 
+ 	wocpu0_ilm: wocpu0_ilm@151E0000 {
+@@ -95,10 +112,17 @@
+                 reg = <0 0x151F0000 0 0x8000>;
+         };
+ 
+-	wocpu_dlm: wocpu_dlm@151E8000 {
++	wocpu0_dlm: wocpu_dlm@151E8000 {
++		compatible = "mediatek,wocpu_dlm";
++		reg = <0 0x151E8000 0 0x2000>;
++
++		resets = <&ethsysrst 0>;
++		reset-names = "wocpu_rst";
++	};
++
++	wocpu1_dlm: wocpu_dlm@151F8000 {
+ 		compatible = "mediatek,wocpu_dlm";
+-		reg = <0 0x151E8000 0 0x2000>,
+-		      <0 0x151F8000 0 0x2000>;
++		reg = <0 0x151F8000 0 0x2000>;
+ 
+ 		resets = <&ethsysrst 0>;
+ 		reset-names = "wocpu_rst";
+diff --git a/arch/arm64/boot/dts/mediatek/mt7986b.dtsi b/arch/arm64/boot/dts/mediatek/mt7986b.dtsi
+index 67bf86f6a..6710b388b 100644
+--- a/arch/arm64/boot/dts/mediatek/mt7986b.dtsi
++++ b/arch/arm64/boot/dts/mediatek/mt7986b.dtsi
+@@ -65,6 +65,12 @@
+ 		interrupt-parent = <&gic>;
+ 		interrupts = <GIC_SPI 205 IRQ_TYPE_LEVEL_HIGH>;
+ 		mediatek,wed_pcie = <&wed_pcie>;
++		mediatek,ap2woccif = <&ap2woccif0>;
++		mediatek,wocpu_ilm = <&wocpu0_ilm>;
++		mediatek,wocpu_dlm = <&wocpu0_dlm>;
++		mediatek,wocpu_boot = <&cpu_boot>;
++		mediatek,wocpu_emi = <&wocpu0_emi>;
++		mediatek,wocpu_data = <&wocpu_data>;
+ 	};
+ 
+ 	wed1: wed@15011000 {
+@@ -74,15 +80,26 @@
+ 		interrupt-parent = <&gic>;
+ 		interrupts = <GIC_SPI 206 IRQ_TYPE_LEVEL_HIGH>;
+ 		mediatek,wed_pcie = <&wed_pcie>;
++		mediatek,ap2woccif = <&ap2woccif1>;
++		mediatek,wocpu_ilm = <&wocpu1_ilm>;
++		mediatek,wocpu_dlm = <&wocpu1_dlm>;
++		mediatek,wocpu_boot = <&cpu_boot>;
++		mediatek,wocpu_emi = <&wocpu1_emi>;
++		mediatek,wocpu_data = <&wocpu_data>;
+ 	};
+ 
+-	ap2woccif: ap2woccif@151A5000 {
+-		compatible = "mediatek,ap2woccif";
+-		reg = <0 0x151A5000 0 0x1000>,
+-		      <0 0x151AD000 0 0x1000>;
++	ap2woccif0: ap2woccif@151A5000 {
++		compatible = "mediatek,ap2woccif", "syscon";
++		reg = <0 0x151A5000 0 0x1000>;
+ 		interrupt-parent = <&gic>;
+-		interrupts = <GIC_SPI 211 IRQ_TYPE_LEVEL_HIGH>,
+-			     <GIC_SPI 212 IRQ_TYPE_LEVEL_HIGH>;
++		interrupts = <GIC_SPI 211 IRQ_TYPE_LEVEL_HIGH>;
++        };
++
++	ap2woccif1: ap2woccif@151AD000 {
++		compatible = "mediatek,ap2woccif", "syscon";
++		reg = <0 0x151AD000 0 0x1000>;
++		interrupt-parent = <&gic>;
++		interrupts = <GIC_SPI 212 IRQ_TYPE_LEVEL_HIGH>;
+         };
+ 
+ 	wocpu0_ilm: wocpu0_ilm@151E0000 {
+@@ -95,10 +112,17 @@
+                 reg = <0 0x151F0000 0 0x8000>;
+         };
+ 
+-	wocpu_dlm: wocpu_dlm@151E8000 {
++	wocpu0_dlm: wocpu_dlm@151E8000 {
++		compatible = "mediatek,wocpu_dlm";
++		reg = <0 0x151E8000 0 0x2000>;
++
++		resets = <&ethsysrst 0>;
++		reset-names = "wocpu_rst";
++	};
++
++	wocpu1_dlm: wocpu_dlm@151F8000 {
+ 		compatible = "mediatek,wocpu_dlm";
+-		reg = <0 0x151E8000 0 0x2000>,
+-		      <0 0x151F8000 0 0x2000>;
++		reg = <0 0x151F8000 0 0x2000>;
+ 
+ 		resets = <&ethsysrst 0>;
+ 		reset-names = "wocpu_rst";
+diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
+index 3528f1b3c..0c724a55c 100644
+--- a/drivers/net/ethernet/mediatek/Makefile
++++ b/drivers/net/ethernet/mediatek/Makefile
+@@ -10,5 +10,5 @@ mtk_eth-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed.o
+ ifdef CONFIG_DEBUG_FS
+ mtk_eth-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed_debugfs.o
+ endif
+-obj-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed_ops.o
++obj-$(CONFIG_NET_MEDIATEK_SOC_WED) += mtk_wed_ops.o mtk_wed_wo.o mtk_wed_mcu.o mtk_wed_ccif.o
+ obj-$(CONFIG_NET_MEDIATEK_HNAT)			+= mtk_hnat/
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
+old mode 100644
+new mode 100755
+index 48b0353bb..c4aab12b0
+--- a/drivers/net/ethernet/mediatek/mtk_wed.c
++++ b/drivers/net/ethernet/mediatek/mtk_wed.c
+@@ -13,11 +13,19 @@
+ #include <linux/debugfs.h>
+ #include <linux/iopoll.h>
+ #include <linux/soc/mediatek/mtk_wed.h>
++
+ #include "mtk_eth_soc.h"
+ #include "mtk_wed_regs.h"
+ #include "mtk_wed.h"
+ #include "mtk_ppe.h"
+-
++#include "mtk_wed_mcu.h"
++#include "mtk_wed_wo.h"
++
++struct wo_cmd_ring {
++	u32 q_base;
++	u32 cnt;
++	u32 unit;
++};
+ static struct mtk_wed_hw *hw_list[2];
+ static DEFINE_MUTEX(hw_lock);
+ 
+@@ -51,6 +59,12 @@ wdma_set(struct mtk_wed_device *dev, u32 reg, u32 mask)
+ 	wdma_m32(dev, reg, 0, mask);
+ }
+ 
++static void
++wdma_clr(struct mtk_wed_device *dev, u32 reg, u32 mask)
++{
++	wdma_m32(dev, reg, mask, 0);
++}
++
+ static u32
+ mtk_wed_read_reset(struct mtk_wed_device *dev)
+ {
+@@ -68,6 +82,48 @@ mtk_wed_reset(struct mtk_wed_device *dev, u32 mask)
+ 		WARN_ON_ONCE(1);
+ }
+ 
++static void
++mtk_wed_wo_reset(struct mtk_wed_device *dev)
++{
++	struct mtk_wed_wo *wo = dev->hw->wed_wo;
++	unsigned long timeout = jiffies + WOCPU_TIMEOUT;
++	u8 state_done = WOIF_DISABLE_DONE;
++	u8 state = WO_STATE_DISABLE;
++	void __iomem *reg;
++	u32 value, mask;
++
++	mtk_wed_mcu_send_msg(wo, MODULE_ID_WO, WO_CMD_CHANGE_STATE,
++			     &state, sizeof(state), false);
++
++	/* poll the WO status until it reads done or WOCPU_TIMEOUT elapses */
++	do {
++		value = wed_r32(dev, MTK_WED_SCR0 + 4 * WED_DUMMY_CR_WO_STATUS);
++	} while (value != state_done && !time_after(jiffies, timeout));
++
++	switch (dev->hw->index) {
++	case 0:
++		mask = WOCPU_WO0_MCUSYS_RESET_MASK;
++		break;
++	case 1:
++		mask = WOCPU_WO1_MCUSYS_RESET_MASK;
++		break;
++	default:
++		dev_err(dev->hw->dev, "wrong mtk_wed%d\n",
++			dev->hw->index);
++		return;
++	}
++
++	reg = ioremap(WOCPU_MCUSYS_RESET_ADDR, 4);
++	if (!reg)
++		return;
++
++	/* set this WO's reset bit, then clear it again */
++	value = readl(reg);
++	writel(value | mask, reg);
++	writel(value & ~mask, reg);
++	iounmap(reg);
++}
++
+ static struct mtk_wed_hw *
+ mtk_wed_assign(struct mtk_wed_device *dev)
+ {
+@@ -205,6 +261,42 @@ free_pagelist:
+ 	kfree(page_list);
+ }
+ 
++static int
++mtk_wed_rx_bm_alloc(struct mtk_wed_device *dev)
++{
++	struct mtk_rxbm_desc *desc;
++	dma_addr_t desc_phys;
++	int ring_size;
++
++	ring_size = dev->wlan.rx_nbuf;
++	dev->rx_buf_ring.size = ring_size;
++	desc = dma_alloc_coherent(dev->hw->dev, ring_size * sizeof(*desc),
++				  &desc_phys, GFP_KERNEL);
++	if (!desc)
++		return -ENOMEM;
++
++	dev->rx_buf_ring.desc = desc;
++	dev->rx_buf_ring.desc_phys = desc_phys;
++
++	dev->wlan.init_rx_buf(dev, dev->wlan.rx_pkt);
++	return 0;
++}
++
++static void
++mtk_wed_free_rx_bm(struct mtk_wed_device *dev)
++{
++	struct mtk_rxbm_desc *desc = dev->rx_buf_ring.desc;
++	int ring_size = dev->rx_buf_ring.size;
++
++	if (!desc)
++		return;
++
++	dev->wlan.release_rx_buf(dev);
++	/* free with the handle mtk_wed_rx_bm_alloc() stored for this ring */
++	dma_free_coherent(dev->hw->dev, ring_size * sizeof(*desc),
++			  desc, dev->rx_buf_ring.desc_phys);
++}
++
+ static void
+ mtk_wed_free_ring(struct mtk_wed_device *dev, struct mtk_wed_ring *ring, int scale)
+ {
+@@ -226,13 +318,22 @@ mtk_wed_free_tx_rings(struct mtk_wed_device *dev)
+ 		mtk_wed_free_ring(dev, &dev->tx_wdma[i], dev->ver);
+ }
+ 
++static void
++mtk_wed_free_rx_rings(struct mtk_wed_device *dev)
++{
++	mtk_wed_free_rx_bm(dev);
++	mtk_wed_free_ring(dev, &dev->rro.rro_ring, 1);
++}
++
+ static void
+ mtk_wed_set_int(struct mtk_wed_device *dev, u32 irq_mask)
+ {
+ 	u32 wdma_mask;
+ 
+ 	wdma_mask = FIELD_PREP(MTK_WDMA_INT_MASK_RX_DONE, GENMASK(1, 0));
+-
++	if (dev->ver > MTK_WED_V1)
++		wdma_mask |= FIELD_PREP(MTK_WDMA_INT_MASK_TX_DONE,
++					GENMASK(1, 0));
+ 	/* wed control cr set */
+ 	wed_set(dev, MTK_WED_CTRL,
+ 		MTK_WED_CTRL_WDMA_INT_AGENT_EN |
+@@ -251,7 +352,7 @@ mtk_wed_set_int(struct mtk_wed_device *dev, u32 irq_mask)
+ 		wed_set(dev, MTK_WED_WPDMA_INT_CTRL,
+ 			MTK_WED_WPDMA_INT_CTRL_SUBRT_ADV);
+ 	} else {
+-		/* initail tx interrupt trigger */
++
+ 		wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_TX,
+ 			MTK_WED_WPDMA_INT_CTRL_TX0_DONE_EN |
+ 			MTK_WED_WPDMA_INT_CTRL_TX0_DONE_CLR |
+@@ -262,22 +363,30 @@ mtk_wed_set_int(struct mtk_wed_device *dev, u32 irq_mask)
+ 			FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_TX1_DONE_TRIG,
+ 				   dev->wlan.tx_tbit[1]));
+ 
+-		/* initail txfree interrupt trigger */
+ 		wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_TX_FREE,
+ 			MTK_WED_WPDMA_INT_CTRL_TX_FREE_DONE_EN |
+ 			MTK_WED_WPDMA_INT_CTRL_TX_FREE_DONE_CLR |
+ 			FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_TX_FREE_DONE_TRIG,
+ 				    dev->wlan.txfree_tbit));
++
++		wed_w32(dev, MTK_WED_WPDMA_INT_CTRL_RX,
++			MTK_WED_WPDMA_INT_CTRL_RX0_EN |
++			MTK_WED_WPDMA_INT_CTRL_RX0_CLR |
++			MTK_WED_WPDMA_INT_CTRL_RX1_EN |
++			MTK_WED_WPDMA_INT_CTRL_RX1_CLR |
++			FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RX0_DONE_TRIG,
++				   dev->wlan.rx_tbit[0]) |
++			FIELD_PREP(MTK_WED_WPDMA_INT_CTRL_RX1_DONE_TRIG,
++				   dev->wlan.rx_tbit[1]));
+ 	}
+-	/* initail wdma interrupt agent */
+ 	wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, wdma_mask);
+ 	if (dev->ver == MTK_WED_V1) {
+ 		wed_clr(dev, MTK_WED_WDMA_INT_CTRL, wdma_mask);
+ 	} else {
+ 		wed_w32(dev, MTK_WED_WDMA_INT_CLR, wdma_mask);
+ 		wed_set(dev, MTK_WED_WDMA_INT_CTRL,
+-			FIELD_PREP(MTK_WED_WDMA_INT_POLL_SRC_SEL,dev->wdma_idx));
+-
++			FIELD_PREP(MTK_WED_WDMA_INT_POLL_SRC_SEL,
++				   dev->wdma_idx));
+ 	}
+ 
+ 	wdma_w32(dev, MTK_WDMA_INT_MASK, wdma_mask);
+@@ -312,6 +421,39 @@ mtk_wed_set_512_support(struct mtk_wed_device *dev, bool en)
+ 	}
+ }
+ 
++static void
++mtk_wed_check_wfdma_rx_fill(struct mtk_wed_device *dev, int idx)
++{
++#define MTK_WFMDA_RX_DMA_EN 	BIT(2)
++
++	int timeout = 3;
++	u32 cur_idx, regs;
++
++	do {
++		regs = MTK_WED_WPDMA_RING_RX_DATA(idx) +
++		       MTK_WED_RING_OFS_CPU_IDX;
++		cur_idx = wed_r32(dev, regs);
++		if (cur_idx == MTK_WED_RX_RING_SIZE - 1)
++			break;
++
++		usleep_range(100000, 200000);
++	} while (timeout-- > 0);
++	/* timeout only drops below zero when every poll above missed */
++	if (timeout >= 0) {
++		unsigned int val;
++
++		val = wifi_r32(dev, dev->wlan.wpdma_rx_glo -
++			       dev->wlan.phy_base);
++		val |= MTK_WFMDA_RX_DMA_EN;
++
++		wifi_w32(dev, dev->wlan.wpdma_rx_glo -
++			 dev->wlan.phy_base, val);
++	} else {
++		dev_err(dev->hw->dev, "mtk_wed%d: rx dma enable failed!\n",
++			       dev->hw->index);
++	}
++}
++
+ static void
+ mtk_wed_dma_enable(struct mtk_wed_device *dev)
+ {
+@@ -336,9 +478,14 @@ mtk_wed_dma_enable(struct mtk_wed_device *dev)
+ 		wdma_set(dev, MTK_WDMA_GLO_CFG,
+ 			 MTK_WDMA_GLO_CFG_RX_INFO3_PRERES);
+ 	} else {
++		int idx = 0;
++
+ 		wed_set(dev, MTK_WED_WPDMA_CTRL,
+ 			MTK_WED_WPDMA_CTRL_SDL1_FIXED);
+ 
++		wed_set(dev, MTK_WED_WDMA_GLO_CFG,
++			MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK);
++
+ 		wed_set(dev, MTK_WED_WPDMA_GLO_CFG,
+ 			MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_PKT_PROC |
+ 			MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_CRX_SYNC);
+@@ -346,6 +493,15 @@ mtk_wed_dma_enable(struct mtk_wed_device *dev)
+ 		wed_clr(dev, MTK_WED_WPDMA_GLO_CFG,
+ 			MTK_WED_WPDMA_GLO_CFG_TX_TKID_KEEP |
+ 			MTK_WED_WPDMA_GLO_CFG_TX_DMAD_DW3_PREV);
++
++		wed_set(dev, MTK_WED_WPDMA_RX_D_GLO_CFG,
++			MTK_WED_WPDMA_RX_D_RX_DRV_EN |
++			FIELD_PREP(MTK_WED_WPDMA_RX_D_RXD_READ_LEN, 0x18) |
++			FIELD_PREP(MTK_WED_WPDMA_RX_D_INIT_PHASE_RXEN_SEL,
++				   0x2));
++
++		for (idx = 0; idx < MTK_WED_RX_QUEUES; idx++)
++			mtk_wed_check_wfdma_rx_fill(dev, idx);
+ 	}
+ }
+ 
+@@ -363,19 +519,23 @@ mtk_wed_dma_disable(struct mtk_wed_device *dev)
+ 		MTK_WED_GLO_CFG_TX_DMA_EN |
+ 		MTK_WED_GLO_CFG_RX_DMA_EN);
+ 
+-	wdma_m32(dev, MTK_WDMA_GLO_CFG,
++	wdma_clr(dev, MTK_WDMA_GLO_CFG,
+ 		 MTK_WDMA_GLO_CFG_TX_DMA_EN |
+ 		 MTK_WDMA_GLO_CFG_RX_INFO1_PRERES |
+-		 MTK_WDMA_GLO_CFG_RX_INFO2_PRERES, 0);
++		 MTK_WDMA_GLO_CFG_RX_INFO2_PRERES);
+ 
+ 	if (dev->ver == MTK_WED_V1) {
+ 		regmap_write(dev->hw->mirror, dev->hw->index * 4, 0);
+-		wdma_m32(dev, MTK_WDMA_GLO_CFG,
+-			 MTK_WDMA_GLO_CFG_RX_INFO3_PRERES, 0);
++		wdma_clr(dev, MTK_WDMA_GLO_CFG,
++			 MTK_WDMA_GLO_CFG_RX_INFO3_PRERES);
+ 	} else {
+ 		wed_clr(dev, MTK_WED_WPDMA_GLO_CFG,
+ 			MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_PKT_PROC |
+ 			MTK_WED_WPDMA_GLO_CFG_RX_DRV_R0_CRX_SYNC);
++		wed_clr(dev, MTK_WED_WPDMA_RX_D_GLO_CFG,
++			MTK_WED_WPDMA_RX_D_RX_DRV_EN);
++		wed_clr(dev, MTK_WED_WDMA_GLO_CFG,
++			MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK);
+ 	}
+ }
+ 
+@@ -395,6 +555,11 @@ mtk_wed_stop(struct mtk_wed_device *dev)
+ 		MTK_WED_CTRL_WED_TX_BM_EN |
+ 		MTK_WED_CTRL_WED_TX_FREE_AGENT_EN);
+ 
++	if (dev->ver > MTK_WED_V1) {
++		wed_clr(dev, MTK_WED_CTRL,
++			MTK_WED_CTRL_WED_RX_BM_EN);
++	}
++
+ 	wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, 0);
+ 	wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, 0);
+ 	wdma_w32(dev, MTK_WDMA_INT_MASK, 0);
+@@ -416,9 +581,15 @@ mtk_wed_detach(struct mtk_wed_device *dev)
+ 	wdma_w32(dev, MTK_WDMA_RESET_IDX, 0);
+ 
+ 	mtk_wed_reset(dev, MTK_WED_RESET_WED);
++	mtk_wed_wo_reset(dev);
++
++	wdma_clr(dev, MTK_WDMA_GLO_CFG, MTK_WDMA_GLO_CFG_TX_DMA_EN);
++	wdma_w32(dev, MTK_WDMA_RESET_IDX, MTK_WDMA_RESET_IDX_TX);
++	wdma_w32(dev, MTK_WDMA_RESET_IDX, 0);
+ 
+ 	mtk_wed_free_buffer(dev);
+ 	mtk_wed_free_tx_rings(dev);
++	mtk_wed_free_rx_rings(dev);
+ 
+ 	if (dev->wlan.bus_type == MTK_BUS_TYPE_PCIE) {
+ 		wlan_node = dev->wlan.pci_dev->dev.of_node;
+@@ -477,7 +648,6 @@ mtk_wed_bus_init(struct mtk_wed_device *dev)
+ 		value = wed_r32(dev, MTK_WED_PCIE_CFG_INTM);
+ 		value = wed_r32(dev, MTK_WED_PCIE_CFG_BASE);
+ 
+-		/* pcie interrupt status trigger register */
+ 		wed_w32(dev, MTK_WED_PCIE_INT_TRIGGER, BIT(24));
+ 		wed_r32(dev, MTK_WED_PCIE_INT_TRIGGER);
+ 
+@@ -501,6 +671,9 @@ mtk_wed_set_wpdma(struct mtk_wed_device *dev)
+ 		wed_w32(dev, MTK_WED_WPDMA_CFG_INT_MASK,  dev->wlan.wpdma_mask);
+ 		wed_w32(dev, MTK_WED_WPDMA_CFG_TX,  dev->wlan.wpdma_tx);
+ 		wed_w32(dev, MTK_WED_WPDMA_CFG_TX_FREE,  dev->wlan.wpdma_txfree);
++
++		wed_w32(dev, MTK_WED_WPDMA_RX_GLO_CFG,  dev->wlan.wpdma_rx_glo);
++		wed_w32(dev, MTK_WED_WPDMA_RX_RING,  dev->wlan.wpdma_rx);
+ 	} else {
+ 		wed_w32(dev, MTK_WED_WPDMA_CFG_BASE,  dev->wlan.wpdma_phys);
+ 	}
+@@ -549,24 +722,92 @@ mtk_wed_hw_init_early(struct mtk_wed_device *dev)
+ 			FIELD_PREP(MTK_WED_WDMA_OFST1_RX_CTRL,
+ 				   MTK_WDMA_RING_RX(0)));
+ 	}
++}
++
++static void
++mtk_wed_rx_bm_hw_init(struct mtk_wed_device *dev)
++{
++	wed_w32(dev, MTK_WED_RX_BM_RX_DMAD,
++		FIELD_PREP(MTK_WED_RX_BM_RX_DMAD_SDL0,  dev->wlan.rx_pkt_size));
++
++	wed_w32(dev, MTK_WED_RX_BM_BASE, dev->rx_buf_ring.desc_phys);
+ 
++	wed_w32(dev, MTK_WED_RX_BM_INIT_PTR, MTK_WED_RX_BM_INIT_SW_TAIL |
++		FIELD_PREP(MTK_WED_RX_BM_SW_TAIL, dev->wlan.rx_pkt));
++
++	wed_w32(dev, MTK_WED_RX_BM_DYN_ALLOC_TH,
++		FIELD_PREP(MTK_WED_RX_BM_DYN_ALLOC_TH_H, 0xffff));
++
++	wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_WED_RX_BM_EN);
+ }
+ 
+ static void
+-mtk_wed_hw_init(struct mtk_wed_device *dev)
++mtk_wed_rro_hw_init(struct mtk_wed_device *dev)
++{
++	wed_w32(dev, MTK_WED_RROQM_MIOD_CFG,
++		FIELD_PREP(MTK_WED_RROQM_MIOD_MID_DW, 0x70 >> 2) |
++		FIELD_PREP(MTK_WED_RROQM_MIOD_MOD_DW, 0x10 >> 2) |
++		FIELD_PREP(MTK_WED_RROQM_MIOD_ENTRY_DW,
++			   MTK_WED_MIOD_ENTRY_CNT >> 2));
++
++	wed_w32(dev, MTK_WED_RROQM_MIOD_CTRL0, dev->rro.miod_desc_phys);
++
++	wed_w32(dev, MTK_WED_RROQM_MIOD_CTRL1,
++		FIELD_PREP(MTK_WED_RROQM_MIOD_CNT, MTK_WED_MIOD_CNT));
++
++	wed_w32(dev, MTK_WED_RROQM_FDBK_CTRL0, dev->rro.fdbk_desc_phys);
++
++	wed_w32(dev, MTK_WED_RROQM_FDBK_CTRL1,
++		FIELD_PREP(MTK_WED_RROQM_FDBK_CNT, MTK_WED_FB_CMD_CNT));
++
++	wed_w32(dev, MTK_WED_RROQM_FDBK_CTRL2, 0);
++
++	wed_w32(dev, MTK_WED_RROQ_BASE_L, dev->rro.rro_ring.desc_phys);
++
++	wed_set(dev, MTK_WED_RROQM_RST_IDX,
++		MTK_WED_RROQM_RST_IDX_MIOD |
++		MTK_WED_RROQM_RST_IDX_FDBK);
++
++	wed_w32(dev, MTK_WED_RROQM_RST_IDX, 0);
++
++	wed_w32(dev, MTK_WED_RROQM_MIOD_CTRL2, MTK_WED_MIOD_CNT -1);
++
++	wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_RX_RRO_QM_EN);
++}
++
++static void
++mtk_wed_route_qm_hw_init(struct mtk_wed_device *dev)
++{
++	wed_w32(dev, MTK_WED_RESET, MTK_WED_RESET_RX_ROUTE_QM);
++
++	do {
++		udelay(100);
++
++		if (!(wed_r32(dev, MTK_WED_RESET) & MTK_WED_RESET_RX_ROUTE_QM))
++			break;
++	} while (1);
++
++	/* configure RX_ROUTE_QM */
++	wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST);
++	wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_TXDMAD_FPORT);
++	wed_set(dev, MTK_WED_RTQM_GLO_CFG,
++		FIELD_PREP(MTK_WED_RTQM_TXDMAD_FPORT, 0x3 + dev->hw->index));
++	wed_clr(dev, MTK_WED_RTQM_GLO_CFG, MTK_WED_RTQM_Q_RST);
++
++	/* enable RX_ROUTE_QM */
++	wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_RX_ROUTE_QM_EN);
++}
++
++static void
++mtk_wed_tx_hw_init(struct mtk_wed_device *dev)
+ {
+ 	int size = dev->buf_ring.size;
+ 	int rev_size = MTK_WED_TX_RING_SIZE / 2;
+ 	int thr = 1;
+ 
+-	if (dev->init_done)
+-		return;
+-
+-	dev->init_done = true;
+-	mtk_wed_set_ext_int(dev, false);
+-
+ 	if (dev->ver > MTK_WED_V1) {
+-		size = MTK_WED_WDMA_RING_SIZE * 2 + dev->buf_ring.size;
++		size = MTK_WED_WDMA_RING_SIZE * ARRAY_SIZE(dev->tx_wdma) +
++		       dev->buf_ring.size;
+ 		rev_size = size;
+ 		thr = 0;
+ 	}
+@@ -609,13 +850,48 @@ mtk_wed_hw_init(struct mtk_wed_device *dev)
+ }
+ 
+ static void
+-mtk_wed_ring_reset(struct mtk_wdma_desc *desc, int size, int scale)
++mtk_wed_rx_hw_init(struct mtk_wed_device *dev)
+ {
++	wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX,
++		MTK_WED_WPDMA_RX_D_RST_CRX_IDX0 |
++		MTK_WED_WPDMA_RX_D_RST_CRX_IDX1 |
++		MTK_WED_WPDMA_RX_D_RST_DRV_IDX0 |
++		MTK_WED_WPDMA_RX_D_RST_DRV_IDX1);
++
++	wed_w32(dev, MTK_WED_WPDMA_RX_D_RST_IDX, 0);
++
++	mtk_wed_rx_bm_hw_init(dev);
++	mtk_wed_rro_hw_init(dev);
++	mtk_wed_route_qm_hw_init(dev);
++}
++
++static void
++mtk_wed_hw_init(struct mtk_wed_device *dev)
++{
++	if (dev->init_done)
++		return;
++
++	dev->init_done = true;
++	mtk_wed_set_ext_int(dev, false);
++	mtk_wed_tx_hw_init(dev);
++	if (dev->ver > MTK_WED_V1)
++		mtk_wed_rx_hw_init(dev);
++}
++
++static void
++mtk_wed_ring_reset(struct mtk_wdma_desc *desc, int size, int scale, bool tx)
++{
++	__le32 ctrl;
+ 	int i;
+ 
++	if (tx)
++		ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE);
++	else
++		ctrl = cpu_to_le32(MTK_WFDMA_DESC_CTRL_TO_HOST);
++
+ 	for (i = 0; i < size; i++) {
+ 		desc->buf0 = 0;
+-		desc->ctrl = cpu_to_le32(MTK_WDMA_DESC_CTRL_DMA_DONE);
++		desc->ctrl = ctrl;
+ 		desc->buf1 = 0;
+ 		desc->info = 0;
+ 		desc += scale;
+@@ -674,7 +950,7 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev)
+ 		if (!desc)
+ 			continue;
+ 
+-		mtk_wed_ring_reset(desc, MTK_WED_TX_RING_SIZE, dev->ver);
++		mtk_wed_ring_reset(desc, MTK_WED_TX_RING_SIZE, dev->ver, true);
+ 	}
+ 
+ 	if (mtk_wed_poll_busy(dev))
+@@ -729,9 +1005,24 @@ mtk_wed_reset_dma(struct mtk_wed_device *dev)
+ 
+ }
+ 
++static int
++mtk_wed_rro_ring_alloc(struct mtk_wed_device *dev, struct mtk_wed_ring *ring,
++		   int size)
++{
++	ring->desc = dma_alloc_coherent(dev->hw->dev,
++					size * sizeof(*ring->desc),
++					&ring->desc_phys, GFP_KERNEL);
++	if (!ring->desc)
++		return -ENOMEM;
++	/* size counts descriptors, so clear size * sizeof(desc) bytes */
++	ring->size = size;
++	memset(ring->desc, 0, size * sizeof(*ring->desc));
++	return 0;
++}
++
+ static int
+ mtk_wed_ring_alloc(struct mtk_wed_device *dev, struct mtk_wed_ring *ring,
+-		   int size, int scale)
++		   int size, int scale, bool tx)
+ {
+ 	ring->desc = dma_alloc_coherent(dev->hw->dev,
+ 					size * sizeof(*ring->desc) * scale,
+@@ -740,17 +1031,18 @@ mtk_wed_ring_alloc(struct mtk_wed_device *dev, struct mtk_wed_ring *ring,
+ 		return -ENOMEM;
+ 
+ 	ring->size = size;
+-	mtk_wed_ring_reset(ring->desc, size, scale);
++	mtk_wed_ring_reset(ring->desc, size, scale, tx);
+ 
+ 	return 0;
+ }
+ 
+ static int
+-mtk_wed_wdma_ring_setup(struct mtk_wed_device *dev, int idx, int size)
++mtk_wed_wdma_rx_ring_setup(struct mtk_wed_device *dev, int idx, int size)
+ {
+ 	struct mtk_wed_ring *wdma = &dev->tx_wdma[idx];
+ 
+-	if (mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE, dev->ver))
++	if (mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE,
++			       dev->ver, true))
+ 		return -ENOMEM;
+ 
+ 	wdma_w32(dev, MTK_WDMA_RING_RX(idx) + MTK_WED_RING_OFS_BASE,
+@@ -767,22 +1059,140 @@ mtk_wed_wdma_ring_setup(struct mtk_wed_device *dev, int idx, int size)
+ 	return 0;
+ }
+ 
++static int
++mtk_wed_wdma_tx_ring_setup(struct mtk_wed_device *dev, int idx, int size)
++{
++	struct mtk_wed_ring *wdma = &dev->rx_wdma[idx];
++
++	if (mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE,
++			       dev->ver, true))
++		return -ENOMEM;
++
++	wdma_w32(dev, MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_BASE,
++		 wdma->desc_phys);
++	wdma_w32(dev, MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_COUNT,
++		 size);
++	wdma_w32(dev,
++		 MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_CPU_IDX, 0);
++	wdma_w32(dev,
++		 MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_DMA_IDX, 0);
++
++	if (idx == 0)  {
++		wed_w32(dev, MTK_WED_WDMA_RING_TX
++			+ MTK_WED_RING_OFS_BASE, wdma->desc_phys);
++		wed_w32(dev, MTK_WED_WDMA_RING_TX
++			+ MTK_WED_RING_OFS_COUNT, size);
++		wed_w32(dev, MTK_WED_WDMA_RING_TX
++			+ MTK_WED_RING_OFS_CPU_IDX, 0);
++		wed_w32(dev, MTK_WED_WDMA_RING_TX
++			+ MTK_WED_RING_OFS_DMA_IDX, 0);
++	}
++
++	return 0;
++}
++
++static int
++mtk_wed_rro_alloc(struct mtk_wed_device *dev)
++{
++	struct device_node *np, *node = dev->hw->node;
++	struct mtk_wed_ring *ring = &dev->rro.rro_ring;
++	struct resource res;
++	int ret;
++
++	np = of_parse_phandle(node, "mediatek,wocpu_dlm", 0);
++	if (!np)
++		return -ENODEV;
++
++	ret = of_address_to_resource(np, 0, &res);
++	of_node_put(np); /* only res.start/size are used from here on */
++	if (ret)
++		return ret;
++
++	dev->rro.rro_desc = ioremap(res.start, resource_size(&res));
++	if (!dev->rro.rro_desc)
++		return -ENOMEM;
++
++	dev->rro.miod_desc_phys = res.start;
++	dev->rro.mcu_view_miod = MTK_WED_WOCPU_VIEW_MIOD_BASE;
++	dev->rro.fdbk_desc_phys = MTK_WED_MIOD_ENTRY_CNT * MTK_WED_MIOD_CNT
++				  + dev->rro.miod_desc_phys;
++
++	if (mtk_wed_rro_ring_alloc(dev, ring, MTK_WED_RRO_QUE_CNT))
++		return -ENOMEM;
++
++	return 0;
++}
++
++static int
++mtk_wed_rro_cfg(struct mtk_wed_device *dev)
++{
++	struct mtk_wed_wo *wo = dev->hw->wed_wo;
++	struct {
++		struct wo_cmd_ring ring[2];
++
++		u32 wed;
++		u8 ver;
++	} req = {
++		.ring = {
++			[0] = {
++				.q_base = dev->rro.mcu_view_miod,
++				.cnt = MTK_WED_MIOD_CNT,
++				.unit = MTK_WED_MIOD_ENTRY_CNT,
++			},
++			[1] = {
++				.q_base = dev->rro.mcu_view_miod +
++					  MTK_WED_MIOD_ENTRY_CNT *
++					  MTK_WED_MIOD_CNT,
++				.cnt = MTK_WED_FB_CMD_CNT,
++				.unit = 4,
++			},
++		},
++		.wed = 0,
++	};
++
++	return mtk_wed_mcu_send_msg(wo, MODULE_ID_WO, WO_CMD_WED_CFG,
++				    &req, sizeof(req), true);
++}
++
++static int
++mtk_wed_send_msg(struct mtk_wed_device *dev, int cmd_id, void *data, int len)
++{
++	struct mtk_wed_wo *wo = dev->hw->wed_wo;
++
++	return mtk_wed_mcu_send_msg(wo, MODULE_ID_WO, cmd_id, data, len, true);
++}
++
++static void
++mtk_wed_ppe_check(struct mtk_wed_device *dev, struct sk_buff *skb,
++			u32 reason, u32 hash)
++{
++	int idx = dev->hw->index;
++	struct mtk_eth *eth = dev->hw->eth;
++	struct ethhdr *eh;
++
++	if (reason == MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED) {
++		if (!skb)
++			return;
++
++		skb_set_mac_header(skb, 0);
++		eh = eth_hdr(skb);
++		skb->protocol = eh->h_proto;
++		mtk_ppe_check_skb(eth->ppe[idx], skb, hash);
++	}
++}
++
+ static void
+ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask)
+ {
+-	u32 wdma_mask;
+-	int i;
++	int i, ret;
+ 
+ 	for (i = 0; i < ARRAY_SIZE(dev->tx_wdma); i++)
+ 		if (!dev->tx_wdma[i].desc)
+-			mtk_wed_wdma_ring_setup(dev, i, 16);
+-
++			mtk_wed_wdma_rx_ring_setup(dev, i, 16);
+ 
+ 	mtk_wed_hw_init(dev);
+ 
+ 	mtk_wed_set_int(dev, irq_mask);
+-
+-
+ 	mtk_wed_set_ext_int(dev, true);
+ 
+ 	if (dev->ver == MTK_WED_V1) {
+@@ -797,6 +1207,19 @@ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask)
+ 		val |= BIT(0);
+ 		regmap_write(dev->hw->mirror, dev->hw->index * 4, val);
+ 	} else {
++		/* driver set mid ready and only once */
++		wed_w32(dev, MTK_WED_EXT_INT_MASK1,
++			MTK_WED_EXT_INT_STATUS_WPDMA_MID_RDY);
++		wed_w32(dev, MTK_WED_EXT_INT_MASK2,
++			MTK_WED_EXT_INT_STATUS_WPDMA_MID_RDY);
++
++		wed_r32(dev, MTK_WED_EXT_INT_MASK1);
++		wed_r32(dev, MTK_WED_EXT_INT_MASK2);
++
++		ret = mtk_wed_rro_cfg(dev);
++		if (ret)
++			return;
++
+ 		mtk_wed_set_512_support(dev, true);
+ 	}
+ 
+@@ -841,9 +1264,17 @@ mtk_wed_attach(struct mtk_wed_device *dev)
+ 			    wed_r32(dev, MTK_WED_REV_ID));
+ 
+ 	ret = mtk_wed_buffer_alloc(dev);
+-	if (ret) {
+-		mtk_wed_detach(dev);
+-		goto out;
++	if (ret)
++		goto error;
++
++	if (dev->ver > MTK_WED_V1) {
++		ret = mtk_wed_rx_bm_alloc(dev);
++		if (ret)
++			goto error;
++
++		ret = mtk_wed_rro_alloc(dev);
++		if (ret)
++			goto error;
+ 	}
+ 
+ 	mtk_wed_hw_init_early(dev);
+@@ -851,7 +1282,12 @@ mtk_wed_attach(struct mtk_wed_device *dev)
+ 	if (dev->ver == MTK_WED_V1)
+ 		regmap_update_bits(hw->hifsys, HIFSYS_DMA_AG_MAP,
+ 				   BIT(hw->index), 0);
++	else
++		ret = mtk_wed_wo_init(hw);
+ 
++error:
++	if (ret)
++		mtk_wed_detach(dev);
+ out:
+ 	mutex_unlock(&hw_lock);
+ 
+@@ -877,10 +1313,10 @@ mtk_wed_tx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs)
+ 
+ 	BUG_ON(idx > ARRAY_SIZE(dev->tx_ring));
+ 
+-	if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE, 1))
++	if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE, 1, true))
+ 		return -ENOMEM;
+ 
+-	if (mtk_wed_wdma_ring_setup(dev, idx, MTK_WED_WDMA_RING_SIZE))
++	if (mtk_wed_wdma_rx_ring_setup(dev, idx, MTK_WED_WDMA_RING_SIZE))
+ 		return -ENOMEM;
+ 
+ 	ring->reg_base = MTK_WED_RING_TX(idx);
+@@ -927,6 +1363,35 @@ mtk_wed_txfree_ring_setup(struct mtk_wed_device *dev, void __iomem *regs)
+ 	return 0;
+ }
+ 
++static int
++mtk_wed_rx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs)
++{
++	struct mtk_wed_ring *ring = &dev->rx_ring[idx];
++
++	/* valid indices are 0 .. ARRAY_SIZE() - 1 */
++	BUG_ON(idx >= ARRAY_SIZE(dev->rx_ring));
++
++	if (mtk_wed_ring_alloc(dev, ring, MTK_WED_RX_RING_SIZE, 1, false))
++		return -ENOMEM;
++
++	if (mtk_wed_wdma_tx_ring_setup(dev, idx, MTK_WED_WDMA_RING_SIZE))
++		return -ENOMEM;
++
++	ring->reg_base = MTK_WED_RING_RX_DATA(idx);
++	ring->wpdma = regs;
++
++	/* WPDMA ->  WED */
++	wpdma_rx_w32(dev, idx, MTK_WED_RING_OFS_BASE, ring->desc_phys);
++	wpdma_rx_w32(dev, idx, MTK_WED_RING_OFS_COUNT, MTK_WED_RX_RING_SIZE);
++
++	wed_w32(dev, MTK_WED_WPDMA_RING_RX_DATA(idx) + MTK_WED_RING_OFS_BASE,
++		ring->desc_phys);
++	wed_w32(dev, MTK_WED_WPDMA_RING_RX_DATA(idx) + MTK_WED_RING_OFS_COUNT,
++		MTK_WED_RX_RING_SIZE);
++
++	return 0;
++}
++
+ static u32
+ mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask)
+ {
+@@ -1014,6 +1479,8 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
+ 		.attach = mtk_wed_attach,
+ 		.tx_ring_setup = mtk_wed_tx_ring_setup,
+ 		.txfree_ring_setup = mtk_wed_txfree_ring_setup,
++		.rx_ring_setup = mtk_wed_rx_ring_setup,
++		.msg_update = mtk_wed_send_msg,
+ 		.start = mtk_wed_start,
+ 		.stop = mtk_wed_stop,
+ 		.reset_dma = mtk_wed_reset_dma,
+@@ -1022,6 +1489,7 @@ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
+ 		.irq_get = mtk_wed_irq_get,
+ 		.irq_set_mask = mtk_wed_irq_set_mask,
+ 		.detach = mtk_wed_detach,
++		.ppe_check = mtk_wed_ppe_check,
+ 	};
+ 	struct device_node *eth_np = eth->dev->of_node;
+ 	struct platform_device *pdev;
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed.h b/drivers/net/ethernet/mediatek/mtk_wed.h
+index 9b17b7405..ec79b0d42 100644
+--- a/drivers/net/ethernet/mediatek/mtk_wed.h
++++ b/drivers/net/ethernet/mediatek/mtk_wed.h
+@@ -13,6 +13,7 @@
+ #define MTK_WED_PKT_SIZE		1900
+ #define MTK_WED_BUF_SIZE		2048
+ #define MTK_WED_BUF_PER_PAGE		(PAGE_SIZE / 2048)
++#define MTK_WED_RX_RING_SIZE		1536
+ 
+ #define MTK_WED_TX_RING_SIZE		2048
+ #define MTK_WED_WDMA_RING_SIZE		512
+@@ -21,8 +22,15 @@
+ #define MTK_WED_PER_GROUP_PKT		128
+ 
+ #define MTK_WED_FBUF_SIZE		128
++#define MTK_WED_MIOD_CNT		16
++#define MTK_WED_FB_CMD_CNT		1024
++#define MTK_WED_RRO_QUE_CNT		8192
++#define MTK_WED_MIOD_ENTRY_CNT		128
++
++#define MODULE_ID_WO		1
+ 
+ struct mtk_eth;
++struct mtk_wed_wo;
+ 
+ struct mtk_wed_hw {
+ 	struct device_node *node;
+@@ -34,12 +42,14 @@ struct mtk_wed_hw {
+ 	struct regmap *mirror;
+ 	struct dentry *debugfs_dir;
+ 	struct mtk_wed_device *wed_dev;
++	struct mtk_wed_wo *wed_wo;
+ 	u32 debugfs_reg;
+ 	u32 num_flows;
+ 	u32 wdma_phy;
+ 	char dirname[5];
+ 	int irq;
+ 	int index;
++	u32 ver;
+ };
+ 
+ struct mtk_wdma_info {
+@@ -66,6 +76,18 @@ wed_r32(struct mtk_wed_device *dev, u32 reg)
+ 	return val;
+ }
+ 
++static inline u32
++wifi_r32(struct mtk_wed_device *dev, u32 reg)
++{
++	return readl(dev->wlan.base + reg);
++}
++
++static inline void
++wifi_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
++{
++	writel(val, dev->wlan.base + reg);
++}
++
+ static inline void
+ wdma_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
+ {
+@@ -114,6 +136,23 @@ wpdma_txfree_w32(struct mtk_wed_device *dev, u32 reg, u32 val)
+ 	writel(val, dev->txfree_ring.wpdma + reg);
+ }
+ 
++static inline u32
++wpdma_rx_r32(struct mtk_wed_device *dev, int ring, u32 reg)
++{
++	if (!dev->rx_ring[ring].wpdma)
++		return 0;
++
++	return readl(dev->rx_ring[ring].wpdma + reg);
++}
++
++static inline void
++wpdma_rx_w32(struct mtk_wed_device *dev, int ring, u32 reg, u32 val)
++{
++	if (!dev->rx_ring[ring].wpdma)
++		return;
++
++	writel(val, dev->rx_ring[ring].wpdma + reg);
++}
+ void mtk_wed_add_hw(struct device_node *np, struct mtk_eth *eth,
+ 		    void __iomem *wdma, u32 wdma_phy, int index);
+ void mtk_wed_exit(void);
+@@ -146,4 +185,15 @@ static inline void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw)
+ }
+ #endif
+ 
++int wed_wo_hardware_init(struct mtk_wed_wo *wo, irq_handler_t isr);
++int wed_wo_mcu_init(struct mtk_wed_wo *wo);
++int mtk_wed_exception_init(struct mtk_wed_wo *wo);
++void mtk_wed_mcu_rx_unsolicited_event(struct mtk_wed_wo *wo, struct sk_buff *skb);
++int mtk_wed_mcu_cmd_sanity_check(struct mtk_wed_wo *wo, struct sk_buff *skb);
++void wed_wo_mcu_debugfs(struct mtk_wed_hw *hw, struct dentry *dir);
++void mtk_wed_mcu_rx_event(struct mtk_wed_wo *wo, struct sk_buff *skb);
++int mtk_wed_mcu_send_msg(struct mtk_wed_wo *wo,int to_id, int cmd,
++			const void *data, int len, bool wait_resp);
++int mtk_wed_wo_rx_poll(struct napi_struct *napi, int budget);
++
+ #endif
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed_ccif.c b/drivers/net/ethernet/mediatek/mtk_wed_ccif.c
+new file mode 100644
+index 000000000..732ffc8cf
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_wed_ccif.c
+@@ -0,0 +1,121 @@
++// SPDX-License-Identifier: GPL-2.0-only
++
++#include <linux/soc/mediatek/mtk_wed.h>
++#include <linux/of_address.h>
++#include <linux/mfd/syscon.h>
++#include <linux/of_irq.h>
++#include "mtk_wed_ccif.h"
++#include "mtk_wed_regs.h"
++#include "mtk_wed_wo.h"
++
++static inline void woif_set_isr(struct mtk_wed_wo *wo, u32 mask)
++{
++	woccif_w32(wo, MTK_WED_WO_CCIF_IRQ0_MASK, mask);
++}
++
++static inline u32 woif_get_csr(struct mtk_wed_wo *wo)
++{
++	u32 val;
++
++	val = woccif_r32(wo, MTK_WED_WO_CCIF_RCHNUM);
++
++	return  val & MTK_WED_WO_CCIF_RCHNUM_MASK;
++}
++
++static inline void woif_set_ack(struct mtk_wed_wo *wo, u32 mask)
++{
++	woccif_w32(wo, MTK_WED_WO_CCIF_ACK, mask);
++}
++
++static inline void woif_kickout(struct mtk_wed_wo *wo)
++{
++	woccif_w32(wo, MTK_WED_WO_CCIF_BUSY, 1 << MTK_WED_WO_TXCH_NUM);
++	woccif_w32(wo, MTK_WED_WO_CCIF_TCHNUM, MTK_WED_WO_TXCH_NUM);
++}
++
++static inline void woif_clear_int(struct mtk_wed_wo *wo, u32 mask)
++{
++	woccif_w32(wo, MTK_WED_WO_CCIF_ACK, mask);
++	woccif_r32(wo, MTK_WED_WO_CCIF_RCHNUM);
++}
++
++/*
++ * Bring up the AP<->WO CCIF channel (regmap, IRQ handler @isr, tx/rx rings).
++ * Returns 0 on success or a negative errno.
++ */
++int wed_wo_hardware_init(struct mtk_wed_wo *wo, irq_handler_t isr)
++{
++	static const struct wed_wo_drv_ops wo_drv_ops = {
++		.kickout = woif_kickout,
++		.set_ack = woif_set_ack,
++		.set_isr = woif_set_isr,
++		.get_csr = woif_get_csr,
++		.clear_int = woif_clear_int,
++	};
++	struct device_node *np, *node = wo->hw->node;
++	struct wed_wo_queue_regs queues;
++	struct regmap *regs;
++	int ret;
++
++	np = of_parse_phandle(node, "mediatek,ap2woccif", 0);
++	if (!np)
++		return -ENODEV;
++
++	/* syscon lookup reports failure as ERR_PTR(), never NULL */
++	regs = syscon_regmap_lookup_by_phandle(np, NULL);
++	if (IS_ERR(regs))
++		return PTR_ERR(regs);
++
++	wo->drv_ops = &wo_drv_ops;
++	wo->ccif.regs = regs;
++	wo->ccif.irq = irq_of_parse_and_map(np, 0);
++	spin_lock_init(&wo->ccif.irq_lock);
++
++	ret = request_irq(wo->ccif.irq, isr, IRQF_TRIGGER_HIGH,
++			  "wo_ccif_isr", wo);
++	if (ret)
++		return ret;	/* nothing was requested, so nothing to free */
++
++	queues.desc_base = MTK_WED_WO_CCIF_DUMMY1;
++	queues.ring_size = MTK_WED_WO_CCIF_DUMMY2;
++	queues.cpu_idx = MTK_WED_WO_CCIF_DUMMY3;
++	queues.dma_idx = MTK_WED_WO_CCIF_SHADOW4;
++
++	ret = mtk_wed_wo_q_alloc(wo, &wo->q_tx, MTK_WED_WO_RING_SIZE,
++				 MTK_WED_WO_CMD_LEN, MTK_WED_WO_TXCH_NUM,
++				 &queues);
++	if (ret)
++		goto free_irq;
++
++	queues.desc_base = MTK_WED_WO_CCIF_DUMMY5;
++	queues.ring_size = MTK_WED_WO_CCIF_DUMMY6;
++	queues.cpu_idx = MTK_WED_WO_CCIF_DUMMY7;
++	queues.dma_idx = MTK_WED_WO_CCIF_SHADOW8;
++
++	ret = mtk_wed_wo_q_alloc(wo, &wo->q_rx, MTK_WED_WO_RING_SIZE,
++				 MTK_WED_WO_CMD_LEN, MTK_WED_WO_RXCH_NUM,
++				 &queues);
++	if (ret)
++		goto free_irq;
++
++	wo->ccif.q_int_mask = MTK_WED_WO_RXCH_INT_MASK;
++	ret = mtk_wed_wo_q_init(wo, mtk_wed_wo_rx_poll);
++	if (ret)
++		goto free_irq;
++
++	wo->ccif.q_exep_mask = MTK_WED_WO_EXCEPTION_INT_MASK;
++	wo->ccif.irqmask = MTK_WED_WO_ALL_INT_MASK;
++	/* rx queue irqmask */
++	wo->drv_ops->set_isr(wo, wo->ccif.irqmask);
++
++	return 0;
++
++free_irq:
++	free_irq(wo->ccif.irq, wo);	/* pairs with request_irq() above */
++
++	return ret;
++}
++
++static void wed_wo_hardware_exit(struct mtk_wed_wo *wo)
++{
++}
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed_ccif.h b/drivers/net/ethernet/mediatek/mtk_wed_ccif.h
+new file mode 100644
+index 000000000..68ade449c
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_wed_ccif.h
+@@ -0,0 +1,45 @@
++// SPDX-License-Identifier: GPL-2.0-only
++
++#ifndef __MTK_WED_CCIF_H
++#define __MTK_WED_CCIF_H
++
++#define MTK_WED_WO_RING_SIZE	256
++#define MTK_WED_WO_CMD_LEN	1504
++
++#define MTK_WED_WO_TXCH_NUM	0
++#define MTK_WED_WO_RXCH_NUM	1
++#define MTK_WED_WO_RXCH_WO_EXCEPTION	7
++
++#define MTK_WED_WO_TXCH_INT_MASK	BIT(0)
++#define MTK_WED_WO_RXCH_INT_MASK	BIT(1)
++#define MTK_WED_WO_EXCEPTION_INT_MASK	BIT(7)
++#define MTK_WED_WO_ALL_INT_MASK		(MTK_WED_WO_RXCH_INT_MASK |	\
++					 MTK_WED_WO_EXCEPTION_INT_MASK)
++
++#define MTK_WED_WO_CCIF_BUSY		0x004
++#define MTK_WED_WO_CCIF_START		0x008
++#define MTK_WED_WO_CCIF_TCHNUM		0x00c
++#define MTK_WED_WO_CCIF_RCHNUM		0x010
++#define MTK_WED_WO_CCIF_RCHNUM_MASK	GENMASK(7, 0)
++
++#define MTK_WED_WO_CCIF_ACK		0x014
++#define MTK_WED_WO_CCIF_IRQ0_MASK	0x018
++#define MTK_WED_WO_CCIF_IRQ1_MASK	0x01c
++#define MTK_WED_WO_CCIF_DUMMY1		0x020
++#define MTK_WED_WO_CCIF_DUMMY2		0x024
++#define MTK_WED_WO_CCIF_DUMMY3		0x028
++#define MTK_WED_WO_CCIF_DUMMY4		0x02c
++#define MTK_WED_WO_CCIF_SHADOW1		0x030
++#define MTK_WED_WO_CCIF_SHADOW2		0x034
++#define MTK_WED_WO_CCIF_SHADOW3		0x038
++#define MTK_WED_WO_CCIF_SHADOW4		0x03c
++#define MTK_WED_WO_CCIF_DUMMY5		0x050
++#define MTK_WED_WO_CCIF_DUMMY6		0x054
++#define MTK_WED_WO_CCIF_DUMMY7		0x058
++#define MTK_WED_WO_CCIF_DUMMY8		0x05c
++#define MTK_WED_WO_CCIF_SHADOW5		0x060
++#define MTK_WED_WO_CCIF_SHADOW6		0x064
++#define MTK_WED_WO_CCIF_SHADOW7		0x068
++#define MTK_WED_WO_CCIF_SHADOW8		0x06c
++
++#endif
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
+index f420f187e..fea7ae2fc 100644
+--- a/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
++++ b/drivers/net/ethernet/mediatek/mtk_wed_debugfs.c
+@@ -2,6 +2,7 @@
+ /* Copyright (C) 2021 Felix Fietkau <nbd@nbd.name> */
+ 
+ #include <linux/seq_file.h>
++#include <linux/soc/mediatek/mtk_wed.h>
+ #include "mtk_wed.h"
+ #include "mtk_wed_regs.h"
+ 
+@@ -18,6 +19,8 @@ enum {
+ 	DUMP_TYPE_WDMA,
+ 	DUMP_TYPE_WPDMA_TX,
+ 	DUMP_TYPE_WPDMA_TXFREE,
++	DUMP_TYPE_WPDMA_RX,
++	DUMP_TYPE_WED_RRO,
+ };
+ 
+ #define DUMP_STR(_str) { _str, 0, DUMP_TYPE_STRING }
+@@ -36,6 +39,10 @@ enum {
+ 
+ #define DUMP_WPDMA_TX_RING(_n) DUMP_RING("WPDMA_TX" #_n, 0, DUMP_TYPE_WPDMA_TX, _n)
+ #define DUMP_WPDMA_TXFREE_RING DUMP_RING("WPDMA_RX1", 0, DUMP_TYPE_WPDMA_TXFREE)
++#define DUMP_WPDMA_RX_RING(_n)	DUMP_RING("WPDMA_RX" #_n, 0, DUMP_TYPE_WPDMA_RX, _n)
++#define DUMP_WED_RRO_RING(_base)DUMP_RING("WED_RRO_MIOD", MTK_##_base, DUMP_TYPE_WED_RRO)
++#define DUMP_WED_RRO_FDBK(_base)DUMP_RING("WED_RRO_FDBK", MTK_##_base, DUMP_TYPE_WED_RRO)
++
+ 
+ static void
+ print_reg_val(struct seq_file *s, const char *name, u32 val)
+@@ -58,6 +65,7 @@ dump_wed_regs(struct seq_file *s, struct mtk_wed_device *dev,
+ 				   cur->name);
+ 			continue;
+ 		case DUMP_TYPE_WED:
++		case DUMP_TYPE_WED_RRO:
+ 			val = wed_r32(dev, cur->offset);
+ 			break;
+ 		case DUMP_TYPE_WDMA:
+@@ -69,6 +77,9 @@ dump_wed_regs(struct seq_file *s, struct mtk_wed_device *dev,
+ 		case DUMP_TYPE_WPDMA_TXFREE:
+ 			val = wpdma_txfree_r32(dev, cur->offset);
+ 			break;
++		case DUMP_TYPE_WPDMA_RX:
++			val = wpdma_rx_r32(dev, cur->base, cur->offset);
++			break;
+ 		}
+ 		print_reg_val(s, cur->name, val);
+ 	}
+@@ -132,6 +143,81 @@ wed_txinfo_show(struct seq_file *s, void *data)
+ }
+ DEFINE_SHOW_ATTRIBUTE(wed_txinfo);
+ 
++static int
++wed_rxinfo_show(struct seq_file *s, void *data)
++{
++	static const struct reg_dump regs[] = {
++		DUMP_STR("WPDMA RX"),
++		DUMP_WPDMA_RX_RING(0),
++		DUMP_WPDMA_RX_RING(1),
++
++		DUMP_STR("WPDMA RX"),
++		DUMP_WED(WED_WPDMA_RX_D_MIB(0)),
++		DUMP_WED_RING(WED_WPDMA_RING_RX_DATA(0)),
++		DUMP_WED(WED_WPDMA_RX_D_PROCESSED_MIB(0)),
++		DUMP_WED(WED_WPDMA_RX_D_MIB(1)),
++		DUMP_WED_RING(WED_WPDMA_RING_RX_DATA(1)),
++		DUMP_WED(WED_WPDMA_RX_D_PROCESSED_MIB(1)),
++		DUMP_WED(WED_WPDMA_RX_D_COHERENT_MIB),
++
++		DUMP_STR("WED RX"),
++		DUMP_WED_RING(WED_RING_RX_DATA(0)),
++		DUMP_WED_RING(WED_RING_RX_DATA(1)),
++
++		DUMP_STR("WED RRO"),
++		DUMP_WED_RRO_RING(WED_RROQM_MIOD_CTRL0),
++		DUMP_WED(WED_RROQM_MID_MIB),
++		DUMP_WED(WED_RROQM_MOD_MIB),
++		DUMP_WED(WED_RROQM_MOD_COHERENT_MIB),
++		DUMP_WED_RRO_FDBK(WED_RROQM_FDBK_CTRL0),
++		DUMP_WED(WED_RROQM_FDBK_IND_MIB),
++		DUMP_WED(WED_RROQM_FDBK_ENQ_MIB),
++		DUMP_WED(WED_RROQM_FDBK_ANC_MIB),
++		DUMP_WED(WED_RROQM_FDBK_ANC2H_MIB),
++
++		DUMP_STR("WED Route QM"),
++		DUMP_WED(WED_RTQM_R2H_MIB(0)),
++		DUMP_WED(WED_RTQM_R2Q_MIB(0)),
++		DUMP_WED(WED_RTQM_Q2H_MIB(0)),
++		DUMP_WED(WED_RTQM_R2H_MIB(1)),
++		DUMP_WED(WED_RTQM_R2Q_MIB(1)),
++		DUMP_WED(WED_RTQM_Q2H_MIB(1)),
++		DUMP_WED(WED_RTQM_Q2N_MIB),
++		DUMP_WED(WED_RTQM_Q2B_MIB),
++		DUMP_WED(WED_RTQM_PFDBK_MIB),
++
++		DUMP_STR("WED WDMA TX"),
++		DUMP_WED(WED_WDMA_TX_MIB),
++		DUMP_WED_RING(WED_WDMA_RING_TX),
++
++		DUMP_STR("WDMA TX"),
++		DUMP_WDMA(WDMA_GLO_CFG),
++		DUMP_WDMA_RING(WDMA_RING_TX(0)),
++		DUMP_WDMA_RING(WDMA_RING_TX(1)),
++
++		DUMP_STR("WED RX BM"),
++		DUMP_WED(WED_RX_BM_BASE),
++		DUMP_WED(WED_RX_BM_RX_DMAD),
++		DUMP_WED(WED_RX_BM_PTR),
++		DUMP_WED(WED_RX_BM_TKID_MIB),
++		DUMP_WED(WED_RX_BM_BLEN),
++		DUMP_WED(WED_RX_BM_STS),
++		DUMP_WED(WED_RX_BM_INTF2),
++		DUMP_WED(WED_RX_BM_INTF),
++		DUMP_WED(WED_RX_BM_ERR_STS),
++	};
++
++	struct mtk_wed_hw *hw = s->private;
++	struct mtk_wed_device *dev = hw->wed_dev;
++
++	if (!dev)
++		return 0;
++
++	dump_wed_regs(s, dev, regs, ARRAY_SIZE(regs));
++
++	return 0;
++}
++DEFINE_SHOW_ATTRIBUTE(wed_rxinfo);
+ 
+ static int
+ mtk_wed_reg_set(void *data, u64 val)
+@@ -175,4 +261,8 @@ void mtk_wed_hw_add_debugfs(struct mtk_wed_hw *hw)
+ 	debugfs_create_u32("regidx", 0600, dir, &hw->debugfs_reg);
+ 	debugfs_create_file_unsafe("regval", 0600, dir, hw, &fops_regval);
+ 	debugfs_create_file_unsafe("txinfo", 0400, dir, hw, &wed_txinfo_fops);
++	debugfs_create_file_unsafe("rxinfo", 0400, dir, hw, &wed_rxinfo_fops);
++	if (hw->ver > MTK_WED_V1) {
++		wed_wo_mcu_debugfs(hw, dir);
++	}
+ }
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c
+new file mode 100644
+index 000000000..bd1ab9500
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c
+@@ -0,0 +1,561 @@
++// SPDX-License-Identifier: GPL-2.0-only
++
++#include <linux/skbuff.h>
++#include <linux/debugfs.h>
++#include <linux/firmware.h>
++#include <linux/of_address.h>
++#include <linux/soc/mediatek/mtk_wed.h>
++#include "mtk_wed_regs.h"
++#include "mtk_wed_mcu.h"
++#include "mtk_wed_wo.h"
++
++struct sk_buff *
++mtk_wed_mcu_msg_alloc(struct mtk_wed_wo *wo,
++		      const void *data, int data_len)
++{
++	const struct wed_wo_mcu_ops *ops = wo->mcu_ops;
++	int length = ops->headroom + data_len;
++	struct sk_buff *skb;
++
++	skb = alloc_skb(length, GFP_KERNEL);
++	if (!skb)
++		return NULL;
++
++	memset(skb->head, 0, length);
++	skb_reserve(skb, ops->headroom);
++
++	if (data && data_len)
++		skb_put_data(skb, data, data_len);
++
++	return skb;
++}
++
++struct sk_buff *
++mtk_wed_mcu_get_response(struct mtk_wed_wo *wo, unsigned long expires)
++{
++	unsigned long timeout;
++
++	if (!time_is_after_jiffies(expires))
++		return NULL;
++
++	timeout = expires - jiffies;
++	wait_event_timeout(wo->mcu.wait,
++			   (!skb_queue_empty(&wo->mcu.res_q)),
++			   timeout);
++
++	return skb_dequeue(&wo->mcu.res_q);
++}
++
++/* Send @skb as command @cmd and, if @wait_resp, wait for the matching reply. */
++int
++mtk_wed_mcu_skb_send_and_get_msg(struct mtk_wed_wo *wo,
++				 int to_id, int cmd, struct sk_buff *skb,
++				 bool wait_resp, struct sk_buff **ret_skb)
++{
++	unsigned long expires;
++	int ret, seq;
++
++	if (ret_skb)
++		*ret_skb = NULL;
++
++	mutex_lock(&wo->mcu.mutex);	/* held across the send and the wait */
++
++	ret = wo->mcu_ops->mcu_skb_send_msg(wo, to_id, cmd, skb, &seq, wait_resp);
++	if (ret < 0)
++		goto out;
++
++	if (!wait_resp) {
++		ret = 0;
++		goto out;
++	}
++
++	expires = jiffies + wo->mcu.timeout;
++	/* skb is reused below for the dequeued response (NULL once expired) */
++	do {
++		skb = mtk_wed_mcu_get_response(wo, expires);
++		ret = wo->mcu_ops->mcu_parse_response(wo, cmd, skb, seq);
++
++		if (!ret && ret_skb)
++			*ret_skb = skb;	/* ownership passes to the caller */
++		else
++			dev_kfree_skb(skb);
++	} while (ret == -EAGAIN);	/* parse callback asked for another try */
++
++out:
++	mutex_unlock(&wo->mcu.mutex);
++	return ret;
++}
++
++void mtk_wed_mcu_rx_event(struct mtk_wed_wo *wo,
++			struct sk_buff *skb)
++{
++	skb_queue_tail(&wo->mcu.res_q, skb);
++	wake_up(&wo->mcu.wait);
++}
++
++static int mtk_wed_mcu_send_and_get_msg(struct mtk_wed_wo *wo,
++			int to_id, int cmd, const void *data, int len,
++			bool wait_resp, struct sk_buff **ret_skb)
++{
++	struct sk_buff *skb;
++
++	skb = mtk_wed_mcu_msg_alloc(wo, data, len);
++	if (!skb)
++		return -ENOMEM;
++
++	return mtk_wed_mcu_skb_send_and_get_msg(wo, to_id, cmd, skb, wait_resp, ret_skb);
++}
++
++int
++mtk_wed_mcu_send_msg(struct mtk_wed_wo *wo,
++			int to_id, int cmd,
++			const void *data, int len, bool wait_resp)
++{
++	struct sk_buff *skb = NULL;
++	int ret = 0;
++
++	ret = mtk_wed_mcu_send_and_get_msg(wo, to_id, cmd, data,
++					   len, wait_resp, &skb);
++	if (skb)
++		dev_kfree_skb(skb);
++
++	return ret;
++}
++
++int mtk_wed_exception_init(struct mtk_wed_wo *wo)
++{
++	struct wed_wo_exception *exp = &wo->exp;
++	struct {
++		u32 arg0;
++		u32 arg1;
++	}req;
++
++	exp->log_size = EXCEPTION_LOG_SIZE;
++	exp->log = kmalloc(exp->log_size, GFP_ATOMIC);
++	if (!exp->log)
++		return -ENOMEM;
++
++	memset(exp->log, 0, exp->log_size);
++	exp->phys = dma_map_single(wo->hw->dev, exp->log, exp->log_size,
++				   DMA_FROM_DEVICE);
++
++	if (unlikely(dma_mapping_error(wo->hw->dev, exp->phys))) {
++		dev_info(wo->hw->dev, "dma map error\n");
++		goto free;
++	}
++
++	req.arg0 = (u32)exp->phys;
++	req.arg1 = (u32)exp->log_size;
++
++	return mtk_wed_mcu_send_msg(wo, MODULE_ID_WO, WO_CMD_EXCEPTION_INIT,
++				    &req, sizeof(req), false);
++
++free:
++	kfree(exp->log);
++	return -ENOMEM;
++}
++
++/* Validate the WO command header in @skb; returns a WARP_*_STATUS code. */
++int mtk_wed_mcu_cmd_sanity_check(struct mtk_wed_wo *wo, struct sk_buff *skb)
++{
++	struct wed_cmd_hdr *hdr = (struct wed_cmd_hdr *)skb->data;
++
++	/* Check the length first so hdr is never read from a short skb. */
++	if (skb->len < sizeof(struct wed_cmd_hdr))
++		return WARP_INVALID_PARA_STATUS;
++
++	if (hdr->ver != 0)
++		return WARP_INVALID_PARA_STATUS;
++	if (skb->len != hdr->length)
++		return WARP_INVALID_PARA_STATUS;
++
++	return WARP_OK_STATUS;
++}
++
++/* Print the WO log/profiling event carried in @skb, then free @skb. */
++void
++mtk_wed_mcu_rx_unsolicited_event(struct mtk_wed_wo *wo, struct sk_buff *skb)
++{
++	struct wed_cmd_hdr *hdr = (struct wed_cmd_hdr *)skb->data;
++	struct wed_wo_log *record;
++	char *msg = (char *)(skb->data + sizeof(struct wed_cmd_hdr));
++	u16 msg_len = skb->len - sizeof(struct wed_cmd_hdr);
++	u32 i, cnt = 0;
++
++	/* a short skb would make msg_len wrap around; drop it */
++	if (skb->len < sizeof(struct wed_cmd_hdr))
++		goto out;
++
++	switch (hdr->cmd_id) {
++	case WO_EVT_LOG_DUMP:
++		/* nothing here guarantees a NUL terminator: bound by msg_len */
++		pr_info("[WO LOG]: %.*s\n", msg_len, msg);
++		break;
++	case WO_EVT_PROFILING:
++		cnt = msg_len / (sizeof(struct wed_wo_log));
++		record = (struct wed_wo_log *) msg;
++		dev_info(wo->hw->dev, "[WO Profiling]: %u report arrived!\n", cnt);
++		for (i = 0 ; i < cnt ; i++) {
++			dev_info(wo->hw->dev, "[WO Profiling]:  SN:%u with latency: total=%u, rro:%u, mod:%u\n",
++				 record[i].sn,
++				 record[i].total,
++				 record[i].rro,
++				 record[i].mod);
++		}
++		break;
++	default:
++		break;
++	}
++
++out:
++	dev_kfree_skb(skb);
++}
++
++/* Copy the WO firmware image into its regions, then program boot/reset CRs. */
++static int
++mtk_wed_load_firmware(struct mtk_wed_wo *wo)
++{
++	struct fw_info {
++		__le32 decomp_crc;
++		__le32 decomp_len;
++		__le32 decomp_blk_sz;
++		u8 reserved[4];
++		__le32 addr;
++		__le32 len;
++		u8 feature_set;
++		u8 reserved1[15];
++	} __packed *region;
++	char *mcu;
++	const struct mtk_wed_fw_trailer *hdr;
++	static u8 shared[MAX_REGION_SIZE] = {0};	/* static: persists across calls */
++	const struct firmware *fw;
++	int ret, i;
++	u32 ofs = 0;
++	u32 boot_cr, val;
++
++	mcu = wo->hw->index ? MT7986_FIRMWARE_WO_2 : MT7986_FIRMWARE_WO_1;
++
++	ret = request_firmware(&fw, mcu, wo->hw->dev);
++	if (ret)
++		return ret;
++	/* trailer = last sizeof(*hdr) bytes; NOTE(review): fw->size is not checked */
++	hdr = (const struct mtk_wed_fw_trailer *)(fw->data + fw->size -
++						  sizeof(*hdr));
++
++	dev_info(wo->hw->dev, "WO Firmware Version: %.10s, Build Time: %.15s\n",
++		 hdr->fw_ver, hdr->build_date);
++
++	for (i = 0; i < hdr->n_region; i++) {
++		int j = 0;
++		region = (struct fw_info *)(fw->data + fw->size -	/* descriptor i of */
++					    sizeof(*hdr) -		/* n_region, stored */
++					    sizeof(*region) *		/* right before the */
++					    (hdr->n_region - i));	/* trailer */
++		/* NOTE(review): region->addr/len are __le32 but used unconverted */
++		while (j < MAX_REGION_SIZE) {
++			struct mtk_wed_fw_region *wo_region;
++
++			wo_region = &wo->region[j];
++			if (!wo_region->addr)
++				break;
++
++			if (wo_region->addr_pa == region->addr) {
++				if (!wo_region->shared) {
++					memcpy(wo_region->addr,
++					       fw->data + ofs, region->len);
++				} else if (!shared[j]) {	/* shared region: copy once */
++					memcpy(wo_region->addr,
++					       fw->data + ofs, region->len);
++					shared[j] = true;
++				}
++			}
++			j++;
++		}
++		/* NOTE(review): loop bound is MAX_REGION_SIZE, check is __WO_REGION_MAX */
++		if (j == __WO_REGION_MAX) {
++			ret = -ENOENT;
++			goto done;
++		}
++		ofs += region->len;	/* payloads are read back to back from offset 0 */
++	}
++
++	/* write the start address */
++	boot_cr = wo->hw->index ?
++		WOX_MCU_CFG_LS_WA_BOOT_ADDR_ADDR : WOX_MCU_CFG_LS_WM_BOOT_ADDR_ADDR;
++	wo_w32(wo, boot_cr, (wo->region[WO_REGION_EMI].addr_pa >> 16));
++
++	/* wo firmware reset */
++	wo_w32(wo, WOX_MCU_CFG_LS_WF_MCCR_CLR_ADDR, 0xc00);
++
++	val = wo_r32(wo, WOX_MCU_CFG_LS_WF_MCU_CFG_WM_WA_ADDR);
++
++	val |= wo->hw->index ? WOX_MCU_CFG_LS_WF_MCU_CFG_WM_WA_WA_CPU_RSTB_MASK :
++		WOX_MCU_CFG_LS_WF_MCU_CFG_WM_WA_WM_CPU_RSTB_MASK;
++
++	wo_w32(wo, WOX_MCU_CFG_LS_WF_MCU_CFG_WM_WA_ADDR, val);
++
++done:
++	release_firmware(fw);
++
++	return ret;
++}
++
++/* Resolve and ioremap every WO firmware region referenced by the WED node. */
++static int
++mtk_wed_get_firmware_region(struct mtk_wed_wo *wo)
++{
++	struct device_node *node, *np = wo->hw->node;
++	struct mtk_wed_fw_region *region;
++	struct resource res;
++	int i, ret;
++	static const char *const wo_region_compat[__WO_REGION_MAX] = {
++		[WO_REGION_EMI] = WOCPU_EMI_DEV_NODE,
++		[WO_REGION_ILM] = WOCPU_ILM_DEV_NODE,
++		[WO_REGION_DATA] = WOCPU_DATA_DEV_NODE,
++		[WO_REGION_BOOT] = WOCPU_BOOT_DEV_NODE,
++	};
++
++	for (i = 0; i < __WO_REGION_MAX; i++) {
++		region = &wo->region[i];
++		node = of_parse_phandle(np, wo_region_compat[i], 0);
++		if (!node)
++			return -ENODEV;
++
++		ret = of_address_to_resource(node, 0, &res);
++		if (ret) {
++			of_node_put(node);
++			return ret;
++		}
++		region->addr_pa = res.start;
++		region->size = resource_size(&res);
++		region->addr = ioremap(region->addr_pa, region->size);
++		of_property_read_u32_index(node, "shared", 0, &region->shared);
++		of_node_put(node);	/* drop the of_parse_phandle() reference */
++		if (!region->addr)
++			return -ENOMEM;
++	}
++
++	return 0;
++}
++
++/* Prepend a wed_cmd_hdr to @skb and queue it on the WO tx ring. */
++static int
++wo_mcu_send_message(struct mtk_wed_wo *wo,
++			int to_id, int cmd, struct sk_buff *skb,
++			int *wait_seq, bool wait_resp)
++{
++	struct wed_cmd_hdr  *hdr;
++	u8 seq = 0;
++
++	/* TODO: make dynamic based on msg type */
++	wo->mcu.timeout = 20 * HZ;
++
++	if (wait_resp && wait_seq) {
++		seq = wo->mcu.msg_seq++ ;	/* reported back through @wait_seq */
++		*wait_seq = seq;
++	}
++
++	hdr = (struct wed_cmd_hdr *)skb_push(skb, sizeof(*hdr));
++	hdr->cmd_id = cmd;
++	hdr->length = cpu_to_le16(skb->len);	/* length includes the header */
++	hdr->uni_id = seq;
++
++	/* flag is only OR-ed: assumes the headroom was zeroed by the allocator */
++	if (to_id == MODULE_ID_WO)
++		hdr->flag |= WARP_CMD_FLAG_FROM_TO_WO;
++
++	if (wait_resp && wait_seq)
++		hdr->flag |= WARP_CMD_FLAG_NEED_RSP;
++
++	return mtk_wed_wo_q_tx_skb(wo, &wo->q_tx, skb);
++}
++
++static int
++wo_mcu_parse_response(struct mtk_wed_wo *wo, int cmd,
++			  struct sk_buff *skb, int seq)
++{
++	struct wed_cmd_hdr  *hdr;
++
++	if (!skb) {
++		dev_err(wo->hw->dev, "Message %08x (seq %d) timeout\n",
++			cmd, seq);
++		return -ETIMEDOUT;
++	}
++
++	hdr = (struct wed_cmd_hdr *)skb->data;
++	if (seq != hdr->uni_id) {
++		dev_err(wo->hw->dev, "Message %08x (seq %d) with not match uid(%d)\n",
++			cmd, seq, hdr->uni_id);
++		return -EAGAIN;
++	}
++
++	//skb_pull(skb, sizeof(struct wed_cmd_hdr));
++
++	return 0;
++}
++
++/* Install the MCU ops, load the WO firmware and poll until FWDL reads 0. */
++int wed_wo_mcu_init(struct mtk_wed_wo *wo)
++{
++	static const struct wed_wo_mcu_ops wo_mcu_ops = {
++		.headroom = sizeof(struct wed_cmd_hdr),
++		.mcu_skb_send_msg = wo_mcu_send_message,
++		.mcu_parse_response = wo_mcu_parse_response,
++		/* TODO: .mcu_restart = wo_mcu_restart, */
++	};
++	unsigned long timeout = jiffies + FW_DL_TIMEOUT;	/* starts before the load */
++	int ret;
++	u32 val;
++
++	wo->mcu_ops = &wo_mcu_ops;
++
++	ret = mtk_wed_get_firmware_region(wo);
++	if (ret)
++		return ret;
++
++	/* set dummy cr: mark the download as pending with index + 1 (non-zero) */
++	wed_w32(wo->hw->wed_dev, MTK_WED_SCR0 + 4 * WED_DUMMY_CR_FWDL,
++		wo->hw->index + 1);
++
++	ret = mtk_wed_load_firmware(wo);
++	if (ret)
++		return ret;
++
++	do {
++		/* get dummy cr; presumably cleared by the WO firmware - confirm */
++		val = wed_r32(wo->hw->wed_dev, MTK_WED_SCR0 + 4 * WED_DUMMY_CR_FWDL);
++	} while (val != 0 && !time_after(jiffies, timeout));	/* busy-wait poll */
++	if (val)
++		return -EBUSY;
++
++	return 0;
++}
++
++/* debugfs "wo_write" handler: parse "<cmd> [u32 args]" and send it to WO. */
++static ssize_t
++mtk_wed_wo_ctrl(struct file *file, const char __user *user_buf,
++		size_t count, loff_t *ppos)
++{
++	struct mtk_wed_hw *hw = file->private_data;
++	struct mtk_wed_wo *wo = hw->wed_wo;
++	char buf[100], *cmd = NULL, *input[11] = {0};
++	char msgbuf[128] = {0};
++	struct wo_cmd_query *query = (struct wo_cmd_query *)msgbuf;
++	u32 cmd_id;
++	bool wait = false;
++	char *sub_str = NULL, *p;
++	int  input_idx = 0, input_total = 0, scan_num = 0;
++
++	/* Keep one byte free for the NUL terminator written below. */
++	if (count >= sizeof(buf))
++		return -EINVAL;
++
++	if (copy_from_user(buf, user_buf, count))
++		return -EFAULT;
++
++	if (count && buf[count - 1] == '\n')
++		buf[count - 1] = '\0';
++	else
++		buf[count] = '\0';
++
++	p = buf;
++	/* Stop at the capacity of input[]; surplus tokens are ignored. */
++	while (input_total < ARRAY_SIZE(input) && (sub_str = strsep(&p, " "))) {
++		input[input_idx] = sub_str;
++		input_idx++;
++		input_total++;
++	}
++	cmd = input[0];
++	if (input_total == 1 && cmd) {
++		if (strncmp(cmd, "bainfo", strlen(cmd)) == 0) {
++			cmd_id = WO_CMD_BA_INFO_DUMP;
++		} else if (strncmp(cmd, "bactrl", strlen(cmd)) == 0) {
++			cmd_id = WO_CMD_BA_CTRL_DUMP;
++		} else if (strncmp(cmd, "fbcmdq", strlen(cmd)) == 0) {
++			cmd_id = WO_CMD_FBCMD_Q_DUMP;
++		} else if (strncmp(cmd, "logflush", strlen(cmd)) == 0) {
++			cmd_id = WO_CMD_LOG_FLUSH;
++		} else if (strncmp(cmd, "cpustat.dump", strlen(cmd)) == 0) {
++			cmd_id = WO_CMD_CPU_STATS_DUMP;
++		} else if (strncmp(cmd, "state", strlen(cmd)) == 0) {
++			cmd_id = WO_CMD_WED_RX_STAT;
++		} else if (strncmp(cmd, "prof_hit_dump", strlen(cmd)) == 0) {
++			//wo_profiling_report();
++			return count;
++		} else if (strncmp(cmd, "rxcnt_info", strlen(cmd)) == 0) {
++			cmd_id = WO_CMD_RXCNT_INFO;
++			wait = true;
++		} else {
++			pr_info("(%s) unknown comand string(%s)!\n", __func__, cmd);
++			return count;
++		}
++	} else if (input_total > 1) {
++		for (input_idx = 1 ; input_idx < input_total ; input_idx++) {
++			scan_num = sscanf(input[input_idx], "%u", &query->query0+(input_idx - 1));
++			if (scan_num < 1) {
++				pr_info("(%s) require more input!\n", __func__);
++				return count;
++			}
++		}
++		if (strncmp(cmd, "devinfo", strlen(cmd)) == 0) {
++			cmd_id = WO_CMD_DEV_INFO_DUMP;
++		} else if (strncmp(cmd, "bssinfo", strlen(cmd)) == 0) {
++			cmd_id = WO_CMD_BSS_INFO_DUMP;
++		} else if (strncmp(cmd, "starec", strlen(cmd)) == 0) {
++			cmd_id = WO_CMD_STA_REC_DUMP;
++		} else if (strncmp(cmd, "starec_ba", strlen(cmd)) == 0) {
++			cmd_id = WO_CMD_STA_BA_DUMP;
++		} else if (strncmp(cmd, "logctrl", strlen(cmd)) == 0) {
++			cmd_id = WO_CMD_FW_LOG_CTRL;
++		} else if (strncmp(cmd, "cpustat.en", strlen(cmd)) == 0) {
++			cmd_id = WO_CMD_CPU_STATS_ENABLE;
++		} else if (strncmp(cmd, "prof_conf", strlen(cmd)) == 0) {
++			cmd_id = WO_CMD_PROF_CTRL;
++		} else if (strncmp(cmd, "rxcnt_ctrl", strlen(cmd)) == 0) {
++			cmd_id = WO_CMD_RXCNT_CTRL;
++		} else if (strncmp(cmd, "dbg_set", strlen(cmd)) == 0) {
++			cmd_id = WO_CMD_DBG_INFO;
++		} else {
++			pr_info("(%s) unknown command string(%s)!\n", __func__, cmd);
++			return count;
++		}
++	} else {
++		dev_info(hw->dev, "usage: echo cmd='cmd_str' > wo_write\n");
++		dev_info(hw->dev, "cmd_str value range:\n");
++		dev_info(hw->dev, "\tbainfo:\n");
++		dev_info(hw->dev, "\tbactrl:\n");
++		dev_info(hw->dev, "\tfbcmdq:\n");
++		dev_info(hw->dev, "\tlogflush:\n");
++		dev_info(hw->dev, "\tcpustat.dump:\n");
++		dev_info(hw->dev, "\tprof_hit_dump:\n");
++		dev_info(hw->dev, "\trxcnt_info:\n");
++		dev_info(hw->dev, "\tdevinfo:\n");
++		dev_info(hw->dev, "\tbssinfo:\n");
++		dev_info(hw->dev, "\tstarec:\n");
++		dev_info(hw->dev, "\tstarec_ba:\n");
++		dev_info(hw->dev, "\tlogctrl:\n");
++		dev_info(hw->dev, "\tcpustat.en:\n");
++		dev_info(hw->dev, "\tprof_conf:\n");
++		dev_info(hw->dev, "\trxcnt_ctrl:\n");
++		dev_info(hw->dev, "\tdbg_set [level] [category]:\n");
++		return count;
++	}
++
++	mtk_wed_mcu_send_msg(wo, MODULE_ID_WO, cmd_id, (void *)msgbuf, sizeof(struct wo_cmd_query), wait);
++
++	return count;
++}
++
++static const struct file_operations fops_wo_ctrl = {
++	.write = mtk_wed_wo_ctrl,
++	.open = simple_open,
++	.llseek = default_llseek,
++};
++
++void wed_wo_mcu_debugfs(struct mtk_wed_hw *hw, struct dentry *dir)
++{
++	if (!dir)
++		return;
++
++	debugfs_create_file("wo_write", 0600, dir, hw, &fops_wo_ctrl);
++}
++
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed_mcu.h b/drivers/net/ethernet/mediatek/mtk_wed_mcu.h
+new file mode 100644
+index 000000000..6a5ac7672
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_wed_mcu.h
+@@ -0,0 +1,125 @@
++// SPDX-License-Identifier: GPL-2.0-only
++
++#ifndef __MTK_WED_MCU_H
++#define __MTK_WED_MCU_H
++
++#define EXCEPTION_LOG_SIZE		32768
++#define WOCPU_MCUSYS_RESET_ADDR		0x15194050
++#define WOCPU_WO0_MCUSYS_RESET_MASK 	0x20
++#define WOCPU_WO1_MCUSYS_RESET_MASK 	0x1
++
++#define WARP_INVALID_LENGTH_STATUS (-2)
++#define WARP_NULL_POINTER_STATUS (-3)
++#define WARP_INVALID_PARA_STATUS (-4)
++#define WARP_NOT_HANDLE_STATUS (-5)
++#define WARP_FAIL_STATUS (-1)
++#define WARP_OK_STATUS (0)
++#define WARP_ALREADY_DONE_STATUS (1)
++
++#define MT7986_FIRMWARE_WO_1		"mediatek/mt7986_wo_0.bin"
++#define MT7986_FIRMWARE_WO_2		"mediatek/mt7986_wo_1.bin"
++
++#define WOCPU_EMI_DEV_NODE		"mediatek,wocpu_emi"
++#define WOCPU_ILM_DEV_NODE		"mediatek,wocpu_ilm"
++#define WOCPU_DLM_DEV_NODE		"mediatek,wocpu_dlm"
++#define WOCPU_DATA_DEV_NODE		"mediatek,wocpu_data"
++#define WOCPU_BOOT_DEV_NODE		"mediatek,wocpu_boot"
++
++#define FW_DL_TIMEOUT		((3000 * HZ) / 1000)
++#define WOCPU_TIMEOUT		((1000 * HZ) / 1000)
++
++#define MAX_REGION_SIZE	3
++
++#define WOX_MCU_CFG_LS_BASE	0 /*0x15194000*/
++
++#define WOX_MCU_CFG_LS_HW_VER_ADDR		(WOX_MCU_CFG_LS_BASE + 0x000) // 4000
++#define WOX_MCU_CFG_LS_FW_VER_ADDR		(WOX_MCU_CFG_LS_BASE + 0x004) // 4004
++#define WOX_MCU_CFG_LS_CFG_DBG1_ADDR		(WOX_MCU_CFG_LS_BASE + 0x00C) // 400C
++#define WOX_MCU_CFG_LS_CFG_DBG2_ADDR 		(WOX_MCU_CFG_LS_BASE + 0x010) // 4010
++#define WOX_MCU_CFG_LS_WF_MCCR_ADDR		(WOX_MCU_CFG_LS_BASE + 0x014) // 4014
++#define WOX_MCU_CFG_LS_WF_MCCR_SET_ADDR		(WOX_MCU_CFG_LS_BASE + 0x018) // 4018
++#define WOX_MCU_CFG_LS_WF_MCCR_CLR_ADDR		(WOX_MCU_CFG_LS_BASE + 0x01C) // 401C
++#define WOX_MCU_CFG_LS_WF_MCU_CFG_WM_WA_ADDR	(WOX_MCU_CFG_LS_BASE + 0x050) // 4050
++#define WOX_MCU_CFG_LS_WM_BOOT_ADDR_ADDR 	(WOX_MCU_CFG_LS_BASE + 0x060) // 4060
++#define WOX_MCU_CFG_LS_WA_BOOT_ADDR_ADDR	(WOX_MCU_CFG_LS_BASE + 0x064) // 4064
++
++#define WOX_MCU_CFG_LS_WF_MCU_CFG_WM_WA_WM_CPU_RSTB_MASK	BIT(5)
++#define WOX_MCU_CFG_LS_WF_MCU_CFG_WM_WA_WA_CPU_RSTB_MASK	BIT(0)
++
++
++enum wo_event_id {
++	WO_EVT_LOG_DUMP = 0x1,
++	WO_EVT_PROFILING = 0x2,
++	WO_EVT_RXCNT_INFO = 0x3
++};
++
++enum wo_cmd_id {
++	WO_CMD_WED_CFG = 0,
++	WO_CMD_WED_RX_STAT,
++	WO_CMD_RRO_SER,
++	WO_CMD_DBG_INFO,
++	WO_CMD_DEV_INFO,
++	WO_CMD_BSS_INFO,
++	WO_CMD_STA_REC,
++	WO_CMD_DEV_INFO_DUMP,
++	WO_CMD_BSS_INFO_DUMP,
++	WO_CMD_STA_REC_DUMP,
++	WO_CMD_BA_INFO_DUMP,
++	WO_CMD_FBCMD_Q_DUMP,
++	WO_CMD_FW_LOG_CTRL,
++	WO_CMD_LOG_FLUSH,
++	WO_CMD_CHANGE_STATE,
++	WO_CMD_CPU_STATS_ENABLE,
++	WO_CMD_CPU_STATS_DUMP,
++	WO_CMD_EXCEPTION_INIT,
++	WO_CMD_PROF_CTRL,
++	WO_CMD_STA_BA_DUMP,
++	WO_CMD_BA_CTRL_DUMP,
++	WO_CMD_RXCNT_CTRL,
++	WO_CMD_RXCNT_INFO,
++	WO_CMD_SET_CAP,
++	WO_CMD_CCIF_RING_DUMP,
++	WO_CMD_WED_END
++};
++
++enum wo_state {
++	WO_STATE_UNDEFINED 	= 0x0,
++	WO_STATE_INIT 		= 0x1,
++	WO_STATE_ENABLE		= 0x2,
++	WO_STATE_DISABLE	= 0x3,
++	WO_STATE_HALT		= 0x4,
++	WO_STATE_GATING		= 0x5,
++	WO_STATE_SER_RESET 	= 0x6,
++	WO_STATE_WF_RESET	= 0x7,
++	WO_STATE_END
++};
++
++enum wo_done_state {
++	WOIF_UNDEFINED		= 0,
++	WOIF_DISABLE_DONE 	= 1,
++	WOIF_TRIGGER_ENABLE	= 2,
++	WOIF_ENABLE_DONE	= 3,
++	WOIF_TRIGGER_GATING	= 4,
++	WOIF_GATING_DONE	= 5,
++	WOIF_TRIGGER_HALT	= 6,
++	WOIF_HALT_DONE		= 7,
++};
++
++enum wed_dummy_cr_idx {
++	WED_DUMMY_CR_FWDL = 0,
++	WED_DUMMY_CR_WO_STATUS = 1
++};
++
++struct mtk_wed_fw_trailer {
++	u8 chip_id;
++	u8 eco_code;
++	u8 n_region;
++	u8 format_ver;
++	u8 format_flag;
++	u8 reserved[2];
++	char fw_ver[10];
++	char build_date[15];
++	u32 crc;
++};
++
++#endif
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed_regs.h b/drivers/net/ethernet/mediatek/mtk_wed_regs.h
+index 69f136ed4..e911b5315 100644
+--- a/drivers/net/ethernet/mediatek/mtk_wed_regs.h
++++ b/drivers/net/ethernet/mediatek/mtk_wed_regs.h
+@@ -4,6 +4,8 @@
+ #ifndef __MTK_WED_REGS_H
+ #define __MTK_WED_REGS_H
+ 
++#define MTK_WFDMA_DESC_CTRL_TO_HOST		BIT(8)
++
+ #if defined(CONFIG_MEDIATEK_NETSYS_V2)
+ #define MTK_WDMA_DESC_CTRL_LEN1			GENMASK(13, 0)
+ #define MTK_WDMA_DESC_CTRL_LAST_SEG1		BIT(14)
+@@ -16,6 +18,7 @@
+ #define MTK_WDMA_DESC_CTRL_LEN0			GENMASK(29, 16)
+ #define MTK_WDMA_DESC_CTRL_LAST_SEG0		BIT(30)
+ #define MTK_WDMA_DESC_CTRL_DMA_DONE		BIT(31)
++#define MTK_WED_RX_BM_TOKEN			GENMASK(31, 16)
+ 
+ struct mtk_wdma_desc {
+ 	__le32 buf0;
+@@ -37,6 +40,8 @@ struct mtk_wdma_desc {
+ #define MTK_WED_RESET_WED_TX_DMA			BIT(12)
+ #define MTK_WED_RESET_WDMA_RX_DRV			BIT(17)
+ #define MTK_WED_RESET_WDMA_INT_AGENT			BIT(19)
++#define MTK_WED_RESET_RX_RRO_QM				BIT(20)
++#define MTK_WED_RESET_RX_ROUTE_QM			BIT(21)
+ #define MTK_WED_RESET_WED				BIT(31)
+ 
+ #define MTK_WED_CTRL					0x00c
+@@ -48,8 +53,12 @@ struct mtk_wdma_desc {
+ #define MTK_WED_CTRL_WED_TX_BM_BUSY			BIT(9)
+ #define MTK_WED_CTRL_WED_TX_FREE_AGENT_EN		BIT(10)
+ #define MTK_WED_CTRL_WED_TX_FREE_AGENT_BUSY		BIT(11)
+-#define MTK_WED_CTRL_RESERVE_EN				BIT(12)
+-#define MTK_WED_CTRL_RESERVE_BUSY			BIT(13)
++#define MTK_WED_CTRL_WED_RX_BM_EN			BIT(12)
++#define MTK_WED_CTRL_WED_RX_BM_BUSY			BIT(13)
++#define MTK_WED_CTRL_RX_RRO_QM_EN			BIT(14)
++#define MTK_WED_CTRL_RX_RRO_QM_BUSY			BIT(15)
++#define MTK_WED_CTRL_RX_ROUTE_QM_EN			BIT(16)
++#define MTK_WED_CTRL_RX_ROUTE_QM_BUSY			BIT(17)
+ #define MTK_WED_CTRL_FINAL_DIDX_READ			BIT(24)
+ #define MTK_WED_CTRL_ETH_DMAD_FMT			BIT(25)
+ #define MTK_WED_CTRL_MIB_READ_CLEAR			BIT(28)
+@@ -64,8 +73,8 @@ struct mtk_wdma_desc {
+ #define MTK_WED_EXT_INT_STATUS_TX_TKID_LO_TH		BIT(10)
+ #define MTK_WED_EXT_INT_STATUS_TX_TKID_HI_TH		BIT(11)
+ #endif
+-#define MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH		BIT(12)
+-#define MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH		BIT(13)
++#define MTK_WED_EXT_INT_STATUS_RX_FREE_AT_EMPTY		BIT(12)
++#define MTK_WED_EXT_INT_STATUS_RX_FBUF_DMAD_ER		BIT(13)
+ #define MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR	BIT(16)
+ #define MTK_WED_EXT_INT_STATUS_RX_DRV_W_RESP_ERR	BIT(17)
+ #define MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT		BIT(18)
+@@ -82,8 +91,8 @@ struct mtk_wdma_desc {
+ #define MTK_WED_EXT_INT_STATUS_ERROR_MASK		(MTK_WED_EXT_INT_STATUS_TF_LEN_ERR | \
+ 							 MTK_WED_EXT_INT_STATUS_TKID_WO_PYLD | \
+ 							 MTK_WED_EXT_INT_STATUS_TKID_TITO_INVALID | \
+-							 MTK_WED_EXT_INT_STATUS_RX_FBUF_LO_TH | \
+-							 MTK_WED_EXT_INT_STATUS_RX_FBUF_HI_TH | \
++							 MTK_WED_EXT_INT_STATUS_RX_FREE_AT_EMPTY | \
++							 MTK_WED_EXT_INT_STATUS_RX_FBUF_DMAD_ER | \
+ 							 MTK_WED_EXT_INT_STATUS_RX_DRV_R_RESP_ERR | \
+ 							 MTK_WED_EXT_INT_STATUS_RX_DRV_W_RESP_ERR | \
+ 							 MTK_WED_EXT_INT_STATUS_RX_DRV_COHERENT | \
+@@ -92,6 +101,8 @@ struct mtk_wdma_desc {
+ 							 MTK_WED_EXT_INT_STATUS_TX_DMA_W_RESP_ERR)
+ 
+ #define MTK_WED_EXT_INT_MASK				0x028
++#define MTK_WED_EXT_INT_MASK1				0x02c
++#define MTK_WED_EXT_INT_MASK2				0x030
+ 
+ #define MTK_WED_STATUS					0x060
+ #define MTK_WED_STATUS_TX				GENMASK(15, 8)
+@@ -179,6 +190,9 @@ struct mtk_wdma_desc {
+ 
+ #define MTK_WED_RING_RX(_n)				(0x400 + (_n) * 0x10)
+ 
++#define MTK_WED_RING_RX_DATA(_n)			(0x420 + (_n) * 0x10)
++
++#define MTK_WED_SCR0					0x3c0
+ #define MTK_WED_WPDMA_INT_TRIGGER			0x504
+ #define MTK_WED_WPDMA_INT_TRIGGER_RX_DONE		BIT(1)
+ #define MTK_WED_WPDMA_INT_TRIGGER_TX_DONE		GENMASK(5, 4)
+@@ -235,13 +249,19 @@ struct mtk_wdma_desc {
+ 
+ #define MTK_WED_WPDMA_INT_CTRL_TX			0x530
+ #define MTK_WED_WPDMA_INT_CTRL_TX0_DONE_EN 		BIT(0)
+-#define MTK_WED_WPDMA_INT_CTRL_TX0_DONE_CLR 		BIT(1)
++#define MTK_WED_WPDMA_INT_CTRL_TX0_DONE_CLR		BIT(1)
+ #define MTK_WED_WPDMA_INT_CTRL_TX0_DONE_TRIG		GENMASK(6, 2)
+ #define MTK_WED_WPDMA_INT_CTRL_TX1_DONE_EN		BIT(8)
+ #define MTK_WED_WPDMA_INT_CTRL_TX1_DONE_CLR		BIT(9)
+ #define MTK_WED_WPDMA_INT_CTRL_TX1_DONE_TRIG		GENMASK(14, 10)
+ 
+ #define MTK_WED_WPDMA_INT_CTRL_RX			0x534
++#define MTK_WED_WPDMA_INT_CTRL_RX0_EN			BIT(0)
++#define MTK_WED_WPDMA_INT_CTRL_RX0_CLR			BIT(1)
++#define MTK_WED_WPDMA_INT_CTRL_RX0_DONE_TRIG		GENMASK(6, 2)
++#define MTK_WED_WPDMA_INT_CTRL_RX1_EN			BIT(8)
++#define MTK_WED_WPDMA_INT_CTRL_RX1_CLR			BIT(9)
++#define MTK_WED_WPDMA_INT_CTRL_RX1_DONE_TRIG		GENMASK(14, 10)
+ 
+ #define MTK_WED_WPDMA_INT_CTRL_TX_FREE			0x538
+ #define MTK_WED_WPDMA_INT_CTRL_TX_FREE_DONE_EN		BIT(0)
+@@ -266,13 +286,43 @@ struct mtk_wdma_desc {
+ #define MTK_WED_WPDMA_TX_MIB(_n)			(0x5a0 + (_n) * 4)
+ #define MTK_WED_WPDMA_TX_COHERENT_MIB(_n)		(0x5d0 + (_n) * 4)
+ 
++#define MTK_WED_WPDMA_RX_MIB(_n)			(0x5e0 + (_n) * 4)
++#define MTK_WED_WPDMA_RX_COHERENT_MIB(_n)		(0x5f0 + (_n) * 4)
++
+ #define MTK_WED_WPDMA_RING_TX(_n)			(0x600 + (_n) * 0x10)
+ #define MTK_WED_WPDMA_RING_RX(_n)			(0x700 + (_n) * 0x10)
++#define MTK_WED_WPDMA_RING_RX_DATA(_n)		(0x730 + (_n) * 0x10)
++
++
++#define MTK_WED_WPDMA_RX_D_GLO_CFG			0x75c
++#define MTK_WED_WPDMA_RX_D_RX_DRV_EN			BIT(0)
++#define MTK_WED_WPDMA_RX_D_INIT_PHASE_RXEN_SEL		GENMASK(11, 7)
++#define MTK_WED_WPDMA_RX_D_RXD_READ_LEN			GENMASK(31, 24)
++
++#define MTK_WED_WPDMA_RX_D_RST_IDX			0x760
++#define MTK_WED_WPDMA_RX_D_RST_CRX_IDX0			BIT(16)
++#define MTK_WED_WPDMA_RX_D_RST_CRX_IDX1			BIT(17)
++#define MTK_WED_WPDMA_RX_D_RST_DRV_IDX0			BIT(24)
++#define MTK_WED_WPDMA_RX_D_RST_DRV_IDX1			BIT(25)
++
++#define MTK_WED_WPDMA_RX_GLO_CFG			0x76c
++#define MTK_WED_WPDMA_RX_RING				0x770
++
++#define MTK_WED_WPDMA_RX_D_MIB(_n)			(0x774 + (_n) * 4)
++#define MTK_WED_WPDMA_RX_D_PROCESSED_MIB(_n)		(0x784 + (_n) * 4)
++#define MTK_WED_WPDMA_RX_D_COHERENT_MIB			0x78c
++
++#define MTK_WED_WDMA_RING_TX				0x800
++
++#define MTK_WED_WDMA_TX_MIB				0x810
++
++
+ #define MTK_WED_WDMA_RING_RX(_n)			(0x900 + (_n) * 0x10)
+ #define MTK_WED_WDMA_RX_THRES(_n)			(0x940 + (_n) * 0x4)
+ 
+ #define MTK_WED_WDMA_GLO_CFG				0xa04
+ #define MTK_WED_WDMA_GLO_CFG_TX_DRV_EN			BIT(0)
++#define MTK_WED_WDMA_GLO_CFG_TX_DDONE_CHK		BIT(1)
+ #define MTK_WED_WDMA_GLO_CFG_RX_DRV_EN			BIT(2)
+ #define MTK_WED_WDMA_GLO_CFG_RX_DRV_BUSY		BIT(3)
+ #define MTK_WED_WDMA_GLO_CFG_BT_SIZE			GENMASK(5, 4)
+@@ -316,6 +366,20 @@ struct mtk_wdma_desc {
+ #define MTK_WED_WDMA_RX_RECYCLE_MIB(_n)			(0xae8 + (_n) * 4)
+ #define MTK_WED_WDMA_RX_PROCESSED_MIB(_n)		(0xaf0 + (_n) * 4)
+ 
++#define MTK_WED_RX_BM_RX_DMAD				0xd80
++#define MTK_WED_RX_BM_RX_DMAD_SDL0			GENMASK(13, 0)
++
++#define MTK_WED_RX_BM_BASE				0xd84
++#define MTK_WED_RX_BM_INIT_PTR				0xd88
++#define MTK_WED_RX_BM_SW_TAIL 				GENMASK(15, 0)
++#define MTK_WED_RX_BM_INIT_SW_TAIL			BIT(16)
++
++#define MTK_WED_RX_PTR					0xd8c
++
++#define MTK_WED_RX_BM_DYN_ALLOC_TH			0xdb4
++#define MTK_WED_RX_BM_DYN_ALLOC_TH_H			GENMASK(31, 16)
++#define MTK_WED_RX_BM_DYN_ALLOC_TH_L			GENMASK(15, 0)
++
+ #define MTK_WED_RING_OFS_BASE				0x00
+ #define MTK_WED_RING_OFS_COUNT				0x04
+ #define MTK_WED_RING_OFS_CPU_IDX			0x08
+@@ -355,4 +419,71 @@ struct mtk_wdma_desc {
+ /* DMA channel mapping */
+ #define HIFSYS_DMA_AG_MAP				0x008
+ 
++#define MTK_WED_RTQM_GLO_CFG				0xb00
++#define MTK_WED_RTQM_BUSY	 			BIT(1)
++#define MTK_WED_RTQM_Q_RST	 			BIT(2)
++#define MTK_WED_RTQM_Q_DBG_BYPASS			BIT(5)
++#define MTK_WED_RTQM_TXDMAD_FPORT			GENMASK(23, 20)
++
++#define MTK_WED_RTQM_R2H_MIB(_n)			(0xb70 + (_n) * 0x4)
++#define MTK_WED_RTQM_R2Q_MIB(_n)			(0xb78 + (_n) * 0x4)
++#define MTK_WED_RTQM_Q2N_MIB				0xb80
++#define MTK_WED_RTQM_Q2H_MIB(_n)			(0xb84 + (_n) * 0x4)
++
++#define MTK_WED_RTQM_Q2B_MIB				0xb8c
++#define MTK_WED_RTQM_PFDBK_MIB				0xb90
++
++#define MTK_WED_RROQM_GLO_CFG				0xc04
++#define MTK_WED_RROQM_RST_IDX				0xc08
++#define MTK_WED_RROQM_RST_IDX_MIOD 			BIT(0)
++#define MTK_WED_RROQM_RST_IDX_FDBK 			BIT(4)
++
++#define MTK_WED_RROQM_MIOD_CTRL0			0xc40
++#define MTK_WED_RROQM_MIOD_CTRL1			0xc44
++#define MTK_WED_RROQM_MIOD_CNT 				GENMASK(11, 0)
++
++#define MTK_WED_RROQM_MIOD_CTRL2			0xc48
++#define MTK_WED_RROQM_MIOD_CTRL3			0xc4c
++
++#define MTK_WED_RROQM_FDBK_CTRL0			0xc50
++#define MTK_WED_RROQM_FDBK_CTRL1			0xc54
++#define MTK_WED_RROQM_FDBK_CNT 				GENMASK(11, 0)
++
++#define MTK_WED_RROQM_FDBK_CTRL2			0xc58
++
++#define MTK_WED_RROQ_BASE_L				0xc80
++#define MTK_WED_RROQ_BASE_H				0xc84
++
++
++#define MTK_WED_RROQM_MIOD_CFG                          0xc8c
++#define MTK_WED_RROQM_MIOD_MID_DW 			GENMASK(5, 0)
++#define MTK_WED_RROQM_MIOD_MOD_DW			GENMASK(13, 8)
++#define MTK_WED_RROQM_MIOD_ENTRY_DW			GENMASK(22, 16)
++
++#define MTK_WED_RROQM_MID_MIB				0xcc0
++#define MTK_WED_RROQM_MOD_MIB				0xcc4
++#define MTK_WED_RROQM_MOD_COHERENT_MIB			0xcc8
++#define MTK_WED_RROQM_FDBK_MIB				0xcd0
++#define MTK_WED_RROQM_FDBK_COHERENT_MIB			0xcd4
++#define MTK_WED_RROQM_FDBK_IND_MIB			0xce0
++#define MTK_WED_RROQM_FDBK_ENQ_MIB			0xce4
++#define MTK_WED_RROQM_FDBK_ANC_MIB			0xce8
++#define MTK_WED_RROQM_FDBK_ANC2H_MIB			0xcec
++
++#define MTK_WED_RX_BM_RX_DMAD  				0xd80
++#define MTK_WED_RX_BM_BASE				0xd84
++#define MTK_WED_RX_BM_INIT_PTR				0xd88
++#define MTK_WED_RX_BM_PTR	      			0xd8c
++#define MTK_WED_RX_BM_PTR_HEAD				GENMASK(31, 16)
++#define MTK_WED_RX_BM_PTR_TAIL				GENMASK(15, 0)
++
++#define MTK_WED_RX_BM_BLEN	      			0xd90
++#define MTK_WED_RX_BM_STS				0xd94
++#define MTK_WED_RX_BM_INTF2				0xd98
++#define MTK_WED_RX_BM_INTF				0xd9c
++#define MTK_WED_RX_BM_ERR_STS				0xda8
++
++#define MTK_WED_WOCPU_VIEW_MIOD_BASE		 	0x8000
++#define MTK_WED_PCIE_INT_MASK				0x0
++
+ #endif
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.c b/drivers/net/ethernet/mediatek/mtk_wed_wo.c
+new file mode 100644
+index 000000000..10618fc1a
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.c
+@@ -0,0 +1,548 @@
++// SPDX-License-Identifier: GPL-2.0-only
++
++#include <linux/kernel.h>
++#include <linux/bitfield.h>
++#include <linux/dma-mapping.h>
++#include <linux/skbuff.h>
++#include <linux/of_platform.h>
++#include <linux/interrupt.h>
++#include <linux/of_address.h>
++#include <linux/iopoll.h>
++#include <linux/soc/mediatek/mtk_wed.h>
++#include "mtk_wed.h"
++#include "mtk_wed_regs.h"
++#include "mtk_wed_ccif.h"
++#include "mtk_wed_wo.h"
++
++struct wed_wo_profile_stat profile_total[6] = {
++	{1001, 0},
++	{1501, 0},
++	{3001, 0},
++	{5001, 0},
++	{10001, 0},
++	{0xffffffff, 0}
++};
++
++struct wed_wo_profile_stat profiling_mod[6] = {
++	{1001, 0},
++	{1501, 0},
++	{3001, 0},
++	{5001, 0},
++	{10001, 0},
++	{0xffffffff, 0}
++};
++
++struct wed_wo_profile_stat profiling_rro[6] = {
++	{1001, 0},
++	{1501, 0},
++	{3001, 0},
++	{5001, 0},
++	{10001, 0},
++	{0xffffffff, 0}
++};
++/* Program the ring base/size registers of @q and resync head/tail from the DMA index. */
++static void
++woif_q_sync_idx(struct mtk_wed_wo *wo, struct wed_wo_queue *q)
++{
++	woccif_w32(wo, q->regs->desc_base, q->desc_dma);
++	woccif_w32(wo, q->regs->ring_size, q->ndesc);
++
++	/* the WO firmware starts from index 1, hence dma_idx + 1 */
++	q->head = woccif_r32(wo, q->regs->dma_idx) + 1;
++	q->tail = q->head;
++}
++
++static void
++woif_q_reset(struct mtk_wed_wo *dev, struct wed_wo_queue *q)
++{
++
++	if (!q || !q->ndesc)
++		return;
++
++	woccif_w32(dev, q->regs->cpu_idx, 0);
++
++	woif_q_sync_idx(dev, q);
++}
++
++static void
++woif_q_kick(struct mtk_wed_wo *wo, struct wed_wo_queue *q, int offset)
++{
++	wmb();
++	woccif_w32(wo, q->regs->cpu_idx, q->head + offset);
++}
++/* Refill @q with page-frag RX buffers mapped for device writes; returns the number added. */
++static int
++woif_q_rx_fill(struct mtk_wed_wo *wo, struct wed_wo_queue *q)
++{
++	int len = q->buf_size, frames = 0;
++	struct wed_wo_queue_entry *entry;
++	struct wed_wo_desc *desc;
++	dma_addr_t addr;
++	u32 ctrl = 0;
++	void *buf;
++
++	if (!q->ndesc)
++		return 0;
++
++	spin_lock_bh(&q->lock);
++
++	while (q->queued < q->ndesc - 1) {
++		/* at most ndesc - 1 buffers are ever queued */
++		buf = page_frag_alloc(&q->rx_page, len, GFP_ATOMIC);
++		if (!buf)
++			break;
++
++		addr = dma_map_single(wo->hw->dev, buf, len, DMA_FROM_DEVICE);
++		if (unlikely(dma_mapping_error(wo->hw->dev, addr))) {
++			skb_free_frag(buf);
++			break;
++		}
++		dma_sync_single_for_cpu(wo->hw->dev, addr, len,
++					DMA_FROM_DEVICE); /* must match the map direction */
++		desc = &q->desc[q->head];
++		entry = &q->entry[q->head];
++
++		entry->dma_addr = addr;
++		entry->dma_len = len;
++
++		ctrl = FIELD_PREP(WED_CTL_SD_LEN0, entry->dma_len);
++		ctrl |= WED_CTL_LAST_SEC0;
++
++		WRITE_ONCE(desc->buf0, cpu_to_le32(addr));
++		WRITE_ONCE(desc->ctrl, cpu_to_le32(ctrl));
++		dma_sync_single_for_device(wo->hw->dev, addr, len,
++					DMA_FROM_DEVICE); /* must match the map direction */
++		q->queued++;
++		q->entry[q->head].buf = buf;
++
++		q->head = (q->head + 1) % q->ndesc;
++		frames++;
++	}
++
++	spin_unlock_bh(&q->lock);
++
++	return frames;
++}
++
++static void
++woif_q_rx_fill_process(struct mtk_wed_wo *wo, struct wed_wo_queue *q)
++{
++	if(woif_q_rx_fill(wo, q))
++		woif_q_kick(wo, q, -1);
++}
++/* Allocate ring + entries for @q. NOTE(review): ops->alloc declares (idx, n_desc, bufsize) - confirm arg order. */
++static int
++woif_q_alloc(struct mtk_wed_wo *dev, struct wed_wo_queue *q,
++		     int n_desc, int bufsize, int idx,
++		     struct wed_wo_queue_regs *regs)
++{
++	struct wed_wo_queue_regs *q_regs;
++	int size;
++
++	spin_lock_init(&q->lock);
++	spin_lock_init(&q->cleanup_lock);
++
++	q_regs = devm_kzalloc(dev->hw->dev, sizeof(*q_regs), GFP_KERNEL);
++	if (!q_regs)
++		return -ENOMEM;
++
++	q_regs->desc_base = regs->desc_base;
++	q_regs->ring_size = regs->ring_size;
++	q_regs->cpu_idx = regs->cpu_idx;
++	q_regs->dma_idx = regs->dma_idx;
++	q->regs = q_regs;
++	q->ndesc = n_desc;
++	q->buf_size = bufsize;
++
++	size = q->ndesc * sizeof(struct wed_wo_desc);
++	q->desc = dmam_alloc_coherent(dev->hw->dev, size,
++				      &q->desc_dma, GFP_KERNEL);
++	if (!q->desc)
++		return -ENOMEM;
++
++	size = q->ndesc * sizeof(*q->entry);
++	q->entry = devm_kzalloc(dev->hw->dev, size, GFP_KERNEL);
++	if (!q->entry)
++		return -ENOMEM;
++
++	if (idx == 0)
++		woif_q_reset(dev, &dev->q_tx);
++
++	return 0;
++}
++/* Reap transmitted entries of @q up to the hardware DMA index, or all of them when @flush. */
++static void
++woif_q_tx_clean(struct mtk_wed_wo *wo, struct wed_wo_queue *q, bool flush)
++{
++	int last;
++
++	if (!q || !q->ndesc)
++		return;
++
++	spin_lock_bh(&q->cleanup_lock);
++	if (flush)
++		last = -1;	/* never equals the u16 tail: reap everything queued */
++	else
++		last = woccif_r32(wo, q->regs->dma_idx); /* dma_idx is a register offset */
++
++	while (q->queued > 0 && q->tail != last) {
++		struct wed_wo_queue_entry *e;
++
++		e = &q->entry[q->tail];
++
++		dma_unmap_single(wo->hw->dev, e->dma_addr, e->dma_len,
++				 DMA_TO_DEVICE);
++
++		if (e->skb)
++			dev_kfree_skb(e->skb);
++
++		memset(e, 0, sizeof(*e));
++
++		spin_lock_bh(&q->lock);
++		q->tail = (q->tail + 1) % q->ndesc;
++		q->queued--;
++		spin_unlock_bh(&q->lock);
++
++		if (!flush && q->tail == last)
++			last = woccif_r32(wo, q->regs->dma_idx); /* pick up newer completions */
++	}
++	spin_unlock_bh(&q->cleanup_lock);
++
++	if (flush) {
++		spin_lock_bh(&q->lock);
++		woif_q_sync_idx(wo, q);
++		woif_q_kick(wo, q, 0);
++		spin_unlock_bh(&q->lock);
++	}
++}
++
++static void
++woif_q_rx_clean(struct mtk_wed_wo *wo, struct wed_wo_queue *q)
++{
++}
++/* Pop the buffer at the tail of @q once DMA_DONE is set (or forced by @flush); NULL if none. */
++static void *
++woif_q_deq(struct mtk_wed_wo *wo, struct wed_wo_queue *q, bool flush,
++		 int *len, u32 *info, bool *more)
++{
++	int buf_len = q->buf_size;	/* same size woif_q_rx_fill() mapped */
++	struct wed_wo_queue_entry *e;
++	struct wed_wo_desc *desc;
++	int idx = q->tail;
++	void *buf;
++
++	*more = false;
++	if (!q->queued)
++		return NULL;
++
++	if (flush)
++		q->desc[idx].ctrl |= cpu_to_le32(WED_CTL_DMA_DONE);
++	else if (!(q->desc[idx].ctrl & cpu_to_le32(WED_CTL_DMA_DONE)))
++		return NULL;
++
++	q->tail = (q->tail + 1) % q->ndesc;
++	q->queued--;
++
++	desc = &q->desc[idx];
++	e = &q->entry[idx];
++
++	buf = e->buf;
++	if (len) {
++		u32 ctl = le32_to_cpu(READ_ONCE(desc->ctrl));
++		*len = FIELD_GET(WED_CTL_SD_LEN0, ctl);
++		*more = !(ctl & WED_CTL_LAST_SEC0);
++	}
++
++	if (info)
++		*info = le32_to_cpu(desc->info);
++	if (buf)
++		dma_unmap_single(wo->hw->dev, e->dma_addr, buf_len,
++				 DMA_FROM_DEVICE);
++	e->skb = NULL;	/* skb shares a union with buf, so this clears the slot */
++
++	return buf;
++}
++
++static int
++woif_q_init(struct mtk_wed_wo *dev,
++	       int (*poll)(struct napi_struct *napi, int budget))
++{
++	init_dummy_netdev(&dev->napi_dev);
++	snprintf(dev->napi_dev.name, sizeof(dev->napi_dev.name), "%s",
++		 "woif_q");
++
++	if (dev->q_rx.ndesc) {
++		netif_napi_add(&dev->napi_dev, &dev->napi, poll, 64);
++		woif_q_rx_fill(dev, &dev->q_rx);
++		woif_q_reset(dev, &dev->q_rx);
++		napi_enable(&dev->napi);
++	}
++
++	return 0;
++}
++
++void woif_q_rx_skb(struct mtk_wed_wo *wo, struct sk_buff *skb)
++{
++	struct wed_cmd_hdr *hdr = (struct wed_cmd_hdr *)skb->data;
++	int ret;
++
++	ret = mtk_wed_mcu_cmd_sanity_check(wo, skb);
++	if (ret)
++		goto free_skb;
++
++	if (WED_WO_CMD_FLAG_IS_RSP(hdr))
++		mtk_wed_mcu_rx_event(wo, skb);
++	else
++		mtk_wed_mcu_rx_unsolicited_event(wo, skb);
++
++	return;
++free_skb:
++	dev_kfree_skb(skb);
++}
++/* Queue @skb on @q and kick the WO; consumes @skb and returns -ENOMEM on any failure. */
++static int
++woif_q_tx_skb(struct mtk_wed_wo *wo, struct wed_wo_queue *q,
++		      struct sk_buff *skb)
++{
++	struct wed_wo_queue_entry *entry;
++	struct wed_wo_desc *desc;
++	int len, idx = -1;
++	dma_addr_t addr;
++	u32 ctrl = 0;
++
++	len = skb->len;
++	addr = dma_map_single(wo->hw->dev, skb->data, len, DMA_TO_DEVICE);
++	if (unlikely(dma_mapping_error(wo->hw->dev, addr)))
++		goto error;
++
++	/* packet tx, force trigger tx clean. */
++	if (q->queued + MTK_WED_WO_TXQ_FREE_THR >= q->ndesc - 1)
++		woif_q_tx_clean(wo, q, false);
++
++	if (q->queued + 1 >= q->ndesc - 1) {
++		dma_unmap_single(wo->hw->dev, addr, len, DMA_TO_DEVICE);
++		goto error;
++	}
++
++	spin_lock_bh(&q->lock);
++
++	dma_sync_single_for_device(wo->hw->dev, addr, len,
++					   DMA_TO_DEVICE);
++
++	idx = q->head;
++
++	desc = &q->desc[idx];
++	entry = &q->entry[idx];
++
++	entry->dma_addr = addr;
++	entry->dma_len = len;
++
++	ctrl = FIELD_PREP(WED_CTL_SD_LEN0, len);
++	ctrl |= WED_CTL_LAST_SEC0;
++	ctrl |= WED_CTL_DMA_DONE;
++
++	WRITE_ONCE(desc->buf0, cpu_to_le32(addr));
++	WRITE_ONCE(desc->ctrl, cpu_to_le32(ctrl));
++
++	q->queued++;
++	q->entry[idx].skb = skb;
++
++	woif_q_kick(wo, q, 0);
++	wo->drv_ops->kickout(wo);
++
++	q->head = (q->head + 1) % q->ndesc;
++	spin_unlock_bh(&q->lock);
++	return 0;
++
++error:
++	dev_kfree_skb(skb);
++	return -ENOMEM;
++}
++
++static const struct wed_wo_queue_ops wo_queue_ops = {
++	.init = woif_q_init,
++	.alloc = woif_q_alloc,
++	.reset = woif_q_reset,
++	.tx_skb = woif_q_tx_skb,
++	.tx_clean = woif_q_tx_clean,
++	.rx_clean = woif_q_rx_clean,
++	.kick = woif_q_kick,
++};
++/* Dequeue up to @budget completed RX buffers, wrap each in an skb and dispatch it; returns frames handled. */
++static int
++mtk_wed_wo_rx_process(struct mtk_wed_wo *wo, struct wed_wo_queue *q, int budget)
++{
++	int len, data_len, done = 0;
++	struct sk_buff *skb;
++	unsigned char *data;
++	bool more;
++
++	while (done < budget) {
++		u32 info;
++
++		data = woif_q_deq(wo, q, false, &len, &info, &more);
++		if (!data)
++			break;
++
++		data_len = SKB_WITH_OVERHEAD(q->buf_size);
++
++		if (data_len < len) {	/* reported length exceeds the usable buffer: drop */
++			skb_free_frag(data);
++			continue;
++		}
++
++		skb = build_skb(data, q->buf_size);
++		if (!skb) {
++			skb_free_frag(data);
++			continue;
++		}
++
++		__skb_put(skb, len);
++		done++;
++
++		woif_q_rx_skb(wo, skb);
++	}
++
++	woif_q_rx_fill_process(wo, q);	/* replenish the ring and kick if anything was added */
++
++	return done;
++}
++
++void mtk_wed_wo_set_isr_mask(struct mtk_wed_wo *wo, bool set,
++		       u32 clear, u32 val)
++{
++	unsigned long flags;
++
++	spin_lock_irqsave(&wo->ccif.irq_lock, flags);
++	wo->ccif.irqmask &= ~clear;
++	wo->ccif.irqmask |= val;
++	if (set)
++		wo->drv_ops->set_isr(wo, wo->ccif.irqmask);
++
++	spin_unlock_irqrestore(&wo->ccif.irq_lock, flags);
++}
++
++static inline void mtk_wed_wo_set_ack_mask(struct mtk_wed_wo *wo, u32 mask)
++{
++	wo->drv_ops->set_ack(wo, mask);
++}
++
++static void mtk_wed_wo_poll_complete(struct mtk_wed_wo *wo)
++{
++	mtk_wed_wo_set_ack_mask(wo, wo->ccif.q_int_mask);
++	mtk_wed_wo_isr_enable(wo, wo->ccif.q_int_mask);
++}
++/* NAPI poll: drain q_rx within @budget; when finished early, ack and re-enable the queue interrupt. */
++int mtk_wed_wo_rx_poll(struct napi_struct *napi, int budget)
++{
++	struct mtk_wed_wo *wo;
++	int done = 0, cur;
++
++	wo = container_of(napi->dev, struct mtk_wed_wo, napi_dev);
++
++	rcu_read_lock();
++
++	do {
++		cur = mtk_wed_wo_rx_process(wo, &wo->q_rx, budget - done);
++		/* rx packet handle */
++		done += cur;
++	} while (cur && done < budget);	/* stop when the ring is empty or the budget is spent */
++
++	rcu_read_unlock();
++
++	if (done < budget && napi_complete(napi))
++		mtk_wed_wo_poll_complete(wo);
++
++	return done;
++}
++/* Tasklet body: mask interrupts, read the pending status and schedule NAPI for queue events. */
++static void mtk_wed_wo_isr_tasklet(unsigned long data)
++{
++	struct mtk_wed_wo *wo = (struct mtk_wed_wo *)data;
++	u32 intr, mask;
++
++	/* disable isr */
++	wo->drv_ops->set_isr(wo, 0);
++
++	intr = wo->drv_ops->get_csr(wo);
++	intr &= wo->ccif.irqmask;	/* only consider sources that are currently enabled */
++
++	mask = intr & (wo->ccif.q_int_mask | wo->ccif.q_exep_mask);
++	mtk_wed_wo_isr_disable(wo, mask);
++
++	if (intr & wo->ccif.q_int_mask)
++		napi_schedule(&wo->napi);
++
++	if (intr & wo->ccif.q_exep_mask) {
++		/* todo */
++	}
++}
++
++static irqreturn_t mtk_wed_wo_isr_handler(int irq, void *wo_instance)
++{
++	struct mtk_wed_wo *wo = wo_instance;
++
++	wo->drv_ops->set_isr(wo, 0);
++
++	tasklet_schedule(&wo->irq_tasklet);
++
++	return IRQ_HANDLED;
++}
++/* Allocate the WO context for @hw and bring up hardware, firmware and exception logging. */
++int mtk_wed_wo_init(struct mtk_wed_hw *hw)
++{
++	struct mtk_wed_wo *wo;
++	int ret = 0;
++
++	wo = kzalloc(sizeof(*wo), GFP_KERNEL);
++	if (!wo)
++		return -ENOMEM;
++
++	wo->hw = hw;
++	wo->queue_ops = &wo_queue_ops;
++	hw->wed_wo = wo;
++
++	tasklet_init(&wo->irq_tasklet, mtk_wed_wo_isr_tasklet,
++		     (unsigned long)wo);
++
++	skb_queue_head_init(&wo->mcu.res_q);
++	init_waitqueue_head(&wo->mcu.wait);
++	mutex_init(&wo->mcu.mutex);
++
++	ret = wed_wo_hardware_init(wo, mtk_wed_wo_isr_handler);
++	if (ret)
++		goto error;
++
++	/* fw download */
++	ret = wed_wo_mcu_init(wo);
++	if (ret)
++		goto error;
++
++	ret = mtk_wed_exception_init(wo);
++	if (ret)
++		goto error;
++
++	return ret;
++
++error:
++	hw->wed_wo = NULL;	/* do not leave a pointer to the freed context */
++	kfree(wo);
++	return ret;
++}
++/* Release the exception log buffer of the WO context attached to @hw, if any. */
++void mtk_wed_wo_exit(struct mtk_wed_hw *hw)
++{
++	/*
++	 * Only the exception log is released here.
++	 * NOTE(review): the wo context itself, its tasklet/NAPI and the
++	 * CCIF bus state are not torn down in this function - confirm
++	 * whether that is handled elsewhere.
++	 */
++	struct mtk_wed_wo *wo = hw->wed_wo;
++
++	if (wo && wo->exp.log) {
++		dma_unmap_single(wo->hw->dev, wo->exp.phys, wo->exp.log_size, DMA_FROM_DEVICE);
++		kfree(wo->exp.log);
++	}
++
++}
+diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.h b/drivers/net/ethernet/mediatek/mtk_wed_wo.h
+new file mode 100644
+index 000000000..00b39e779
+--- /dev/null
++++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.h
+@@ -0,0 +1,334 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/* Copyright (C) 2021 Felix Fietkau <nbd@nbd.name> */
++
++#ifndef __MTK_WED_WO_H
++#define __MTK_WED_WO_H
++
++#include <linux/netdevice.h>
++#include <linux/skbuff.h>
++#include "mtk_wed.h"
++
++#define WED_CTL_SD_LEN1		GENMASK(13, 0)
++#define WED_CTL_LAST_SEC1	BIT(14)
++#define WED_CTL_BURST		BIT(15)
++#define WED_CTL_SD_LEN0_SHIFT	16
++#define WED_CTL_SD_LEN0		GENMASK(29, 16)
++#define WED_CTL_LAST_SEC0	BIT(30)
++#define WED_CTL_DMA_DONE	BIT(31)
++#define WED_INFO_WINFO		GENMASK(15, 0)
++
++#define MTK_WED_WO_TXQ_FREE_THR		10
++
++#define WED_WO_PROFILE_MAX_LVL		6
++
++
++enum mtk_wed_fw_region_id {
++	WO_REGION_EMI = 0,
++	WO_REGION_ILM,
++	WO_REGION_DATA,
++	WO_REGION_BOOT,
++	__WO_REGION_MAX
++};
++
++struct wed_wo_profile_stat {
++	u32 bound;
++	u32 record;
++};
++/* Bump the first bucket of @_rec (WED_WO_PROFILE_MAX_LVL entries) whose bound exceeds @_val. */
++#define PROFILE_STAT(_rec, _val) do {			\
++		u8 _lvl = 0;				\
++		while (_lvl < WED_WO_PROFILE_MAX_LVL) {	\
++			if ((_val) < (_rec)[_lvl].bound) {	\
++				(_rec)[_lvl].record++;	\
++				break;			\
++			}				\
++			_lvl++;				\
++		}					\
++	} while (0)
++
++/* align with wo report structure */
++struct wed_wo_log {
++	u32 sn;
++	u32 total;
++	u32 rro;
++	u32 mod;
++};
++
++struct wed_wo_rxcnt {
++	u16 wlan_idx;
++	u16 tid;
++	u32 rx_pkt_cnt;
++	u32 rx_byte_cnt;
++	u32 rx_err_cnt;
++	u32 rx_drop_cnt;
++};
++
++struct wed_wo_queue {
++	struct wed_wo_queue_regs *regs;
++
++	spinlock_t lock;
++	spinlock_t cleanup_lock;
++	struct wed_wo_queue_entry *entry;
++	struct wed_wo_desc *desc;
++
++	u16 first;
++	u16 head;
++	u16 tail;
++	int ndesc;
++	int queued;
++	int buf_size;
++
++	u8 hw_idx;
++	u8 qid;
++	u8 flags;
++
++	dma_addr_t desc_dma;
++	struct page_frag_cache rx_page;
++};
++
++
++struct wed_wo_mmio {
++	struct regmap *regs;
++
++	spinlock_t irq_lock;
++	u8 irq;
++	u32 irqmask;
++
++	u32 q_int_mask;
++	u32 q_exep_mask;
++};
++
++struct wed_wo_mcu {
++	struct mutex mutex;
++	u32 msg_seq;
++	int timeout;
++
++	struct sk_buff_head res_q;
++	wait_queue_head_t wait;
++};
++
++struct wed_wo_exception {
++	void* log;
++	int log_size;
++	dma_addr_t phys;
++};
++
++struct wed_wo_queue_regs {
++	u32 desc_base;
++	u32 ring_size;
++	u32 cpu_idx;
++	u32 dma_idx;
++};
++
++struct wed_wo_desc {
++	__le32 buf0;
++	__le32 ctrl;
++	__le32 buf1;
++	__le32 info;
++	__le32 reserved[4];
++} __packed __aligned(32);
++
++struct wed_wo_queue_entry {
++	union {
++		void *buf;
++		struct sk_buff *skb;
++	};
++
++	u32 dma_addr;
++	u16 dma_len;
++	u16 wcid;
++	bool skip_buf0:1;
++	bool skip_buf1:1;
++	bool done:1;
++};
++
++struct wo_cmd_rxcnt_t {
++	u16 wlan_idx;
++	u16 tid;
++	u32 rx_pkt_cnt;
++	u32 rx_byte_cnt;
++	u32 rx_err_cnt;
++	u32 rx_drop_cnt;
++};
++
++struct wo_cmd_query {
++	u32 query0;
++	u32 query1;
++};
++
++struct wed_cmd_hdr {
++	/*DW0*/
++	u8 ver;
++	u8 cmd_id;
++	u16 length;
++
++	/*DW1*/
++	u16 uni_id;
++	u16 flag;
++
++	/*DW2*/
++	int status;
++
++	/*DW3*/
++	u8 reserved[20];
++};
++
++struct mtk_wed_fw_region {
++	void *addr;
++	u32 addr_pa;
++	u32 size;
++	u32 shared;
++};
++
++struct wed_wo_queue_ops;
++struct wed_wo_drv_ops;
++struct wed_wo_mcu_ops;
++
++struct wo_rx_total_cnt {
++	u64 rx_pkt_cnt;
++	u64 rx_byte_cnt;
++	u64 rx_err_cnt;
++	u64 rx_drop_cnt;
++};
++
++struct mtk_wed_wo {
++	struct mtk_wed_hw *hw;
++
++	struct wed_wo_mmio ccif;
++	struct wed_wo_mcu mcu;
++	struct wed_wo_exception exp;
++
++	const struct wed_wo_drv_ops *drv_ops;
++	const struct wed_wo_mcu_ops *mcu_ops;
++	const struct wed_wo_queue_ops *queue_ops;
++
++	struct net_device napi_dev;
++	spinlock_t rx_lock;
++	struct napi_struct napi;
++	struct sk_buff_head rx_skb;
++	struct wed_wo_queue q_rx;
++	struct tasklet_struct irq_tasklet;
++
++	struct wed_wo_queue q_tx;
++
++	struct mtk_wed_fw_region region[__WO_REGION_MAX];
++
++	struct wed_wo_profile_stat total[WED_WO_PROFILE_MAX_LVL];
++	struct wed_wo_profile_stat mod[WED_WO_PROFILE_MAX_LVL];
++	struct wed_wo_profile_stat rro[WED_WO_PROFILE_MAX_LVL];
++	char dirname[4];
++	struct wo_rx_total_cnt wo_rxcnt[8][544];
++};
++
++struct wed_wo_queue_ops {
++	int (*init)(struct mtk_wed_wo *wo,
++		    int (*poll)(struct napi_struct *napi, int budget));
++
++	int (*alloc)(struct mtk_wed_wo *wo, struct wed_wo_queue *q,
++		     int idx, int n_desc, int bufsize,
++		     struct wed_wo_queue_regs *regs);
++
++	void (*reset)(struct mtk_wed_wo *wo, struct wed_wo_queue *q);
++
++	int (*tx_skb)(struct mtk_wed_wo *wo, struct wed_wo_queue *q,
++		      struct sk_buff *skb);
++	int (*tx_skb1)(struct mtk_wed_wo *wo, struct wed_wo_queue *q,
++		       u8 *msg, u32 msg_len);
++	void (*tx_clean)(struct mtk_wed_wo *wo, struct wed_wo_queue *q,
++			 bool flush);
++
++	void (*rx_clean)(struct mtk_wed_wo *wo, struct wed_wo_queue *q);
++
++	void (*kick)(struct mtk_wed_wo *wo, struct wed_wo_queue *q, int offset);
++};
++
++struct wed_wo_drv_ops {
++	void (*kickout)(struct mtk_wed_wo *wo);
++	void (*set_ack)(struct mtk_wed_wo *wo, u32 mask);
++	void (*set_isr)(struct mtk_wed_wo *wo, u32 mask);
++	u32 (*get_csr)(struct mtk_wed_wo *wo);
++	int (*tx_prepare_skb)(struct mtk_wed_wo *wo);
++	bool (*check_excpetion)(struct mtk_wed_wo *wo);
++	void (*clear_int)(struct mtk_wed_wo *wo, u32 mask);
++};
++
++struct wed_wo_mcu_ops {
++	u32 headroom;
++
++	int (*mcu_skb_send_msg)(struct mtk_wed_wo *wo, int to_id,
++				int cmd, struct sk_buff *skb,
++				int *seq, bool wait_resp);
++
++	int (*mcu_parse_response)(struct mtk_wed_wo *wo, int cmd,
++				  struct sk_buff *skb, int seq);
++
++	int (*mcu_restart)(struct mtk_wed_wo *wo);
++};
++
++#define mtk_wed_wo_q_init(wo, ...)	(wo)->queue_ops->init((wo), __VA_ARGS__)
++#define mtk_wed_wo_q_alloc(wo, ...)	(wo)->queue_ops->alloc((wo), __VA_ARGS__)
++#define mtk_wed_wo_q_reset(wo, ...)	(wo)->queue_ops->reset((wo), __VA_ARGS__)
++#define mtk_wed_wo_q_tx_skb(wo, ...)	(wo)->queue_ops->tx_skb((wo), __VA_ARGS__)
++#define mtk_wed_wo_q_tx_skb1(wo, ...)	(wo)->queue_ops->tx_skb1((wo), __VA_ARGS__)
++#define mtk_wed_wo_q_tx_clean(wo, ...)	(wo)->queue_ops->tx_clean((wo), __VA_ARGS__)
++#define mtk_wed_wo_q_rx_clean(wo, ...)	(wo)->queue_ops->rx_clean((wo), __VA_ARGS__)
++#define mtk_wed_wo_q_kick(wo, ...)	(wo)->queue_ops->kick((wo), __VA_ARGS__)
++
++enum {
++	WARP_CMD_FLAG_RSP		= 1 << 0, /* is response */
++	WARP_CMD_FLAG_NEED_RSP		= 1 << 1, /* need response */
++	WARP_CMD_FLAG_FROM_TO_WO	= 1 << 2, /* send between host and wo */
++};
++
++#define WED_WO_CMD_FLAG_IS_RSP(_hdr)		((_hdr)->flag & (WARP_CMD_FLAG_RSP))
++#define WED_WO_CMD_FLAG_SET_RSP(_hdr)		((_hdr)->flag |= (WARP_CMD_FLAG_RSP))
++#define WED_WO_CMD_FLAG_IS_NEED_RSP(_hdr)	((_hdr)->flag & (WARP_CMD_FLAG_NEED_RSP))
++#define WED_WO_CMD_FLAG_SET_NEED_RSP(_hdr)	((_hdr)->flag |= (WARP_CMD_FLAG_NEED_RSP))
++#define WED_WO_CMD_FLAG_IS_FROM_TO_WO(_hdr)	((_hdr)->flag & (WARP_CMD_FLAG_FROM_TO_WO))
++#define WED_WO_CMD_FLAG_SET_FROM_TO_WO(_hdr)	((_hdr)->flag |= (WARP_CMD_FLAG_FROM_TO_WO))
++
++void mtk_wed_wo_set_isr_mask(struct mtk_wed_wo *wo, bool set,
++			     u32 clear, u32 val);
++
++static inline void mtk_wed_wo_isr_enable(struct mtk_wed_wo *wo, u32 mask)
++{
++	mtk_wed_wo_set_isr_mask(wo, false, 0, mask);
++
++	tasklet_schedule(&wo->irq_tasklet);
++}
++
++static inline void mtk_wed_wo_isr_disable(struct mtk_wed_wo *wo, u32 mask)
++{
++	mtk_wed_wo_set_isr_mask(wo, true, mask, 0);
++}
++
++static inline void
++wo_w32(struct mtk_wed_wo *dev, u32 reg, u32 val)
++{
++	writel(val, dev->region[WO_REGION_BOOT].addr + reg);
++}
++
++static inline u32
++wo_r32(struct mtk_wed_wo *dev, u32 reg)
++{
++	return readl(dev->region[WO_REGION_BOOT].addr + reg);
++}
++static inline void
++woccif_w32(struct mtk_wed_wo *dev, u32 reg, u32 val)
++{
++	regmap_write(dev->ccif.regs, reg, val);
++}
++
++static inline u32
++woccif_r32(struct mtk_wed_wo *dev, u32 reg)
++{
++	unsigned int val;
++
++	regmap_read(dev->ccif.regs, reg, &val);
++
++	return val;
++}
++
++int mtk_wed_wo_init(struct mtk_wed_hw *hw);
++#endif
++
+diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h
+index 24742604b..b6b6823ae 100644
+--- a/include/linux/soc/mediatek/mtk_wed.h
++++ b/include/linux/soc/mediatek/mtk_wed.h
+@@ -7,6 +7,9 @@
+ #include <linux/pci.h>
+ 
+ #define MTK_WED_TX_QUEUES		2
++#define MTK_WED_RX_QUEUES		2
++
++#define WED_WO_STA_REC			0x6
+ 
+ enum {
+ 	MTK_NO_WED,
+@@ -33,6 +36,24 @@ struct mtk_wed_ring {
+ 	void __iomem *wpdma;
+ };
+ 
++struct mtk_rxbm_desc {
++	__le32 buf0;
++	__le32 token;
++} __packed __aligned(4);
++
++struct dma_buf {
++	int size;
++	void **pages;
++	struct mtk_wdma_desc *desc;
++	dma_addr_t desc_phys;
++};
++
++struct dma_entry {
++	int size;
++	struct mtk_rxbm_desc *desc;
++	dma_addr_t desc_phys;
++};
++
+ struct mtk_wed_device {
+ #ifdef CONFIG_NET_MEDIATEK_SOC_WED
+ 	const struct mtk_wed_ops *ops;
+@@ -46,19 +67,27 @@ struct mtk_wed_device {
+ 	struct mtk_wed_ring tx_ring[MTK_WED_TX_QUEUES];
+ 	struct mtk_wed_ring txfree_ring;
+ 	struct mtk_wed_ring tx_wdma[MTK_WED_TX_QUEUES];
++	struct mtk_wed_ring rx_ring[MTK_WED_RX_QUEUES];
++	struct mtk_wed_ring rx_wdma[MTK_WED_RX_QUEUES];
++
++	struct dma_buf buf_ring;
++	struct dma_entry rx_buf_ring;
++	struct page_frag_cache rx_page;
+ 
+ 	struct {
+-		int size;
+-		void **pages;
+-		struct mtk_wdma_desc *desc;
+-		dma_addr_t desc_phys;
+-	} buf_ring;
++		struct mtk_wed_ring rro_ring;
++		void __iomem *rro_desc;
++		dma_addr_t miod_desc_phys;
++		dma_addr_t fdbk_desc_phys;
++		u32 mcu_view_miod;
++	} rro;
+ 
+ 	/* filled by driver: */
+ 	struct {
+ 		struct pci_dev *pci_dev;
+ 		void __iomem *base;
+ 		u32 bus_type;
++		u32 phy_base;
+ 
+ 		union {
+ 			u32 wpdma_phys;
+@@ -67,16 +96,25 @@ struct mtk_wed_device {
+ 		u32 wpdma_mask;
+ 		u32 wpdma_tx;
+ 		u32 wpdma_txfree;
++		u32 wpdma_rx_glo;
++		u32 wpdma_rx;
+ 
+ 		u8 tx_tbit[MTK_WED_TX_QUEUES];
++		u8 rx_tbit[MTK_WED_RX_QUEUES];
+ 		u8 txfree_tbit;
+ 
+ 		u16 token_start;
+ 		unsigned int nbuf;
++		unsigned int rx_nbuf;
++		unsigned int rx_pkt;
++		unsigned int rx_pkt_size;
+ 
+ 		u32 (*init_buf)(void *ptr, dma_addr_t phys, int token_id);
+ 		int (*offload_enable)(struct mtk_wed_device *wed);
+ 		void (*offload_disable)(struct mtk_wed_device *wed);
++		u32 (*init_rx_buf)(struct mtk_wed_device *wed,
++				   int pkt_num);
++		void (*release_rx_buf)(struct mtk_wed_device *wed);
+ 	} wlan;
+ #endif
+ };
+@@ -87,6 +125,10 @@ struct mtk_wed_ops {
+ 			     void __iomem *regs);
+ 	int (*txfree_ring_setup)(struct mtk_wed_device *dev,
+ 				 void __iomem *regs);
++	int (*rx_ring_setup)(struct mtk_wed_device *dev, int ring,
++				 void __iomem *regs);
++	int (*msg_update)(struct mtk_wed_device *dev, int cmd_id,
++			  void *data, int len);
+ 	void (*detach)(struct mtk_wed_device *dev);
+ 
+ 	void (*stop)(struct mtk_wed_device *dev);
+@@ -98,6 +140,8 @@ struct mtk_wed_ops {
+ 
+ 	u32 (*irq_get)(struct mtk_wed_device *dev, u32 mask);
+ 	void (*irq_set_mask)(struct mtk_wed_device *dev, u32 mask);
++	void (*ppe_check)(struct mtk_wed_device *dev, struct sk_buff *skb, 
++			  u32 reason, u32 hash);
+ };
+ 
+ extern const struct mtk_wed_ops __rcu *mtk_soc_wed_ops;
+@@ -130,6 +174,10 @@ mtk_wed_device_attach(struct mtk_wed_device *dev)
+ 	(_dev)->ops->tx_ring_setup(_dev, _ring, _regs)
+ #define mtk_wed_device_txfree_ring_setup(_dev, _regs) \
+ 	(_dev)->ops->txfree_ring_setup(_dev, _regs)
++#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs) \
++	(_dev)->ops->rx_ring_setup(_dev, _ring, _regs)
++#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) \
++	(_dev)->ops->msg_update(_dev, _id, _msg, _len)
+ #define mtk_wed_device_reg_read(_dev, _reg) \
+ 	(_dev)->ops->reg_read(_dev, _reg)
+ #define mtk_wed_device_reg_write(_dev, _reg, _val) \
+@@ -138,6 +186,8 @@ mtk_wed_device_attach(struct mtk_wed_device *dev)
+ 	(_dev)->ops->irq_get(_dev, _mask)
+ #define mtk_wed_device_irq_set_mask(_dev, _mask) \
+ 	(_dev)->ops->irq_set_mask(_dev, _mask)
++#define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) \
++	(_dev)->ops->ppe_check(_dev, _skb, _reason, _hash)
+ #else
+ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev)
+ {
+@@ -147,10 +197,13 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev)
+ #define mtk_wed_device_start(_dev, _mask) do {} while (0)
+ #define mtk_wed_device_tx_ring_setup(_dev, _ring, _regs) -ENODEV
+ #define mtk_wed_device_txfree_ring_setup(_dev, _ring, _regs) -ENODEV
++#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs) -ENODEV
++#define mtk_wed_device_update_msg(_dev, _id, _msg, _len) -ENODEV
+ #define mtk_wed_device_reg_read(_dev, _reg) 0
+ #define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0)
+ #define mtk_wed_device_irq_get(_dev, _mask) 0
+ #define mtk_wed_device_irq_set_mask(_dev, _mask) do {} while (0)
++#define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash)  do {} while (0)
+ #endif
+ 
+ #endif
+-- 
+2.18.0
+