[rdk-b][common][bsp][Refactor and sync kernel/wifi from OpenWrt]

[Description]
Refactor and sync kernel/wifi from OpenWrt; add HW QoS/PPPQ support

[Release-log]
N/A

diff --git a/recipes-kernel/linux-mt76/files/patches/3001-mt76-add-wed-tx-support.patch b/recipes-kernel/linux-mt76/files/patches/3001-mt76-add-wed-tx-support.patch
index b5b407c..6e4f96e 100644
--- a/recipes-kernel/linux-mt76/files/patches/3001-mt76-add-wed-tx-support.patch
+++ b/recipes-kernel/linux-mt76/files/patches/3001-mt76-add-wed-tx-support.patch
@@ -7,7 +7,7 @@
 ---
  mt76_connac.h   |   1 +
  mt7915/dma.c    |  59 +++++++++++++++++++-------
- mt7915/mac.c    |   4 +-
+ mt7915/mac.c    |  11 +++--
  mt7915/main.c   |   9 +++-
  mt7915/mcu.c    |   2 +-
  mt7915/mmio.c   | 110 +++++++++++++++++++++++++++++++++++++++++++++++-
@@ -159,6 +159,31 @@
  }
  
  static void
+@@ -944,6 +944,7 @@ mt7915_mac_tx_free(struct mt7915_dev *dev, void *data, int len)
+ 	LIST_HEAD(free_list);
+ 	void *end = data + len;
+ 	bool v3, wake = false;
++	bool with_txwi = true;
+ 	u16 total, count = 0;
+ 	u32 txd = le32_to_cpu(free->txd);
+ 	__le32 *cur_info;
+@@ -997,12 +998,14 @@ mt7915_mac_tx_free(struct mt7915_dev *dev, void *data, int len)
+ 			txwi = mt76_token_release(mdev, msdu, &wake);
+ 			if (!txwi)
+ 				continue;
++			else
++				with_txwi = false;
+
+ 			mt7915_txwi_free(dev, txwi, sta, &free_list);
+ 		}
+ 	}
+-
+-	mt7915_mac_tx_free_done(dev, &free_list, wake);
++	if (!with_txwi)
++		mt7915_mac_tx_free_done(dev, &free_list, wake);
+ }
+
+ static void
 diff --git a/mt7915/main.c b/mt7915/main.c
 index 192b0a9b..3a09f3f5 100644
 --- a/mt7915/main.c
diff --git a/recipes-kernel/linux-mt76/files/patches/3003-mt76-add-wed-rx-support.patch b/recipes-kernel/linux-mt76/files/patches/3003-mt76-add-wed-rx-support.patch
index d07d072..4646c44 100644
--- a/recipes-kernel/linux-mt76/files/patches/3003-mt76-add-wed-rx-support.patch
+++ b/recipes-kernel/linux-mt76/files/patches/3003-mt76-add-wed-rx-support.patch
@@ -758,7 +758,17 @@
 index ac30698f..197a0169 100644
 --- a/mt7915/dma.c
 +++ b/mt7915/dma.c
-@@ -349,6 +349,7 @@ static int mt7915_dma_enable(struct mt7915_dev *dev)
+@@ -337,7 +337,8 @@ static int mt7915_dma_enable(struct mt7915_dev *dev)
+ 		wed_irq_mask |= MT_INT_TX_DONE_BAND0 | MT_INT_TX_DONE_BAND1;
+ 		if (!is_mt7986(&dev->mt76))
+ 			mt76_wr(dev, MT_INT_WED_MASK_CSR, wed_irq_mask);
+-		mt76_wr(dev, MT_INT_MASK_CSR, wed_irq_mask);
++		else
++			mt76_wr(dev, MT_INT_MASK_CSR, wed_irq_mask);
+ 		mtk_wed_device_start(&dev->mt76.mmio.wed, wed_irq_mask);
+ 	}
+
+@@ -349,6 +350,7 @@ static int mt7915_dma_enable(struct mt7915_dev *dev)
  int mt7915_dma_init(struct mt7915_dev *dev, struct mt7915_phy *phy2)
  {
  	struct mt76_dev *mdev = &dev->mt76;
@@ -1127,7 +1137,7 @@
  
  #define MT_WFDMA0_MCU_HOST_INT_ENA	MT_WFDMA0(0x1f4)
  #define MT_WFDMA0_MT_WA_WDT_INT		BIT(31)
-@@ -668,12 +675,15 @@ enum offs_rev {
+@@ -668,12 +675,18 @@ enum offs_rev {
  #define MT_TXQ_EXT_CTRL(q)		(MT_Q_BASE(__TXQ(q)) + 0x600 +	\
  					 MT_TXQ_ID(q)* 0x4)
  
@@ -1139,9 +1149,13 @@
  
  #define MT_WED_TX_DONE_BAND0		(is_mt7915(mdev)? 4 : 30)
  #define MT_WED_TX_DONE_BAND1		(is_mt7915(mdev)? 5 : 31)
- #define MT_WED_TX_FREE_DONE		(is_mt7915(mdev)? 1 : 2)
-+#define MT_WED_RX_DONE_BAND0		(is_mt7915(mdev)? 16 : 22)
-+#define MT_WED_RX_DONE_BAND1		(is_mt7915(mdev)? 17 : 23)
+-#define MT_WED_TX_FREE_DONE		(is_mt7915(mdev)? 1 : 2)
++#define MT_WED_TX_FREE_DONE		(is_mt7986(mdev) ? 2 : 1)
++#define MT_WED_RX_DONE_BAND0		(is_mt7915(mdev) ? 16 : \
++					 (is_mt7986(mdev) ? 22 : 18))
++#define MT_WED_RX_DONE_BAND1		(is_mt7915(mdev) ? 17 : \
++					 (is_mt7986(mdev) ? 23 : 19))
++
  
  #define MT_INT_SOURCE_CSR		__REG(INT_SOURCE_CSR)
  #define MT_INT_MASK_CSR			__REG(INT_MASK_CSR)
diff --git a/recipes-kernel/linux-mt76/files/patches/3009-mt76-mt7915-find-rx-token-by-physical-address.patch b/recipes-kernel/linux-mt76/files/patches/3009-mt76-mt7915-find-rx-token-by-physical-address.patch
new file mode 100644
index 0000000..8e1873d
--- /dev/null
+++ b/recipes-kernel/linux-mt76/files/patches/3009-mt76-mt7915-find-rx-token-by-physical-address.patch
@@ -0,0 +1,50 @@
+From c72a8943ae9e7b97d9f67c066fe232eba920f517 Mon Sep 17 00:00:00 2001
+From: Peter Chiu <chui-hao.chiu@mediatek.com>
+Date: Tue, 27 Sep 2022 16:34:26 +0800
+Subject: [PATCH 3009/3010] mt76: mt7915: find rx token by physical address
+
+The token id in RxDMAD may be incorrect when it is not the last frame due to
+WED HW bug. Lookup correct token id by physical address in sdp0.
+
+Signed-off-by: Peter Chiu <chui-hao.chiu@mediatek.com>
+---
+ dma.c | 19 ++++++++++++++++++-
+ 1 file changed, 18 insertions(+), 1 deletion(-)
+
+diff --git a/dma.c b/dma.c
+index fa56ccfb..c5513690 100644
+--- a/dma.c
++++ b/dma.c
+@@ -380,11 +380,28 @@ mt76_dma_get_buf(struct mt76_dev *dev, struct mt76_queue *q, int idx,
+
+ 	type = FIELD_GET(MT_QFLAG_WED_TYPE, q->flags);
+ 	if (mtk_wed_device_active(wed) && type == MT76_WED_Q_RX) {
+-		u32 token;
++		u32 token, id, find = 0;
+ 		struct mt76_txwi_cache *r;
+
+ 		token = FIELD_GET(MT_DMA_CTL_TOKEN, desc->buf1);
+
++		if (*more) {
++			spin_lock_bh(&dev->rx_token_lock);
++
++			idr_for_each_entry(&dev->rx_token, r, id) {
++				if (r->dma_addr == le32_to_cpu(desc->buf0)) {
++					find = 1;
++					desc->buf1 = FIELD_PREP(MT_DMA_CTL_TOKEN, id);
++					token = id;
++					break;
++				}
++			}
++
++			spin_unlock_bh(&dev->rx_token_lock);
++			if (!find)
++				return NULL;
++
++		}
+ 		r = mt76_rx_token_release(dev, token);
+ 		if (!r)
+ 			return NULL;
+--
+2.18.0
+
diff --git a/recipes-kernel/linux-mt76/files/patches/patches.inc b/recipes-kernel/linux-mt76/files/patches/patches.inc
index 98709a2..eb2119c 100644
--- a/recipes-kernel/linux-mt76/files/patches/patches.inc
+++ b/recipes-kernel/linux-mt76/files/patches/patches.inc
@@ -40,4 +40,5 @@
     file://3006-mt76-mt7915-add-statistic-for-H-W-Rx-Path.patch \
     file://3007-mt76-mt7915-enable-red-per-band-token-drop-for-HW-Pa.patch \
     file://3008-mt76-mt7915-update-mt7916-trinfo-when-hw-path-enable.patch \
+    file://3009-mt76-mt7915-find-rx-token-by-physical-address.patch \
     "
diff --git a/recipes-kernel/linux/linux-mediatek-5.4/mediatek/flow_patch/9997-add-wed-rx-support-for-mt7896.patch b/recipes-kernel/linux/linux-mediatek-5.4/mediatek/flow_patch/9997-add-wed-rx-support-for-mt7896.patch
index e99920a..00909d5 100644
--- a/recipes-kernel/linux/linux-mediatek-5.4/mediatek/flow_patch/9997-add-wed-rx-support-for-mt7896.patch
+++ b/recipes-kernel/linux/linux-mediatek-5.4/mediatek/flow_patch/9997-add-wed-rx-support-for-mt7896.patch
@@ -16,8 +16,8 @@
  drivers/net/ethernet/mediatek/mtk_wed_mcu.c   | 586 ++++++++++++++++
  drivers/net/ethernet/mediatek/mtk_wed_mcu.h   | 125 ++++
  drivers/net/ethernet/mediatek/mtk_wed_regs.h  | 144 +++-
- drivers/net/ethernet/mediatek/mtk_wed_wo.c    | 573 ++++++++++++++++
- drivers/net/ethernet/mediatek/mtk_wed_wo.h    | 327 +++++++++
+ drivers/net/ethernet/mediatek/mtk_wed_wo.c    | 564 ++++++++++++++++
+ drivers/net/ethernet/mediatek/mtk_wed_wo.h    | 324 +++++++++
  include/linux/soc/mediatek/mtk_wed.h          |  74 ++-
  14 files changed, 2796 insertions(+), 75 deletions(-)
  create mode 100644 drivers/net/ethernet/mediatek/mtk_wed_ccif.c
@@ -1359,7 +1359,7 @@
 +	tasklet_disable(&wo->irq_tasklet);
 +	netif_napi_del(&wo->napi);
 +
-+	mtk_wed_wo_q_tx_clean(wo, &wo->q_tx, true);
++	mtk_wed_wo_q_tx_clean(wo, &wo->q_tx);
 +	mtk_wed_wo_q_rx_clean(wo, &wo->q_rx);
 +	mtk_wed_wo_q_free(wo, &wo->q_tx);
 +	mtk_wed_wo_q_free(wo, &wo->q_rx);
@@ -2539,7 +2539,7 @@
 index 0000000..8434272
 --- /dev/null
 +++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.c
-@@ -0,0 +1,573 @@
+@@ -0,0 +1,564 @@
 +// SPDX-License-Identifier: GPL-2.0-only
 +
 +#include <linux/kernel.h>
@@ -2611,10 +2611,11 @@
 +}
 +
 +static int
-+woif_q_rx_fill(struct mtk_wed_wo *wo, struct wed_wo_queue *q)
++woif_q_rx_fill(struct mtk_wed_wo *wo, struct wed_wo_queue *q, bool rx)
 +{
 +	int len = q->buf_size, frames = 0;
 +	struct wed_wo_queue_entry *entry;
++	struct page_frag_cache *page = &q->tx_page;
 +	struct wed_wo_desc *desc;
 +	dma_addr_t addr;
 +	u32 ctrl = 0;
@@ -2625,9 +2626,11 @@
 +
 +	spin_lock_bh(&q->lock);
 +
-+	while (q->queued < q->ndesc) {
++	if(rx)
++		page = &q->rx_page;
 +
-+		buf = page_frag_alloc(&q->rx_page, len, GFP_ATOMIC);
++	while (q->queued < q->ndesc) {
++		buf = page_frag_alloc(page, len, GFP_ATOMIC);
 +		if (!buf)
 +			break;
 +
@@ -2636,8 +2639,6 @@
 +			skb_free_frag(buf);
 +			break;
 +		}
-+		dma_sync_single_for_cpu(wo->hw->dev, addr, len,
-+					DMA_TO_DEVICE);
 +
 +		q->head = (q->head + 1) % q->ndesc;
 +
@@ -2647,13 +2648,13 @@
 +		entry->dma_addr = addr;
 +		entry->dma_len = len;
 +
-+		ctrl = FIELD_PREP(WED_CTL_SD_LEN0, entry->dma_len);
-+		ctrl |= WED_CTL_LAST_SEC0;
++		if (rx) {
++			ctrl = FIELD_PREP(WED_CTL_SD_LEN0, entry->dma_len);
++			ctrl |= WED_CTL_LAST_SEC0;
 +
-+		WRITE_ONCE(desc->buf0, cpu_to_le32(addr));
-+		WRITE_ONCE(desc->ctrl, cpu_to_le32(ctrl));
-+		dma_sync_single_for_device(wo->hw->dev, addr, len,
-+					DMA_TO_DEVICE);
++			WRITE_ONCE(desc->buf0, cpu_to_le32(addr));
++			WRITE_ONCE(desc->ctrl, cpu_to_le32(ctrl));
++		}
 +		q->queued++;
 +		q->entry[q->head].buf = buf;
 +
@@ -2668,7 +2669,7 @@
 +static void
 +woif_q_rx_fill_process(struct mtk_wed_wo *wo, struct wed_wo_queue *q)
 +{
-+	if(woif_q_rx_fill(wo, q))
++	if(woif_q_rx_fill(wo, q, true))
 +		woif_q_kick(wo, q, -1);
 +}
 +
@@ -2706,8 +2707,11 @@
 +	if (!q->entry)
 +		return -ENOMEM;
 +
-+	if (idx == 0)
++	if (idx == 0) {
++		/* alloc tx buf */
++		woif_q_rx_fill(dev, &dev->q_tx, false);
 +		woif_q_reset(dev, &dev->q_tx);
++	}
 +
 +	return 0;
 +}
@@ -2730,44 +2734,36 @@
 +}
 +
 +static void
-+woif_q_tx_clean(struct mtk_wed_wo *wo, struct wed_wo_queue *q, bool flush)
++woif_q_tx_clean(struct mtk_wed_wo *wo, struct wed_wo_queue *q)
 +{
-+	int last;
++	struct page *page;
++	int i = 0;
 +
 +	if (!q || !q->ndesc)
 +		return;
 +
-+	spin_lock_bh(&q->cleanup_lock);
-+	if (flush)
-+		last = -1;
-+	else
-+		last = woccif_r32(wo, q->regs->dma_idx);
-+
-+	while (q->queued > 0 && q->tail != last) {
++	spin_lock_bh(&q->lock);
++	while (i < q->ndesc) {
 +		struct wed_wo_queue_entry *e;
 +
-+		e = &q->entry[q->tail + 1];
++		e = &q->entry[i];
++		i++;
 +
++		if (!e)
++			continue;
 +		dma_unmap_single(wo->hw->dev, e->dma_addr, e->dma_len,
 +				 DMA_TO_DEVICE);
 +
-+
-+		memset(e, 0, sizeof(*e));
-+
-+		spin_lock_bh(&q->lock);
-+		q->tail = (q->tail + 1) % q->ndesc;
-+		q->queued--;
-+		spin_unlock_bh(&q->lock);
-+
++		skb_free_frag(e->buf);
 +	}
-+	spin_unlock_bh(&q->cleanup_lock);
++	spin_unlock_bh(&q->lock);
 +
-+	if (flush) {
-+		spin_lock_bh(&q->lock);
-+		woif_q_sync_idx(wo, q);
-+		woif_q_kick(wo, q, 0);
-+		spin_unlock_bh(&q->lock);
-+	}
++	if (!q->tx_page.va)
++		return;
++
++	page = virt_to_page(q->tx_page.va);
++	__page_frag_cache_drain(page, q->tx_page.pagecnt_bias);
++	memset(&q->tx_page, 0, sizeof(q->tx_page));
 +}
 +
 +static void *
@@ -2838,7 +2834,6 @@
 +	page = virt_to_page(q->rx_page.va);
 +	__page_frag_cache_drain(page, q->rx_page.pagecnt_bias);
 +	memset(&q->rx_page, 0, sizeof(q->rx_page));
-+
 +}
 +
 +static int
@@ -2851,7 +2846,7 @@
 +
 +	if (dev->q_rx.ndesc) {
 +		netif_napi_add(&dev->napi_dev, &dev->napi, poll, 64);
-+		woif_q_rx_fill(dev, &dev->q_rx);
++		woif_q_rx_fill(dev, &dev->q_rx, true);
 +		woif_q_reset(dev, &dev->q_rx);
 +		napi_enable(&dev->napi);
 +	}
@@ -2884,33 +2879,33 @@
 +{
 +	struct wed_wo_queue_entry *entry;
 +	struct wed_wo_desc *desc;
-+	int len, ret, idx = -1;
++	int len, ret = 0, idx = -1;
 +	dma_addr_t addr;
 +	u32 ctrl = 0;
 +
 +	len = skb->len;
-+	addr = dma_map_single(wo->hw->dev, skb->data, len, DMA_TO_DEVICE);
-+	if (unlikely(dma_mapping_error(wo->hw->dev, addr)))
-+		goto error;
-+
-+	/* packet tx, force trigger tx clean. */
-+		woif_q_tx_clean(wo, q, false);
++	spin_lock_bh(&q->lock);
 +
-+	if (q->queued >= q->ndesc) {
++	q->tail = woccif_r32(wo, q->regs->dma_idx);
++	q->head = (q->head + 1) % q->ndesc;
++	if (q->tail == q->head) {
 +		ret = -ENOMEM;
 +		goto error;
 +	}
 +
-+	spin_lock_bh(&q->lock);
-+
-+	q->head = (q->head + 1) % q->ndesc;
 +	idx = q->head;
-+
 +	desc = &q->desc[idx];
 +	entry = &q->entry[idx];
 +
-+	entry->dma_addr = addr;
-+	entry->dma_len = len;
++	if (len > entry->dma_len) {
++		ret = -ENOMEM;
++		goto error;
++	}
++	addr = entry->dma_addr;
++
++	dma_sync_single_for_cpu(wo->hw->dev, addr, len, DMA_TO_DEVICE);
++	memcpy(entry->buf, skb->data, len);
++	dma_sync_single_for_device(wo->hw->dev, addr, len, DMA_TO_DEVICE);
 +
 +	ctrl = FIELD_PREP(WED_CTL_SD_LEN0, len);
 +	ctrl |= WED_CTL_LAST_SEC0;
@@ -2919,18 +2914,14 @@
 +	WRITE_ONCE(desc->buf0, cpu_to_le32(addr));
 +	WRITE_ONCE(desc->ctrl, cpu_to_le32(ctrl));
 +
-+	q->queued++;
-+	q->entry[idx].skb = skb;
-+
 +	woif_q_kick(wo, q, 0);
 +	wo->drv_ops->kickout(wo);
 +
 +	spin_unlock_bh(&q->lock);
-+	return 0;
 +
 +error:
 +	dev_kfree_skb(skb);
-+	return -ENOMEM;
++	return ret;
 +}
 +
 +static const struct wed_wo_queue_ops wo_queue_ops = {
@@ -2947,7 +2938,7 @@
 +static int
 +mtk_wed_wo_rx_process(struct mtk_wed_wo *wo, struct wed_wo_queue *q, int budget)
 +{
-+	int len, data_len, done = 0;
++	int len, done = 0;
 +	struct sk_buff *skb;
 +	unsigned char *data;
 +	bool more;
@@ -3118,7 +3109,7 @@
 index 0000000..5824f39
 --- /dev/null
 +++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.h
-@@ -0,0 +1,327 @@
+@@ -0,0 +1,324 @@
 +// SPDX-License-Identifier: GPL-2.0-only
 +/* Copyright (C) 2021 Felix Fietkau <nbd@nbd.name> */
 +
@@ -3205,6 +3196,7 @@
 +
 +	dma_addr_t desc_dma;
 +	struct page_frag_cache rx_page;
++	struct page_frag_cache tx_page;
 +};
 +
 +
@@ -3344,10 +3336,7 @@
 +
 +	int (*tx_skb)(struct mtk_wed_wo *wo, struct wed_wo_queue *q,
 +		      struct sk_buff *skb);
-+	int (*tx_skb1)(struct mtk_wed_wo *wo, struct wed_wo_queue *q,
-+		       u8 *msg, u32 msg_len);
-+	void (*tx_clean)(struct mtk_wed_wo *wo, struct wed_wo_queue *q,
-+			 bool flush);
++	void (*tx_clean)(struct mtk_wed_wo *wo, struct wed_wo_queue *q);
 +
 +	void (*rx_clean)(struct mtk_wed_wo *wo, struct wed_wo_queue *q);
 +
@@ -3382,7 +3371,6 @@
 +#define mtk_wed_wo_q_free(wo, ...)	(wo)->queue_ops->free((wo), __VA_ARGS__)
 +#define mtk_wed_wo_q_reset(wo, ...)	(wo)->queue_ops->reset((wo), __VA_ARGS__)
 +#define mtk_wed_wo_q_tx_skb(wo, ...)	(wo)->queue_ops->tx_skb((wo), __VA_ARGS__)
-+#define mtk_wed_wo_q_tx_skb1(wo, ...)	(wo)->queue_ops->tx_skb1((wo), __VA_ARGS__)
 +#define mtk_wed_wo_q_tx_clean(wo, ...)	(wo)->queue_ops->tx_clean((wo), __VA_ARGS__)
 +#define mtk_wed_wo_q_rx_clean(wo, ...)	(wo)->queue_ops->rx_clean((wo), __VA_ARGS__)
 +#define mtk_wed_wo_q_kick(wo, ...)	(wo)->queue_ops->kick((wo), __VA_ARGS__)
diff --git a/recipes-kernel/linux/linux-mediatek-5.4/mediatek/flow_patch/9999-flow-offload-add-mtkhnat-qdma-qos.patch b/recipes-kernel/linux/linux-mediatek-5.4/mediatek/flow_patch/9999-flow-offload-add-mtkhnat-qdma-qos.patch
index f5a1b03..989cb6c 100644
--- a/recipes-kernel/linux/linux-mediatek-5.4/mediatek/flow_patch/9999-flow-offload-add-mtkhnat-qdma-qos.patch
+++ b/recipes-kernel/linux/linux-mediatek-5.4/mediatek/flow_patch/9999-flow-offload-add-mtkhnat-qdma-qos.patch
@@ -166,7 +166,7 @@
  	spinlock_t			syscfg0_lock;
  	struct timer_list		mtk_dma_monitor_timer;
  
-+	u8				qos_mode;
++	u8				qos_toggle;
  	u8				ppe_num;
  	struct mtk_ppe			*ppe[MTK_MAX_PPE_NUM];
  	struct rhashtable		flow_table;
@@ -214,21 +214,84 @@
 index f258539..3b17819 100755
 --- a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
 +++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
-@@ -203,9 +203,13 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
- 	}
+@@ -9,6 +9,8 @@
+ #include <linux/ipv6.h>
+ #include <net/flow_offload.h>
+ #include <net/pkt_cls.h>
++#include <net/netfilter/nf_conntrack.h>
++#include <net/netfilter/nf_flow_table.h>
+ #include <net/dsa.h>
+ #include "mtk_eth_soc.h"
+ #include "mtk_wed.h"
+@@ -183,7 +185,7 @@ mtk_flow_get_dsa_port(struct net_device **dev)
  
- 	dsa_port = mtk_flow_get_dsa_port(&dev);
--	if (dsa_port >= 0)
-+	if (dsa_port >= 0) {
- 		mtk_foe_entry_set_dsa(foe, dsa_port);
+ static int
+ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
+-			   struct net_device *dev, const u8 *dest_mac,
++			   struct net_device *dev, struct nf_conn *ct, const u8 *dest_mac,
+ 			   int *wed_index)
+ {
+ 	struct mtk_wdma_info info = {};
+@@ -211,6 +211,11 @@ mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
+ 	if (dsa_port >= 0)
+ 	mtk_foe_entry_set_dsa(foe, dsa_port);
  
-+		if (eth->qos_mode == 2)
-+			mtk_foe_entry_set_qid(foe, dsa_port);
-+	}
++	if (eth->qos_toggle == 1 || ct->mark >= 6)
++		mtk_foe_entry_set_qid(foe, ct->mark & MTK_QDMA_TX_MASK);
++	if (eth->qos_toggle == 2 && dsa_port >= 0)
++		mtk_foe_entry_set_qid(foe, dsa_port & MTK_QDMA_TX_MASK);
 +
  	if (dev == eth->netdev[0])
  		pse_port = 1;
  	else if (dev == eth->netdev[1])
+@@ -433,7 +443,7 @@ mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+ 	if (data.pppoe.num == 1)
+ 		mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
+ 
+-	err = mtk_flow_set_output_device(eth, &foe, odev, data.eth.h_dest,
++	err = mtk_flow_set_output_device(eth, &foe, odev, f->flow->ct, data.eth.h_dest,
+ 					 &wed_index);
+ 	if (err)
+ 		return err;
+diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
+index 59b8736..7261b6d 100644
+--- a/include/net/flow_offload.h
++++ b/include/net/flow_offload.h
+@@ -365,6 +378,7 @@ struct flow_cls_offload {
+ 	struct flow_cls_common_offload common;
+ 	enum flow_cls_command command;
+ 	unsigned long cookie;
++	struct flow_offload *flow;
+ 	struct flow_rule *rule;
+ 	struct flow_stats stats;
+ 	u32 classid;
+diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
+index d94c6fb..886ced5 100644
+--- a/net/netfilter/nf_flow_table_offload.c
++++ b/net/netfilter/nf_flow_table_offload.c
+@@ -810,11 +810,13 @@ static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
+ }
+ 
+ static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
++				 struct flow_offload *flow,
+ 				 __be16 proto, int priority,
+ 				 enum flow_cls_command cmd,
+ 				 const struct flow_offload_tuple *tuple,
+ 				 struct netlink_ext_ack *extack)
+ {
++	cls_flow->flow = flow;
+ 	cls_flow->common.protocol = proto;
+ 	cls_flow->common.prio = priority;
+ 	cls_flow->common.extack = extack;
+@@ -836,7 +838,7 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
+ 	__be16 proto = ETH_P_ALL;
+ 	int err, i = 0;
+ 
+-	nf_flow_offload_init(&cls_flow, proto, priority, cmd,
++	nf_flow_offload_init(&cls_flow, flow, proto, priority, cmd,
+ 			     &flow->tuplehash[dir].tuple, &extack);
+ 	if (cmd == FLOW_CLS_REPLACE)
+ 		cls_flow.rule = flow_rule->rule;
 diff --git a/drivers/net/ethernet/mediatek/mtk_qdma_debugfs.c b/drivers/net/ethernet/mediatek/mtk_qdma_debugfs.c
 new file mode 100644
 index 0000000..198b924
@@ -323,15 +386,15 @@
 +
 +	if (buf[0] == '0') {
 +		pr_info("HQoS is going to be disabled !\n");
-+		eth->qos_mode = 0;
++		eth->qos_toggle = 0;
 +		mtk_qdma_qos_disable(eth);
 +	} else if (buf[0] == '1') {
 +		pr_info("HQoS mode is going to be enabled !\n");
-+		eth->qos_mode = 1;
++		eth->qos_toggle = 1;
 +	} else if (buf[0] == '2') {
 +		pr_info("Per-port-per-queue mode is going to be enabled !\n");
 +		pr_info("PPPQ use qid 0~5 (scheduler 0).\n");
-+		eth->qos_mode = 2;
++		eth->qos_toggle = 2;
 +		mtk_qdma_qos_pppq_enable(eth);
 +	}
 +
@@ -343,7 +406,7 @@
 +	struct mtk_eth *eth = m->private;
 +
 +	seq_printf(m, "value=%d, HQoS is %s now!\n",
-+		   eth->qos_mode, (eth->qos_mode) ? "enabled" : "disabled");
++		   eth->qos_toggle, (eth->qos_toggle) ? "enabled" : "disabled");
 +
 +	return 0;
 +}
@@ -652,7 +715,7 @@
 +	if (!root)
 +		return -ENOMEM;
 +
-+	debugfs_create_file("qos_mode", S_IRUGO, root, eth, &fops_qos);
++	debugfs_create_file("qos_toggle", S_IRUGO, root, eth, &fops_qos);
 +
 +	for (i = 0; i < eth->soc->txrx.qdma_tx_sch; i++) {
 +		snprintf(name, sizeof(name), "qdma_sch%ld", i);