[][kernel][mt7988][eth][Add ADMAv2 multiple TX rings support]

[Description]
Add support for multiple TX rings to ADMAv2.

Currently, only the MT7988 platform uses ADMAv2.

To enable multiple TX rings on ADMAv2, remove the MTK_QDMA
capability and change tx_dma_size from 4K to 1K in the Ethernet
driver, then set qos_toggle to 0 (default is 2) with the following
command:
echo 0 > /sys/kernel/debug/hnat/qos_toggle
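
With MTK_QDMA removed, every MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)
check in the driver takes the PDMA path, so DMA init iterates over
all TX rings instead of just one. A minimal sketch of the resulting
allocation loop (taken from the mtk_dma_init() change below, where
MTK_MAX_TX_RING_NUM is 4):

  for (i = 0; i < MTK_MAX_TX_RING_NUM; i++) {
          err = mtk_tx_alloc(eth, i);
          if (err)
                  return err;

          /* QDMA keeps using a single TX ring (ring 0) */
          if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
                  break;
  }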

Without this patch, ADMAv2 supports only a single TX ring.
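
With this patch applied, each net device registers MTK_PDMA_TX_NUM (4)
TX queues via alloc_etherdev_mqs(), and skb->mark selects the TX ring
in the driver's queue-selection callback. A sketch of the PDMA branch
(from the change below; out-of-range marks fall back to ring 0):

  if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
          return (skb->mark < MTK_PDMA_TX_NUM) ? skb->mark : 0;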

[Release-log]
N/A


Change-Id: I848fdf13dd60fa8f3a83ce4883d27bc6559f860a
Reviewed-on: https://gerrit.mediatek.inc/c/openwrt/feeds/mtk_openwrt_feeds/+/9195605
diff --git a/21.02/files/target/linux/mediatek/files-5.4/drivers/net/ethernet/mediatek/mtk_eth_dbg.c b/21.02/files/target/linux/mediatek/files-5.4/drivers/net/ethernet/mediatek/mtk_eth_dbg.c
index 04fd70a..e8110ef 100644
--- a/21.02/files/target/linux/mediatek/files-5.4/drivers/net/ethernet/mediatek/mtk_eth_dbg.c
+++ b/21.02/files/target/linux/mediatek/files-5.4/drivers/net/ethernet/mediatek/mtk_eth_dbg.c
@@ -1098,33 +1098,56 @@
 int tx_ring_read(struct seq_file *seq, void *v)
 {
 	struct mtk_eth *eth = g_eth;
-	struct mtk_tx_ring *ring = &g_eth->tx_ring;
+	struct mtk_tx_ring *ring;
 	struct mtk_tx_dma_v2 *tx_ring;
-	int i = 0;
+	dma_addr_t tmp;
+	int i = 0, j = 0;
 
-	seq_printf(seq, "free count = %d\n", (int)atomic_read(&ring->free_count));
-	seq_printf(seq, "cpu next free: %d\n",
-		   (int)(ring->next_free - ring->dma) / eth->soc->txrx.txd_size);
-	seq_printf(seq, "cpu last free: %d\n",
-		   (int)(ring->last_free - ring->dma) / eth->soc->txrx.txd_size);
-	for (i = 0; i < eth->soc->txrx.tx_dma_size; i++) {
-		dma_addr_t tmp = ring->phys +
-				 i * (dma_addr_t)eth->soc->txrx.txd_size;
+	for (j = 0; j < MTK_MAX_TX_RING_NUM; j++) {
+		ring = &eth->tx_ring[j];
+		if (!ring->dma)
+			continue;
 
-		tx_ring = ring->dma + i * eth->soc->txrx.txd_size;
+		seq_printf(seq, "[Ring%d]\n", j);
+		seq_printf(seq, "free count = %d\n", (int)atomic_read(&ring->free_count));
+		seq_printf(seq, "cpu next free: %d\n",
+			   (int)(ring->next_free - ring->dma) / eth->soc->txrx.txd_size);
+		seq_printf(seq, "cpu last free: %d\n",
+			   (int)(ring->last_free - ring->dma) / eth->soc->txrx.txd_size);
+		for (i = 0; i < eth->soc->txrx.tx_dma_size; i++) {
 
-		seq_printf(seq, "%d (%pad): %08x %08x %08x %08x", i, &tmp,
-			   tx_ring->txd1, tx_ring->txd2,
-			   tx_ring->txd3, tx_ring->txd4);
+			if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+				tmp = ring->phys + i * (dma_addr_t)eth->soc->txrx.txd_size;
+				tx_ring = ring->dma + i * eth->soc->txrx.txd_size;
+			} else {
+				tmp = ring->phys_pdma + i * (dma_addr_t)eth->soc->txrx.txd_size;
+				tx_ring = ring->dma_pdma + i * eth->soc->txrx.txd_size;
+			}
 
-		if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2) ||
-		    MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V3)) {
-			seq_printf(seq, " %08x %08x %08x %08x",
-				   tx_ring->txd5, tx_ring->txd6,
-				   tx_ring->txd7, tx_ring->txd8);
+			seq_printf(seq, "%d (%pad): %08x %08x %08x %08x", i, &tmp,
+				   tx_ring->txd1, tx_ring->txd2,
+				   tx_ring->txd3, tx_ring->txd4);
+
+			if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+				if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2) ||
+				    MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V3)) {
+					seq_printf(seq, " %08x %08x %08x %08x",
+						   tx_ring->txd5, tx_ring->txd6,
+						   tx_ring->txd7, tx_ring->txd8);
+				}
+			} else {
+				if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V3)) {
+					seq_printf(seq, " %08x %08x %08x %08x",
+						   tx_ring->txd5, tx_ring->txd6,
+						   tx_ring->txd7, tx_ring->txd8);
+				}
+			}
+
+			seq_puts(seq, "\n");
 		}
 
-		seq_printf(seq, "\n");
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+			break;
 	}
 
 	return 0;
@@ -1337,10 +1360,12 @@
 		seq_printf(seq, "| QDMA_FSM	: %08x |\n",
 			   mtk_r32(eth, MTK_QDMA_FSM));
 	} else {
-		seq_printf(seq, "| PDMA_CTX_IDX	: %08x |\n",
-			   mtk_r32(eth, MTK_PTX_CTX_IDX0));
-		seq_printf(seq, "| PDMA_DTX_IDX	: %08x |\n",
-			   mtk_r32(eth, MTK_PTX_DTX_IDX0));
+		for (i = 0; i < MTK_MAX_TX_RING_NUM; i++) {
+			seq_printf(seq, "| PDMA_CTX_IDX%d	: %08x |\n",
+				   i, mtk_r32(eth, MTK_PTX_CTX_IDX_CFG(i)));
+			seq_printf(seq, "| PDMA_DTX_IDX%d	: %08x |\n",
+				   i, mtk_r32(eth, MTK_PTX_DTX_IDX_CFG(i)));
+		}
 	}
 
 	seq_printf(seq, "| FE_PSE_FREE	: %08x |\n",
diff --git a/21.02/files/target/linux/mediatek/files-5.4/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/21.02/files/target/linux/mediatek/files-5.4/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index adde4eb..fab0526 100644
--- a/21.02/files/target/linux/mediatek/files-5.4/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/21.02/files/target/linux/mediatek/files-5.4/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2203,7 +2203,8 @@
 	} else {
 		int next_idx = NEXT_DESP_IDX(txd_to_idx(ring, txd, soc->txrx.txd_size),
 					     ring->dma_size);
-		mtk_w32(eth, next_idx, soc->reg_map->pdma.pctx_ptr);
+		mtk_w32(eth, next_idx,
+			soc->reg_map->pdma.pctx_ptr + ring->ring_no * MTK_QTX_OFFSET);
 	}
 
 	return 0;
@@ -2245,28 +2246,43 @@
 	return nfrags;
 }
 
-static int mtk_queue_stopped(struct mtk_eth *eth)
+static int mtk_queue_stopped(struct mtk_eth *eth, u32 ring_no)
 {
+	struct netdev_queue *txq;
 	int i;
 
 	for (i = 0; i < MTK_MAC_COUNT; i++) {
 		if (!eth->netdev[i])
 			continue;
-		if (netif_queue_stopped(eth->netdev[i]))
-			return 1;
+
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+			if (netif_queue_stopped(eth->netdev[i]))
+				return 1;
+		} else {
+			txq = netdev_get_tx_queue(eth->netdev[i], ring_no);
+			if (netif_tx_queue_stopped(txq))
+				return 1;
+		}
 	}
 
 	return 0;
 }
 
-static void mtk_wake_queue(struct mtk_eth *eth)
+static void mtk_wake_queue(struct mtk_eth *eth, u32 ring_no)
 {
+	struct netdev_queue *txq;
 	int i;
 
 	for (i = 0; i < MTK_MAC_COUNT; i++) {
 		if (!eth->netdev[i])
 			continue;
-		netif_tx_wake_all_queues(eth->netdev[i]);
+
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+			netif_tx_wake_all_queues(eth->netdev[i]);
+		} else {
+			txq = netdev_get_tx_queue(eth->netdev[i], ring_no);
+			netif_tx_wake_queue(txq);
+		}
 	}
 }
 
@@ -2274,10 +2290,13 @@
 {
 	struct mtk_mac *mac = netdev_priv(dev);
 	struct mtk_eth *eth = mac->hw;
-	struct mtk_tx_ring *ring = &eth->tx_ring;
+	struct mtk_tx_ring *ring;
 	struct net_device_stats *stats = &dev->stats;
+	struct netdev_queue *txq;
 	bool gso = false;
 	int tx_num;
+	int i = 0;
+	int qid = skb_get_queue_mapping(skb);
 
 	/* normally we can rely on the stack not calling this more than once,
 	 * however we have 2 queues running on the same ring so we need to lock
@@ -2288,9 +2307,20 @@
 	if (unlikely(test_bit(MTK_RESETTING, &eth->state)))
 		goto drop;
 
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+		ring = &eth->tx_ring[0];
+	} else {
+		ring = &eth->tx_ring[qid];
+		txq = netdev_get_tx_queue(dev, qid);
+	}
+
 	tx_num = mtk_cal_txd_req(skb);
 	if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
-		netif_tx_stop_all_queues(dev);
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+			netif_tx_stop_all_queues(dev);
+		else
+			netif_tx_stop_queue(txq);
+
 		netif_err(eth, tx_queued, dev,
 			  "Tx Ring full when queue awake!\n");
 		spin_unlock(&eth->page_lock);
@@ -2315,8 +2345,12 @@
 	if (mtk_tx_map(skb, dev, tx_num, ring, gso) < 0)
 		goto drop;
 
-	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
-		netif_tx_stop_all_queues(dev);
+	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) {
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+			netif_tx_stop_all_queues(dev);
+		else
+			netif_tx_stop_queue(txq);
+	}
 
 	spin_unlock(&eth->page_lock);
 
@@ -2564,11 +2598,11 @@
 }
 
 static void mtk_poll_tx_qdma(struct mtk_eth *eth, int budget,
-			     struct mtk_poll_state *state)
+			     struct mtk_poll_state *state,
+			     struct mtk_tx_ring *ring)
 {
 	const struct mtk_reg_map *reg_map = eth->soc->reg_map;
 	const struct mtk_soc_data *soc = eth->soc;
-	struct mtk_tx_ring *ring = &eth->tx_ring;
 	struct mtk_tx_dma *desc;
 	struct sk_buff *skb;
 	struct mtk_tx_buf *tx_buf;
@@ -2615,17 +2649,17 @@
 }
 
 static void mtk_poll_tx_pdma(struct mtk_eth *eth, int budget,
-			     struct mtk_poll_state *state)
+			     struct mtk_poll_state *state,
+			     struct mtk_tx_ring *ring)
 {
 	const struct mtk_soc_data *soc = eth->soc;
-	struct mtk_tx_ring *ring = &eth->tx_ring;
 	struct mtk_tx_dma *desc;
 	struct sk_buff *skb;
 	struct mtk_tx_buf *tx_buf;
 	u32 cpu, dma;
 
 	cpu = ring->cpu_idx;
-	dma = mtk_r32(eth, soc->reg_map->pdma.pdtx_ptr);
+	dma = mtk_r32(eth, soc->reg_map->pdma.pdtx_ptr + ring->ring_no * MTK_QTX_OFFSET);
 
 	while ((cpu != dma) && budget) {
 		int mac = 0;
@@ -2661,22 +2695,21 @@
 	ring->cpu_idx = cpu;
 }
 
-static int mtk_poll_tx(struct mtk_eth *eth, int budget)
+static int mtk_poll_tx(struct mtk_eth *eth, int budget, struct mtk_tx_ring *ring)
 {
-	struct mtk_tx_ring *ring = &eth->tx_ring;
 	struct mtk_poll_state state = {};
 
 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
-		mtk_poll_tx_qdma(eth, budget, &state);
+		mtk_poll_tx_qdma(eth, budget, &state, ring);
 	else
-		mtk_poll_tx_pdma(eth, budget, &state);
+		mtk_poll_tx_pdma(eth, budget, &state, ring);
 
 	if (state.txq)
 		netdev_tx_completed_queue(state.txq, state.done, state.bytes);
 
-	if (mtk_queue_stopped(eth) &&
+	if (mtk_queue_stopped(eth, ring->ring_no) &&
 	    (atomic_read(&ring->free_count) > ring->thresh))
-		mtk_wake_queue(eth);
+		mtk_wake_queue(eth, ring->ring_no);
 
 	return state.total;
 }
@@ -2694,18 +2727,20 @@
 
 static int mtk_napi_tx(struct napi_struct *napi, int budget)
 {
-	struct mtk_eth *eth = container_of(napi, struct mtk_eth, tx_napi);
+	struct mtk_napi *tx_napi = container_of(napi, struct mtk_napi, napi);
+	struct mtk_eth *eth = tx_napi->eth;
+	struct mtk_tx_ring *ring = tx_napi->tx_ring;
 	const struct mtk_reg_map *reg_map = eth->soc->reg_map;
 	u32 status, mask;
 	int tx_done = 0;
 
 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
 		mtk_handle_status_irq(eth);
-		mtk_w32(eth, MTK_TX_DONE_INT(0), reg_map->tx_irq_status);
+		mtk_w32(eth, MTK_TX_DONE_INT(ring->ring_no), reg_map->tx_irq_status);
 	} else {
-		mtk_w32(eth, MTK_TX_DONE_INT(0), reg_map->pdma.irq_status);
+		mtk_w32(eth, MTK_TX_DONE_INT(ring->ring_no), reg_map->pdma.irq_status);
 	}
-	tx_done = mtk_poll_tx(eth, budget);
+	tx_done = mtk_poll_tx(eth, budget, ring);
 
 	if (unlikely(netif_msg_intr(eth))) {
 		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
@@ -2727,11 +2762,11 @@
 		status = mtk_r32(eth, reg_map->tx_irq_status);
 	else
 		status = mtk_r32(eth, reg_map->pdma.irq_status);
-	if (status & MTK_TX_DONE_INT(0))
+	if (status & MTK_TX_DONE_INT(ring->ring_no))
 		return budget;
 
 	if (napi_complete(napi))
-		mtk_tx_irq_enable(eth, MTK_TX_DONE_INT(0));
+		mtk_tx_irq_enable(eth, MTK_TX_DONE_INT(ring->ring_no));
 
 	return tx_done;
 }
@@ -2774,10 +2809,10 @@
 	return rx_done + budget - remain_budget;
 }
 
-static int mtk_tx_alloc(struct mtk_eth *eth)
+static int mtk_tx_alloc(struct mtk_eth *eth, int ring_no)
 {
 	const struct mtk_soc_data *soc = eth->soc;
-	struct mtk_tx_ring *ring = &eth->tx_ring;
+	struct mtk_tx_ring *ring = &eth->tx_ring[ring_no];
 	int i, sz = soc->txrx.txd_size;
 	struct mtk_tx_dma_v2 *txd, *pdma_txd;
 
@@ -2790,9 +2825,11 @@
 		ring->dma = dma_alloc_coherent(eth->dma_dev, soc->txrx.tx_dma_size * sz,
 					       &ring->phys, GFP_KERNEL);
 	else {
-		ring->dma =  eth->sram_base + soc->txrx.fq_dma_size * sz;
-		ring->phys = eth->phy_scratch_ring +
-			     soc->txrx.fq_dma_size * (dma_addr_t)sz;
+		dma_addr_t offset = (soc->txrx.fq_dma_size * (dma_addr_t)sz) +
+				    (soc->txrx.tx_dma_size * (dma_addr_t)sz * ring_no);
+
+		ring->dma =  eth->sram_base + offset;
+		ring->phys = eth->phy_scratch_ring + offset;
 	}
 
 	if (!ring->dma)
@@ -2849,6 +2886,7 @@
 	ring->last_free_ptr = (u32)(ring->phys + ((soc->txrx.tx_dma_size - 1) * sz));
 	ring->thresh = MAX_SKB_FRAGS;
 	ring->cpu_idx = 0;
+	ring->ring_no = ring_no;
 
 	/* make sure that all changes to the dma ring are flushed before we
 	 * continue
@@ -2865,10 +2903,13 @@
 		mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES,
 			soc->reg_map->qdma.qtx_cfg);
 	} else {
-		mtk_w32(eth, ring->phys_pdma, soc->reg_map->pdma.tx_ptr);
-		mtk_w32(eth, soc->txrx.tx_dma_size, soc->reg_map->pdma.tx_cnt_cfg);
-		mtk_w32(eth, ring->cpu_idx, soc->reg_map->pdma.pctx_ptr);
-		mtk_w32(eth, MTK_PST_DTX_IDX_CFG(0), soc->reg_map->pdma.rst_idx);
+		mtk_w32(eth, ring->phys_pdma,
+			soc->reg_map->pdma.tx_ptr + ring_no * MTK_QTX_OFFSET);
+		mtk_w32(eth, soc->txrx.tx_dma_size,
+			soc->reg_map->pdma.tx_cnt_cfg + ring_no * MTK_QTX_OFFSET);
+		mtk_w32(eth, ring->cpu_idx,
+			soc->reg_map->pdma.pctx_ptr + ring_no * MTK_QTX_OFFSET);
+		mtk_w32(eth, MTK_PST_DTX_IDX_CFG(ring_no), soc->reg_map->pdma.rst_idx);
 	}
 
 	return 0;
@@ -2877,10 +2918,9 @@
 	return -ENOMEM;
 }
 
-static void mtk_tx_clean(struct mtk_eth *eth)
+static void mtk_tx_clean(struct mtk_eth *eth, struct mtk_tx_ring *ring)
 {
 	const struct mtk_soc_data *soc = eth->soc;
-	struct mtk_tx_ring *ring = &eth->tx_ring;
 	int i;
 
 	if (ring->buf) {
@@ -2952,13 +2992,20 @@
 					       rx_dma_size * eth->soc->txrx.rxd_size,
 					       &ring->phys, GFP_KERNEL);
 	else {
-		struct mtk_tx_ring *tx_ring = &eth->tx_ring;
-		ring->dma = tx_ring->dma +
-			    soc->txrx.tx_dma_size * (dma_addr_t)eth->soc->txrx.txd_size +
-			    soc->txrx.rx_dma_size * (dma_addr_t)eth->soc->txrx.rxd_size * ring_no;
-		ring->phys = tx_ring->phys +
-			     soc->txrx.tx_dma_size * (dma_addr_t)eth->soc->txrx.txd_size +
-			     soc->txrx.rx_dma_size * (dma_addr_t)eth->soc->txrx.rxd_size * ring_no;
+		struct mtk_tx_ring *tx_ring = &eth->tx_ring[0];
+		dma_addr_t offset;
+		int tx_ring_num;
+
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+			tx_ring_num = 1;
+		else
+			tx_ring_num = MTK_MAX_TX_RING_NUM;
+
+		offset = (soc->txrx.tx_dma_size * (dma_addr_t)soc->txrx.txd_size * tx_ring_num) +
+			 (soc->txrx.rx_dma_size * (dma_addr_t)soc->txrx.rxd_size * ring_no);
+
+		ring->dma = tx_ring->dma + offset;
+		ring->phys = tx_ring->phys + offset;
 	}
 
 	if (!ring->dma)
@@ -3629,9 +3676,14 @@
 			return err;
 	}
 
-	err = mtk_tx_alloc(eth);
-	if (err)
-		return err;
+	for (i = 0; i < MTK_MAX_TX_RING_NUM; i++) {
+		err = mtk_tx_alloc(eth, i);
+		if (err)
+			return err;
+
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+			break;
+	}
 
 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
 		err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_QDMA);
@@ -3692,7 +3744,13 @@
 		eth->scratch_ring = NULL;
 		eth->phy_scratch_ring = 0;
 	}
-	mtk_tx_clean(eth);
+
+	for (i = 0; i < MTK_MAX_TX_RING_NUM; i++) {
+		mtk_tx_clean(eth, &eth->tx_ring[i]);
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+			break;
+	}
+
 	mtk_rx_clean(eth, &eth->rx_ring[0], soc->has_sram);
 	mtk_rx_clean(eth, &eth->rx_ring_qdma, 0);
 
@@ -3748,18 +3806,52 @@
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t mtk_handle_irq_tx(int irq, void *_eth)
+static irqreturn_t mtk_handle_irq_tx(int irq, void *priv)
 {
-	struct mtk_eth *eth = _eth;
+	struct mtk_napi *tx_napi = priv;
+	struct mtk_eth *eth = tx_napi->eth;
 
-	if (likely(napi_schedule_prep(&eth->tx_napi))) {
+	if (likely(napi_schedule_prep(&tx_napi->napi))) {
 		mtk_tx_irq_disable(eth, MTK_TX_DONE_INT(0));
-		__napi_schedule(&eth->tx_napi);
+		__napi_schedule(&tx_napi->napi);
 	}
 
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t mtk_handle_irq_txrx(int irq, void *priv)
+{
+	struct mtk_napi *txrx_napi = priv;
+	struct mtk_eth *eth = txrx_napi->eth;
+	struct mtk_tx_ring *tx_ring = txrx_napi->tx_ring;
+	struct mtk_rx_ring *rx_ring = txrx_napi->rx_ring;
+	const struct mtk_reg_map *reg_map = eth->soc->reg_map;
+
+	if (tx_ring) {
+		if (unlikely(!(mtk_r32(eth, eth->soc->reg_map->pdma.irq_status) &
+			       mtk_r32(eth, eth->soc->reg_map->pdma.irq_mask) &
+			       MTK_TX_DONE_INT(tx_ring->ring_no))))
+			return IRQ_NONE;
+
+		if (likely(napi_schedule_prep(&txrx_napi->napi))) {
+			mtk_tx_irq_disable(eth, MTK_TX_DONE_INT(tx_ring->ring_no));
+			__napi_schedule(&txrx_napi->napi);
+		}
+	} else {
+		if (unlikely(!(mtk_r32(eth, eth->soc->reg_map->pdma.irq_status) &
+			       mtk_r32(eth, eth->soc->reg_map->pdma.irq_mask) &
+			       MTK_RX_DONE_INT(rx_ring->ring_no))))
+			return IRQ_NONE;
+
+		if (likely(napi_schedule_prep(&txrx_napi->napi))) {
+			mtk_rx_irq_disable(eth, MTK_RX_DONE_INT(rx_ring->ring_no));
+			__napi_schedule(&txrx_napi->napi);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
 static irqreturn_t mtk_handle_irq(int irq, void *_eth)
 {
 	struct mtk_eth *eth = _eth;
@@ -3773,12 +3865,12 @@
 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
 		if (mtk_r32(eth, reg_map->tx_irq_mask) & MTK_TX_DONE_INT(0)) {
 			if (mtk_r32(eth, reg_map->tx_irq_status) & MTK_TX_DONE_INT(0))
-				mtk_handle_irq_tx(irq, _eth);
+				mtk_handle_irq_tx(irq, &eth->tx_napi[0]);
 		}
 	} else {
 		if (mtk_r32(eth, reg_map->pdma.irq_mask) & MTK_TX_DONE_INT(0)) {
 			if (mtk_r32(eth, reg_map->pdma.irq_status) & MTK_TX_DONE_INT(0))
-				mtk_handle_irq_tx(irq, _eth);
+				mtk_handle_irq_tx(irq, &eth->tx_napi[0]);
 		}
 	}
 
@@ -3872,8 +3964,7 @@
 				MTK_PDMA_SIZE_8DWORDS | MTK_TX_WB_DDONE |
 				MTK_CHK_DDONE | MTK_MULTI_EN_V2 |
 				MTK_PDMA_MUTLI_CNT | MTK_PDMA_RESV_BUF |
-				MTK_DEC_WCOMP | MTK_CSR_CLKGATE_BYP,
-				reg_map->pdma.glo_cfg);
+				MTK_CSR_CLKGATE_BYP, reg_map->pdma.glo_cfg);
 		} else {
 			mtk_w32(eth, MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
 				MTK_RX_DMA_EN | MTK_MULTI_EN |
@@ -4041,9 +4132,14 @@
 			mtk_w32(eth, val | MTK_CDMP_STAG_EN, MTK_CDMP_IG_CTRL);
 		}
 
-		napi_enable(&eth->tx_napi);
+		for (i = 0; i < MTK_TX_NAPI_NUM; i++) {
+			napi_enable(&eth->tx_napi[i].napi);
+			mtk_tx_irq_enable(eth, MTK_TX_DONE_INT(i));
+			if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+				break;
+		}
+
 		napi_enable(&eth->rx_napi[0].napi);
-		mtk_tx_irq_enable(eth, MTK_TX_DONE_INT(0));
 		mtk_rx_irq_enable(eth, MTK_RX_DONE_INT(0));
 
 		if (MTK_HAS_CAPS(eth->soc->caps, MTK_RSS)) {
@@ -4161,10 +4257,14 @@
 	if (!refcount_dec_and_test(&eth->dma_refcnt))
 		return 0;
 
+	for (i = 0; i < MTK_TX_NAPI_NUM; i++) {
+		mtk_tx_irq_disable(eth, MTK_TX_DONE_INT(i));
+		napi_disable(&eth->tx_napi[i].napi);
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+			break;
+	}
 
-	mtk_tx_irq_disable(eth, MTK_TX_DONE_INT(0));
 	mtk_rx_irq_disable(eth, MTK_RX_DONE_INT(0));
-	napi_disable(&eth->tx_napi);
 	napi_disable(&eth->rx_napi[0].napi);
 
 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_RSS)) {
@@ -4241,9 +4341,20 @@
 
 static int mtk_napi_init(struct mtk_eth *eth)
 {
-	struct mtk_napi *rx_napi = &eth->rx_napi[0];
+	struct mtk_napi *rx_napi;
+	struct mtk_napi *tx_napi;
 	int i;
 
+	for (i = 0; i < MTK_TX_NAPI_NUM; i++) {
+		tx_napi = &eth->tx_napi[i];
+		tx_napi->eth = eth;
+		tx_napi->tx_ring = &eth->tx_ring[i];
+
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+			break;
+	}
+
+	rx_napi = &eth->rx_napi[0];
 	rx_napi->eth = eth;
 	rx_napi->rx_ring = &eth->rx_ring[0];
 	rx_napi->irq_grp_no = 2;
@@ -4366,16 +4477,19 @@
 	mtk_rx_irq_disable(eth, ~0);
 
 	/* FE int grouping */
-	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
 		mtk_w32(eth, MTK_TX_DONE_INT(0), reg_map->qdma.int_grp);
-	else
-		mtk_w32(eth, MTK_TX_DONE_INT(0), reg_map->pdma.int_grp);
+	} else {
+		mtk_w32(eth, MTK_TX_DONE_INT(1), MTK_PDMA_INT_GRP1);
+		mtk_w32(eth, MTK_TX_DONE_INT(2), MTK_PDMA_INT_GRP2);
+		mtk_w32(eth, MTK_TX_DONE_INT(3), MTK_PDMA_INT_GRP3);
+	}
 	mtk_w32(eth, MTK_RX_DONE_INT(0), reg_map->qdma.int_grp2);
 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_PDMA_INT)) {
 		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
 			mtk_w32(eth, 0x210FFFF2, MTK_FE_INT_GRP);
 		else
-			mtk_w32(eth, 0xFFFF1FF2, MTK_FE_INT_GRP);
+			mtk_w32(eth, 0xFFFFFFF2, MTK_FE_INT_GRP);
 	} else {
 		mtk_w32(eth, MTK_RX_DONE_INT(0), reg_map->pdma.int_grp);
 		mtk_w32(eth, 0x210F2FF3, MTK_FE_INT_GRP);
@@ -5042,6 +5156,9 @@
 	struct mtk_eth *eth = mac->hw;
 	unsigned int queue = 0;
 
+	if (!MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+		return (skb->mark < MTK_PDMA_TX_NUM) ? skb->mark : 0;
+
 	if (skb->mark > 0 && skb->mark < MTK_QDMA_TX_NUM)
 		return skb->mark;
 
@@ -5265,7 +5382,7 @@
 	struct mtk_phylink_priv *phylink_priv;
 	struct fwnode_handle *fixed_node;
 	struct gpio_desc *desc;
-	int txqs = 1;
+	int txqs;
 
 	if (!_id) {
 		dev_err(eth->dev, "missing mac id\n");
@@ -5285,6 +5402,8 @@
 
 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
 		txqs = MTK_QDMA_TX_NUM;
+	else
+		txqs = MTK_PDMA_TX_NUM;
 
 	eth->netdev[id] = alloc_etherdev_mqs(sizeof(*mac), txqs, 1);
 	if (!eth->netdev[id]) {
@@ -5670,9 +5789,20 @@
 				       mtk_handle_irq, 0,
 				       dev_name(eth->dev), eth);
 	} else {
-		err = devm_request_irq(eth->dev, eth->irq_fe[1],
-				       mtk_handle_irq_tx, 0,
-				       dev_name(eth->dev), eth);
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA)) {
+			err = devm_request_irq(eth->dev, eth->irq_fe[1],
+					       mtk_handle_irq_tx, 0,
+					       dev_name(eth->dev), &eth->tx_napi[0]);
+		} else {
+			for (i = 0; i < MTK_MAX_TX_RING_NUM; i++) {
+				err = devm_request_irq(eth->dev, eth->irq_pdma[i],
+						       mtk_handle_irq_txrx, IRQF_SHARED,
+						       dev_name(eth->dev), &eth->tx_napi[i]);
+				if (err)
+					break;
+			}
+		}
+
 		if (err)
 			goto err_free_dev;
 
@@ -5684,7 +5812,7 @@
 				goto err_free_dev;
 
 			err = devm_request_irq(eth->dev, eth->irq_pdma[0],
-					       mtk_handle_irq_rx, IRQF_SHARED,
+					       mtk_handle_irq_txrx, IRQF_SHARED,
 					       dev_name(eth->dev), &eth->rx_napi[0]);
 			if (err)
 				goto err_free_dev;
@@ -5693,7 +5821,7 @@
 				for (i = 0; i < MTK_RX_RSS_NUM; i++) {
 					err = devm_request_irq(eth->dev,
 							       eth->irq_pdma[MTK_RSS_RING(i)],
-							       mtk_handle_irq_rx, IRQF_SHARED,
+							       mtk_handle_irq_txrx, IRQF_SHARED,
 							       dev_name(eth->dev),
 							       &eth->rx_napi[MTK_RSS_RING(i)]);
 					if (err)
@@ -5707,7 +5835,7 @@
 				for (; i < MTK_HW_LRO_RING_NUM; i++) {
 					err = devm_request_irq(eth->dev,
 							       eth->irq_pdma[i],
-							       mtk_handle_irq_rx, IRQF_SHARED,
+							       mtk_handle_irq_txrx, IRQF_SHARED,
 							       dev_name(eth->dev),
 							       &eth->rx_napi[MTK_HW_LRO_RING(i)]);
 					if (err)
@@ -5759,8 +5887,14 @@
 	 * for NAPI to work
 	 */
 	init_dummy_netdev(&eth->dummy_dev);
-	netif_napi_add(&eth->dummy_dev, &eth->tx_napi, mtk_napi_tx,
-		       MTK_NAPI_WEIGHT);
+
+	for (i = 0; i < MTK_TX_NAPI_NUM; i++) {
+		netif_napi_add(&eth->dummy_dev, &eth->tx_napi[i].napi, mtk_napi_tx,
+			       MTK_NAPI_WEIGHT);
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+			break;
+	}
+
 	netif_napi_add(&eth->dummy_dev, &eth->rx_napi[0].napi, mtk_napi_rx,
 		       MTK_NAPI_WEIGHT);
 
@@ -5819,7 +5953,12 @@
 
 	mtk_hw_deinit(eth);
 
-	netif_napi_del(&eth->tx_napi);
+	for (i = 0; i < MTK_TX_NAPI_NUM; i++) {
+		netif_napi_del(&eth->tx_napi[i].napi);
+		if (MTK_HAS_CAPS(eth->soc->caps, MTK_QDMA))
+			break;
+	}
+
 	netif_napi_del(&eth->rx_napi[0].napi);
 
 	if (MTK_HAS_CAPS(eth->soc->caps, MTK_RSS)) {
diff --git a/21.02/files/target/linux/mediatek/files-5.4/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/21.02/files/target/linux/mediatek/files-5.4/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 226f808..66c2a54 100644
--- a/21.02/files/target/linux/mediatek/files-5.4/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/21.02/files/target/linux/mediatek/files-5.4/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -203,9 +203,11 @@
 #endif
 /* PDMA TX CPU Pointer Register */
 #define MTK_PTX_CTX_IDX0	(PDMA_BASE + 0x08)
+#define MTK_PTX_CTX_IDX_CFG(x)	(MTK_PTX_CTX_IDX0 + ((x) * 0x10))
 
 /* PDMA TX DMA Pointer Register */
 #define MTK_PTX_DTX_IDX0	(PDMA_BASE + 0x0c)
+#define MTK_PTX_DTX_IDX_CFG(x)	(MTK_PTX_DTX_IDX0 + ((x) * 0x10))
 
 /* PDMA RX Base Pointer Register */
 #define MTK_PRX_BASE_PTR0	(PDMA_BASE + 0x100)
@@ -417,6 +419,7 @@
 #define MTK_QTX_CFG(x)			(QDMA_BASE + (x * 0x10))
 #define MTK_QTX_CFG_HW_RESV_CNT_OFFSET	GENMASK(15, 8)
 #define MTK_QTX_CFG_SW_RESV_CNT_OFFSET	GENMASK(7, 0)
+#define MTK_QTX_OFFSET			0x10
 #define QDMA_RES_THRES			4
 
 /* QDMA TX Queue Scheduler Registers */
@@ -689,6 +692,11 @@
 #define RX_DMA_GET_SPORT(_x) 	(((_x) >> RX_DMA_SPORT_SHIFT) & RX_DMA_SPORT_MASK)
 #define RX_DMA_GET_SPORT_V2(_x) (((_x) >> RX_DMA_SPORT_SHIFT_V2) & RX_DMA_SPORT_MASK_V2)
 
+/* PDMA TX Num */
+#define MTK_MAX_TX_RING_NUM	(4)
+#define MTK_TX_NAPI_NUM		(4)
+#define MTK_PDMA_TX_NUM		(4)
+
 /* PDMA V2 descriptor txd4 */
 #define TX_DMA_LS1_V2	BIT(30)
 
@@ -1391,6 +1399,7 @@
 	void *next_free;
 	void *last_free;
 	u32 last_free_ptr;
+	u32 ring_no;
 	u16 thresh;
 	atomic_t free_count;
 	int dma_size;
@@ -1450,6 +1459,7 @@
 struct mtk_napi {
 	struct napi_struct	napi;
 	struct mtk_eth		*eth;
+	struct mtk_tx_ring	*tx_ring;
 	struct mtk_rx_ring	*rx_ring;
 	u32			irq_grp_no;
 };
@@ -1919,10 +1929,10 @@
 	struct regmap			*pctl;
 	bool				hwlro;
 	refcount_t			dma_refcnt;
-	struct mtk_tx_ring		tx_ring;
+	struct mtk_tx_ring		tx_ring[MTK_MAX_TX_RING_NUM];
 	struct mtk_rx_ring		rx_ring[MTK_MAX_RX_RING_NUM];
 	struct mtk_rx_ring		rx_ring_qdma;
-	struct napi_struct		tx_napi;
+	struct mtk_napi			tx_napi[MTK_TX_NAPI_NUM];
 	struct mtk_napi			rx_napi[MTK_RX_NAPI_NUM];
 	struct mtk_rss_params		rss_params;
 	void				*scratch_ring;