[][MAC80211][hnat][Improve ETH to ETH performance in an unbalanced PHY rate test for the mt7988]

[Description]
Add two patches to improve ETH to ETH performance in an unbalanced PHY
rate test for the mt7988.

These patches enlarge the FQ DMA ring size to 4K descriptors and
dispatch short packets (pure TCP ACKs) to a high-priority queue in the
PPPQ path, as sketched below.

Without these patches, ETH to ETH traffic cannot reach line rate in an
unbalanced PHY rate test for the mt7988.
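
For reference, the dispatch rule added to hnat_nf_hook.c below can be
summarised as: a TCP segment carrying no payload (a pure ACK) is moved
from its regular per-port queue (qid 0~5) to the matching high-priority
queue (qid 6~11). A minimal userspace sketch of that rule, using raw
header offsets so it stays self-contained (pppq_adjust_qid and rd16 are
illustrative helpers, not driver code):

#include <stdint.h>
#include <stddef.h>
#include <netinet/in.h>		/* IPPROTO_TCP */

/* Network-order (big-endian) 16-bit read. */
static uint16_t rd16(const uint8_t *p)
{
	return (uint16_t)((p[0] << 8) | p[1]);
}

/*
 * Return the queue id to use for a raw Ethernet frame: a TCP segment
 * with zero payload (a pure ACK) is bumped from its per-port queue
 * (qid 0~5) to the corresponding high-priority queue (qid + 6).
 */
static uint32_t pppq_adjust_qid(const uint8_t *frame, size_t len, uint32_t qid)
{
	const uint8_t *l3 = frame + 14;			/* past the Ethernet header */
	unsigned int ip_hlen, tcp_hlen, payload;
	uint16_t ethertype;

	if (len < 14 + 20 + 20)				/* smallest IPv4 TCP frame */
		return qid;

	ethertype = rd16(frame + 12);
	if (ethertype == 0x0800 && l3[9] == IPPROTO_TCP) {
		ip_hlen = (l3[0] & 0x0f) * 4;		/* IPv4 IHL field */
		if (len < 14 + ip_hlen + 20)		/* room for the TCP header? */
			return qid;
		tcp_hlen = (l3[ip_hlen + 12] >> 4) * 4;	/* TCP data offset */
		payload  = rd16(l3 + 2) - ip_hlen - tcp_hlen;
	} else if (ethertype == 0x86dd && len >= 14 + 40 + 20 &&
		   l3[6] == IPPROTO_TCP) {
		tcp_hlen = (l3[40 + 12] >> 4) * 4;	/* TCP data offset */
		payload  = rd16(l3 + 4) - tcp_hlen;	/* IPv6 payload_len - TCP hdr */
	} else {
		return qid;				/* not TCP: leave the qid alone */
	}

	return payload == 0 ? qid + 6 : qid;		/* pure ACK -> high-priority queue */
}

The idea is that ACKs for the faster direction no longer queue behind
bulk data on the slower port.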

[Release-log]
N/A


Change-Id: I572c886af40fbce236ba93ccfa1118b246f608d7
Reviewed-on: https://gerrit.mediatek.inc/c/openwrt/feeds/mtk_openwrt_feeds/+/8312578
diff --git a/autobuild_mac80211_release/mt7988_mt7996_mac80211/target/linux/mediatek/patches-5.4/999-2952-net-ethernet-mtk_eth_soc-modify-fq-size-4K.patch b/autobuild_mac80211_release/mt7988_mt7996_mac80211/target/linux/mediatek/patches-5.4/999-2952-net-ethernet-mtk_eth_soc-modify-fq-size-4K.patch
new file mode 100644
index 0000000..2d4bfaa
--- /dev/null
+++ b/autobuild_mac80211_release/mt7988_mt7996_mac80211/target/linux/mediatek/patches-5.4/999-2952-net-ethernet-mtk_eth_soc-modify-fq-size-4K.patch
@@ -0,0 +1,222 @@
+From 4136e3567f6904259babbe5ae5c0d0bf06413f57 Mon Sep 17 00:00:00 2001
+From: Bo-Cun Chen <bc-bocun.chen@mediatek.com>
+Date: Wed, 25 Oct 2023 09:21:06 +0800
+Subject: [PATCH] 999-2952-net-ethernet-mtk_eth_soc-modify-fq-size-4K
+
+---
+ drivers/net/ethernet/mediatek/mtk_eth_dbg.c   |  2 +-
+ drivers/net/ethernet/mediatek/mtk_eth_soc.c   | 74 ++++++++++---------
+ drivers/net/ethernet/mediatek/mtk_eth_soc.h   |  5 +-
+ .../ethernet/mediatek/mtk_hnat/hnat_debugfs.c |  2 +-
+ .../ethernet/mediatek/mtk_hnat/hnat_nf_hook.c | 23 ++++++
+ 5 files changed, 69 insertions(+), 37 deletions(-)
+
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_dbg.c b/drivers/net/ethernet/mediatek/mtk_eth_dbg.c
+index e50e1ac..7c137e5 100755
+--- a/drivers/net/ethernet/mediatek/mtk_eth_dbg.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_dbg.c
+@@ -882,7 +882,7 @@ int hwtx_ring_read(struct seq_file *seq, void *v)
+ 	struct mtk_tx_dma_v2 *hwtx_ring;
+ 	int i = 0;
+ 
+-	for (i = 0; i < MTK_DMA_SIZE; i++) {
++	for (i = 0; i < MTK_DMA_FQ_SIZE; i++) {
+ 		dma_addr_t addr = eth->phy_scratch_ring +
+ 				  i * (dma_addr_t)eth->soc->txrx.txd_size;
+ 
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+index 1226dd6..fe9c1de 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -1557,10 +1557,10 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
+ {
+ 	const struct mtk_soc_data *soc = eth->soc;
+ 	dma_addr_t phy_ring_tail;
+-	int cnt = MTK_DMA_SIZE;
++	int cnt = MTK_DMA_FQ_SIZE;
+ 	dma_addr_t dma_addr;
+ 	u64 addr64 = 0;
+-	int i;
++	int i, j, len;
+ 
+ 	if (!eth->soc->has_sram) {
+ 		eth->scratch_ring = dma_alloc_coherent(eth->dma_dev,
+@@ -1577,40 +1577,44 @@ static int mtk_init_fq_dma(struct mtk_eth *eth)
+ 	if (unlikely(!eth->scratch_ring))
+                         return -ENOMEM;
+ 
+-	eth->scratch_head = kcalloc(cnt, MTK_QDMA_PAGE_SIZE, GFP_KERNEL);
+-	if (unlikely(!eth->scratch_head))
+-		return -ENOMEM;
+-
+-	dma_addr = dma_map_single(eth->dma_dev,
+-				  eth->scratch_head, cnt * MTK_QDMA_PAGE_SIZE,
+-				  DMA_FROM_DEVICE);
+-	if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr)))
+-		return -ENOMEM;
+-
+ 	phy_ring_tail = eth->phy_scratch_ring +
+ 			(dma_addr_t)soc->txrx.txd_size * (cnt - 1);
+ 
+-	for (i = 0; i < cnt; i++) {
+-		struct mtk_tx_dma_v2 *txd;
++	for (j = 0; j < DIV_ROUND_UP(MTK_DMA_FQ_SIZE, MTK_DMA_FQ_LENGTH); j++) {
++		len = min_t(int, cnt - j * MTK_DMA_FQ_LENGTH, MTK_DMA_FQ_LENGTH);
+ 
+-		txd = eth->scratch_ring + i * soc->txrx.txd_size;
+-		txd->txd1 = dma_addr + i * MTK_QDMA_PAGE_SIZE;
+-		if (i < cnt - 1)
+-			txd->txd2 = eth->phy_scratch_ring +
+-				(i + 1) * soc->txrx.txd_size;
++		eth->scratch_head[j] = kcalloc(len, MTK_QDMA_PAGE_SIZE, GFP_KERNEL);
++		if (unlikely(!eth->scratch_head[j]))
++			return -ENOMEM;
+ 
+-		addr64 = (MTK_HAS_CAPS(eth->soc->caps, MTK_8GB_ADDRESSING)) ?
+-			  TX_DMA_SDP1(dma_addr + i * MTK_QDMA_PAGE_SIZE) : 0;
++		dma_addr = dma_map_single(eth->dma_dev,
++					  eth->scratch_head[j], len * MTK_QDMA_PAGE_SIZE,
++					  DMA_FROM_DEVICE);
++		if (unlikely(dma_mapping_error(eth->dma_dev, dma_addr)))
++			return -ENOMEM;
+ 
+-		txd->txd3 = TX_DMA_PLEN0(MTK_QDMA_PAGE_SIZE) | addr64;
+-		txd->txd4 = 0;
++		for (i = 0; i < len; i++) {
++			struct mtk_tx_dma_v2 *txd;
+ 
+-		if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2) ||
+-		    MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V3)) {
+-			txd->txd5 = 0;
+-			txd->txd6 = 0;
+-			txd->txd7 = 0;
+-			txd->txd8 = 0;
++			txd = eth->scratch_ring + (j * MTK_DMA_FQ_LENGTH + i) * soc->txrx.txd_size;
++			txd->txd1 = dma_addr + i * MTK_QDMA_PAGE_SIZE;
++			if (j * MTK_DMA_FQ_LENGTH + i < cnt)
++				txd->txd2 = eth->phy_scratch_ring +
++					(j * MTK_DMA_FQ_LENGTH + i + 1) * soc->txrx.txd_size;
++
++			addr64 = (MTK_HAS_CAPS(eth->soc->caps, MTK_8GB_ADDRESSING)) ?
++				  TX_DMA_SDP1(dma_addr + i * MTK_QDMA_PAGE_SIZE) : 0;
++
++			txd->txd3 = TX_DMA_PLEN0(MTK_QDMA_PAGE_SIZE) | addr64;
++			txd->txd4 = 0;
++
++			if (MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V2) ||
++			    MTK_HAS_CAPS(eth->soc->caps, MTK_NETSYS_V3)) {
++				txd->txd5 = 0;
++				txd->txd6 = 0;
++				txd->txd7 = 0;
++				txd->txd8 = 0;
++			}
+ 		}
+ 	}
+ 
+@@ -2541,9 +2545,9 @@ static int mtk_tx_alloc(struct mtk_eth *eth)
+ 		ring->dma = dma_alloc_coherent(eth->dma_dev, MTK_DMA_SIZE * sz,
+ 					       &ring->phys, GFP_KERNEL);
+ 	else {
+-		ring->dma =  eth->scratch_ring + MTK_DMA_SIZE * sz;
++		ring->dma =  eth->scratch_ring + MTK_DMA_FQ_SIZE * sz;
+ 		ring->phys = eth->phy_scratch_ring +
+-			     MTK_DMA_SIZE * (dma_addr_t)sz;
++			     MTK_DMA_FQ_SIZE * (dma_addr_t)sz;
+ 	}
+ 
+ 	if (!ring->dma)
+@@ -3349,9 +3353,11 @@ static void mtk_dma_free(struct mtk_eth *eth)
+ 			mtk_rx_clean(eth, &eth->rx_ring[MTK_RSS_RING(i)], 1);
+ 	}
+ 
+-	if (eth->scratch_head) {
+-		kfree(eth->scratch_head);
+-		eth->scratch_head = NULL;
++	for (i = 0; i < DIV_ROUND_UP(MTK_DMA_FQ_SIZE, MTK_DMA_FQ_LENGTH); i++) {
++		if (eth->scratch_head[i]) {
++			kfree(eth->scratch_head[i]);
++			eth->scratch_head[i] = NULL;
++		}
+ 	}
+ }
+ 
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+index fe8bdee..cd2de23 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -20,6 +20,9 @@
+ #define	MTK_MAX_RX_LENGTH	1536
+ #define MTK_MIN_TX_LENGTH	60
+ #define MTK_DMA_SIZE		2048
++#define MTK_DMA_FQ_SIZE		4096
++#define MTK_DMA_FQ_HEAD		32
++#define MTK_DMA_FQ_LENGTH	2048
+ #define MTK_NAPI_WEIGHT		256
+ 
+ #if defined(CONFIG_MEDIATEK_NETSYS_V3)
+@@ -1837,7 +1840,7 @@ struct mtk_eth {
+ 	void				*scratch_ring;
+ 	struct mtk_reset_event		reset_event;
+ 	dma_addr_t			phy_scratch_ring;
+-	void				*scratch_head;
++	void				*scratch_head[MTK_DMA_FQ_HEAD];
+ 	struct clk			*clks[MTK_CLK_MAX];
+ 
+ 	struct mii_bus			*mii_bus;
+diff --git a/drivers/net/ethernet/mediatek/mtk_hnat/hnat_debugfs.c b/drivers/net/ethernet/mediatek/mtk_hnat/hnat_debugfs.c
+index fc7d216..b97fd6a 100644
+--- a/drivers/net/ethernet/mediatek/mtk_hnat/hnat_debugfs.c
++++ b/drivers/net/ethernet/mediatek/mtk_hnat/hnat_debugfs.c
+@@ -2784,7 +2784,7 @@ static ssize_t hnat_qos_toggle_write(struct file *file, const char __user *buffe
+ 		qos_toggle = 1;
+ 	} else if (buf[0] == '2') {
+ 		pr_info("Per-port-per-queue mode is going to be enabled!\n");
+-		pr_info("PPPQ use qid 0~5 (scheduler 0).\n");
++		pr_info("PPPQ use qid 0~11 (scheduler 0).\n");
+ 		qos_toggle = 2;
+ 		qos_dl_toggle = 1;
+ 		qos_ul_toggle = 1;
+diff --git a/drivers/net/ethernet/mediatek/mtk_hnat/hnat_nf_hook.c b/drivers/net/ethernet/mediatek/mtk_hnat/hnat_nf_hook.c
+index 85c38e0..6a373f8 100644
+--- a/drivers/net/ethernet/mediatek/mtk_hnat/hnat_nf_hook.c
++++ b/drivers/net/ethernet/mediatek/mtk_hnat/hnat_nf_hook.c
+@@ -1219,6 +1219,7 @@ static unsigned int skb_to_hnat_info(struct sk_buff *skb,
+ 	int udp = 0;
+ 	u32 qid = 0;
+ 	u32 port_id = 0;
++	u32 payload_len = 0;
+ 	int mape = 0;
+ 
+ 	ct = nf_ct_get(skb, &ctinfo);
+@@ -1748,6 +1749,28 @@ static unsigned int skb_to_hnat_info(struct sk_buff *skb,
+ 	else
+ 		qid = 0;
+ 
++	if (IS_PPPQ_MODE && IS_PPPQ_PATH(dev, skb)) {
++		if (ntohs(eth->h_proto) == ETH_P_IP) {
++			iph = ip_hdr(skb);
++			if (iph->protocol == IPPROTO_TCP) {
++				skb_set_transport_header(skb, sizeof(struct iphdr));
++				payload_len = be16_to_cpu(iph->tot_len) - skb_transport_offset(skb) - tcp_hdrlen(skb);
++				/* Dispatch ACK packets to high priority queue */
++				if (payload_len == 0)
++					qid += 6;
++			}
++		} else if (ntohs(eth->h_proto) == ETH_P_IPV6) {
++			ip6h = ipv6_hdr(skb);
++			if (ip6h->nexthdr == NEXTHDR_TCP) {
++				skb_set_transport_header(skb, sizeof(struct ipv6hdr));
++				payload_len = be16_to_cpu(ip6h->payload_len) - tcp_hdrlen(skb);
++				/* Dispatch ACK packets to high priority queue */
++				if (payload_len == 0)
++					qid += 6;
++			}
++		}
++	}
++
+ 	if (IS_IPV4_GRP(foe)) {
+ 		entry.ipv4_hnapt.iblk2.dp = gmac;
+ 		entry.ipv4_hnapt.iblk2.port_mg =
+-- 
+2.18.0
+