mmc: am654_sdhci: Add tuning algorithm for delay chain

Currently the sdhci_am654 driver only supports one tuning
algorithm which should be used only when DLL is enabled. The
ITAPDLY is selected from the largest passing window and the
buffer is viewed as a circular buffer.

The new tuning algorithm should be used when the delay chain
is enabled; the ITAPDLY is selected from the largest passing
window and the buffer is not viewed as a circular buffer.

This implementation is based off of the following paper: [1].

Also add support for multiple failing windows.

[1] https://www.ti.com/lit/an/spract9/spract9.pdf

Fixes: a759abf569d4 ("mmc: am654_sdhci: Add support for software tuning")
Signed-off-by: Judith Mendez <jm@ti.com>
diff --git a/drivers/mmc/am654_sdhci.c b/drivers/mmc/am654_sdhci.c
index 2139fea..7e41dd9 100644
--- a/drivers/mmc/am654_sdhci.c
+++ b/drivers/mmc/am654_sdhci.c
@@ -97,6 +97,7 @@
 	u32 strb_sel;
 	u32 clkbuf_sel;
 	u32 flags;
+	bool dll_enable;
 #define DLL_PRESENT	BIT(0)
 #define IOMUX_PRESENT	BIT(1)
 #define FREQSEL_2_BIT	BIT(2)
@@ -110,6 +111,12 @@
 	u32 capability;
 };
 
+struct window {
+	u8 start;
+	u8 end;
+	u8 length;
+};
+
 static const struct timing_data td[] = {
 	[MMC_LEGACY]	= {"ti,otap-del-sel-legacy",
 			   "ti,itap-del-sel-legacy",
@@ -280,8 +287,11 @@
 		ret = am654_sdhci_setup_dll(plat, speed);
 		if (ret)
 			return ret;
+
+		plat->dll_enable = true;
 	} else {
 		am654_sdhci_setup_delay_chain(plat, mode);
+		plat->dll_enable = false;
 	}
 
 	regmap_update_bits(plat->base, PHY_CTRL5, CLKBUFSEL_MASK,
@@ -375,38 +385,99 @@
 	writeb(val, host->ioaddr + reg);
 }
 #ifdef MMC_SUPPORTS_TUNING
-#define ITAP_MAX	32
+#define ITAPDLY_LENGTH 32
+#define ITAPDLY_LAST_INDEX (ITAPDLY_LENGTH - 1)
+
+static u32 am654_sdhci_calculate_itap(struct udevice *dev, struct window
+			  *fail_window, u8 num_fails, bool circular_buffer)
+{
+	u8 itap = 0, start_fail = 0, end_fail = 0, pass_length = 0;
+	u8 first_fail_start = 0, last_fail_end = 0;
+	struct window pass_window = {0, 0, 0};
+	int prev_fail_end = -1;
+	u8 i;
+
+	if (!num_fails)
+		return ITAPDLY_LAST_INDEX >> 1;
+
+	if (fail_window->length == ITAPDLY_LENGTH) {
+		dev_err(dev, "No passing ITAPDLY, return 0\n");
+		return 0;
+	}
+
+	first_fail_start = fail_window->start;
+	last_fail_end = fail_window[num_fails - 1].end;
+
+	for (i = 0; i < num_fails; i++) {
+		start_fail = fail_window[i].start;
+		end_fail = fail_window[i].end;
+		pass_length = start_fail - (prev_fail_end + 1);
+
+		if (pass_length > pass_window.length) {
+			pass_window.start = prev_fail_end + 1;
+			pass_window.length = pass_length;
+		}
+		prev_fail_end = end_fail;
+	}
+
+	if (!circular_buffer)
+		pass_length = ITAPDLY_LAST_INDEX - last_fail_end;
+	else
+		pass_length = ITAPDLY_LAST_INDEX - last_fail_end + first_fail_start;
+
+	if (pass_length > pass_window.length) {
+		pass_window.start = last_fail_end + 1;
+		pass_window.length = pass_length;
+	}
+
+	if (!circular_buffer)
+		itap = pass_window.start + (pass_window.length >> 1);
+	else
+		itap = (pass_window.start + (pass_window.length >> 1)) % ITAPDLY_LENGTH;
+
+	return (itap > ITAPDLY_LAST_INDEX) ? ITAPDLY_LAST_INDEX >> 1 : itap;
+}
+
 static int am654_sdhci_execute_tuning(struct mmc *mmc, u8 opcode)
 {
 	struct udevice *dev = mmc->dev;
 	struct am654_sdhci_plat *plat = dev_get_plat(dev);
-	int cur_val, prev_val = 1, fail_len = 0, pass_window = 0, pass_len;
-	u32 itap;
+	struct window fail_window[ITAPDLY_LENGTH];
+	u8 curr_pass, itap;
+	u8 fail_index = 0;
+	u8 prev_pass = 1;
+
+	memset(fail_window, 0, sizeof(fail_window));
 
 	/* Enable ITAPDLY */
 	regmap_update_bits(plat->base, PHY_CTRL4, ITAPDLYENA_MASK,
 			   1 << ITAPDLYENA_SHIFT);
 
-	for (itap = 0; itap < ITAP_MAX; itap++) {
+	for (itap = 0; itap < ITAPDLY_LENGTH; itap++) {
 		am654_sdhci_write_itapdly(plat, itap);
 
+		curr_pass = !mmc_send_tuning(mmc, opcode);
+
-		cur_val = !mmc_send_tuning(mmc, opcode);
-		if (cur_val && !prev_val)
-			pass_window = itap;
+		if (!curr_pass && prev_pass)
+			fail_window[fail_index].start = itap;
 
-		if (!cur_val)
-			fail_len++;
+		if (!curr_pass) {
+			fail_window[fail_index].end = itap;
+			fail_window[fail_index].length++;
+		}
+
+		if (curr_pass && !prev_pass)
+			fail_index++;
 
-		prev_val = cur_val;
+		prev_pass = curr_pass;
 	}
-	/*
-	 * Having determined the length of the failing window and start of
-	 * the passing window calculate the length of the passing window and
-	 * set the final value halfway through it considering the range as a
-	 * circular buffer
-	 */
-	pass_len = ITAP_MAX - fail_len;
-	itap = (pass_window + (pass_len >> 1)) % ITAP_MAX;
+
+	if (fail_window[fail_index].length != 0)
+		fail_index++;
+
+	itap = am654_sdhci_calculate_itap(dev, fail_window, fail_index,
+					  plat->dll_enable);
+
 	am654_sdhci_write_itapdly(plat, itap);
 
 	return 0;