clk: renesas: Switch to new SD clock handling

The old SD handling code was huge and could not handle all the details
which have meanwhile shown up on R-Car Gen3 SoCs. It is time to switch
to another design. Make SDnH a separate clock, use the existing divider
clocks and move the errata handling from the clock driver to the SDHI
driver where it belongs.

Based on Linux series by Wolfram Sang, commit bb6d3fa98a41 ("clk:
renesas: rcar-gen3: Switch to new SD clock handling") and commit
e5f7e81ee430a ("mmc: renesas_sdhi: Parse DT for SDnH")

Signed-off-by: Hai Pham <hai.pham.ud@renesas.com>
Signed-off-by: Marek Vasut <marek.vasut+renesas@mailbox.org>
Marek: - Add rcar_clk_* prefix to all functions
       - Fix missing ~ in GENMASK(a, b), use clrsetbits_le32 instead
       - Use DIV_ROUND_CLOSEST, otherwise if parent clock = 199999992
         and rate = 200000000, the divider would be 0 and the table
         lookup would fail.
       - Make rcar_clk_get_table_val return a signed integer, so it
         can return 0 as a valid value and negative values as errors.
       - Make the code operate on the correct clock and add a comment
         which explains the reasoning behind it.
       - Rebase on changes to
         clk: renesas: Introduce and use rcar_clk_get_rate64_div_table function
diff --git a/drivers/clk/renesas/clk-rcar-gen3.c b/drivers/clk/renesas/clk-rcar-gen3.c
index aea8b1e..53f16df 100644
--- a/drivers/clk/renesas/clk-rcar-gen3.c
+++ b/drivers/clk/renesas/clk-rcar-gen3.c
@@ -34,55 +34,11 @@
 #define CPG_PLL2CR		0x002c
 #define CPG_PLL4CR		0x01f4
 
-/* Non-constant mask variant of FIELD_GET */
-#define field_get(_mask, _reg) (((_reg) & (_mask)) >> (ffs(_mask) - 1))
-
-/*
- * SDn Clock
- */
-#define CPG_SD_STP_HCK		BIT(9)
-#define CPG_SD_STP_CK		BIT(8)
-
-#define CPG_SD_STP_MASK		(CPG_SD_STP_HCK | CPG_SD_STP_CK)
-#define CPG_SD_FC_MASK		(0x7 << 2 | 0x3 << 0)
-
-#define CPG_SD_DIV_TABLE_DATA(stp_hck, stp_ck, sd_srcfc, sd_fc, sd_div) \
-{ \
-	.val = ((stp_hck) ? CPG_SD_STP_HCK : 0) | \
-	       ((stp_ck) ? CPG_SD_STP_CK : 0) | \
-	       ((sd_srcfc) << 2) | \
-	       ((sd_fc) << 0), \
-	.div = (sd_div), \
-}
+#define SDnSRCFC_SHIFT		2
+#define STPnHCK_TABLE		(CPG_SDCKCR_STPnHCK >> SDnSRCFC_SHIFT)
 
-/* SDn divider
- *                     sd_srcfc   sd_fc   div
- * stp_hck   stp_ck    (div)      (div)     = sd_srcfc x sd_fc
- *-------------------------------------------------------------------
- *  0         0         0 (1)      1 (4)      4
- *  0         0         1 (2)      1 (4)      8
- *  1         0         2 (4)      1 (4)     16
- *  1         0         3 (8)      1 (4)     32
- *  1         0         4 (16)     1 (4)     64
- *  0         0         0 (1)      0 (2)      2
- *  0         0         1 (2)      0 (2)      4
- *  1         0         2 (4)      0 (2)      8
- *  1         0         3 (8)      0 (2)     16
- *  1         0         4 (16)     0 (2)     32
- */
-static const struct clk_div_table cpg_sd_div_table[] = {
-/*	CPG_SD_DIV_TABLE_DATA(stp_hck,  stp_ck,   sd_srcfc,   sd_fc,  sd_div) */
-	CPG_SD_DIV_TABLE_DATA(0,        0,        0,          1,        4),
-	CPG_SD_DIV_TABLE_DATA(0,        0,        1,          1,        8),
-	CPG_SD_DIV_TABLE_DATA(1,        0,        2,          1,       16),
-	CPG_SD_DIV_TABLE_DATA(1,        0,        3,          1,       32),
-	CPG_SD_DIV_TABLE_DATA(1,        0,        4,          1,       64),
-	CPG_SD_DIV_TABLE_DATA(0,        0,        0,          0,        2),
-	CPG_SD_DIV_TABLE_DATA(0,        0,        1,          0,        4),
-	CPG_SD_DIV_TABLE_DATA(1,        0,        2,          0,        8),
-	CPG_SD_DIV_TABLE_DATA(1,        0,        3,          0,       16),
-	CPG_SD_DIV_TABLE_DATA(1,        0,        4,          0,       32),
-};
+/* Non-constant mask variant of FIELD_GET/FIELD_PREP */
+#define field_get(_mask, _reg) (((_reg) & (_mask)) >> (ffs(_mask) - 1))
 
 static const struct clk_div_table cpg_rpcsrc_div_table[] = {
 	{ 2, 5 }, { 3, 6 }, { 0, 0 },
@@ -92,6 +48,15 @@
 	{ 1, 2 }, { 3, 4 }, { 5, 6 }, { 7, 8 }, { 0, 0 },
 };
 
+static const struct clk_div_table cpg_sdh_div_table[] = {
+	{ 0, 1 }, { 1, 2 }, { STPnHCK_TABLE | 2, 4 }, { STPnHCK_TABLE | 3, 8 },
+	{ STPnHCK_TABLE | 4, 16 }, { 0, 0 },
+};
+
+static const struct clk_div_table cpg_sd_div_table[] = {
+	{ 0, 2 }, { 1, 4 }, { 0, 0 },
+};
+
 static unsigned int rcar_clk_get_table_div(const struct clk_div_table *table,
 					   const u32 value)
 {
@@ -103,6 +68,17 @@
 	return 0;
 }
 
+static int rcar_clk_get_table_val(const struct clk_div_table *table,
+				  unsigned int div)
+{
+	const struct clk_div_table *clkt;
+
+	for (clkt = table; clkt->div; clkt++)
+		if (clkt->div == div)
+			return clkt->val;
+	return -EINVAL;
+}
+
 static __always_inline s64
 rcar_clk_get_rate64_div_table(unsigned int parent, u64 parent_rate,
 			      void __iomem *reg, const u32 mask,
@@ -145,18 +121,45 @@
 	return renesas_clk_get_parent(clk, info, parent);
 }
 
+static int gen3_clk_enable(struct clk *clk)
+{
+	struct gen3_clk_priv *priv = dev_get_priv(clk->dev);
+
+	return renesas_clk_endisable(clk, priv->base, priv->info, true);
+}
+
+static int gen3_clk_disable(struct clk *clk)
+{
+	struct gen3_clk_priv *priv = dev_get_priv(clk->dev);
+
+	return renesas_clk_endisable(clk, priv->base, priv->info, false);
+}
+
+static u64 gen3_clk_get_rate64(struct clk *clk);
+
 static int gen3_clk_setup_sdif_div(struct clk *clk, ulong rate)
 {
 	struct gen3_clk_priv *priv = dev_get_priv(clk->dev);
 	struct cpg_mssr_info *info = priv->info;
 	const struct cpg_core_clk *core;
-	struct clk parent;
-	int ret;
+	struct clk parent, grandparent;
+	int ret, value = 0;	/* value is signed, lookup may return -errno */
+	u32 div = 0;
 
-	ret = gen3_clk_get_parent(priv, clk, info, &parent);
-	if (ret) {
-		printf("%s[%i] parent fail, ret=%i\n", __func__, __LINE__, ret);
-		return ret;
+	/*
+	 * The clk may be either a CPG_MOD or a core clock. In case this is a
+	 * MOD clock, use the core clock one level up, otherwise use the clock
+	 * as-is. Note that the parent clock here always represents the core
+	 * clock, and the grandparent clock is the parent of that core clock.
+	 */
+	if (renesas_clk_is_mod(clk)) {
+		ret = gen3_clk_get_parent(priv, clk, info, &parent);
+		if (ret) {
+			printf("%s[%i] parent fail, ret=%i\n", __func__, __LINE__, ret);
+			return ret;
+		}
+	} else {
+		parent = *clk;
 	}
 
 	if (renesas_clk_is_mod(&parent))
@@ -166,32 +169,47 @@
 	if (ret)
 		return ret;
 
-	if (core->type != CLK_TYPE_GEN3_SD)
-		return 0;
+	ret = renesas_clk_get_parent(&parent, info, &grandparent);
+	if (ret) {
+		printf("%s[%i] grandparent fail, ret=%i\n", __func__, __LINE__, ret);
+		return ret;
+	}
 
-	debug("%s[%i] SDIF offset=%x\n", __func__, __LINE__, core->offset);
+	switch (core->type) {
+	case CLK_TYPE_GEN3_SDH:
+		fallthrough;
+	case CLK_TYPE_GEN4_SDH:
+		div = DIV_ROUND_CLOSEST(gen3_clk_get_rate64(&grandparent), rate);
+		value = rcar_clk_get_table_val(cpg_sdh_div_table, div);
+		if (value < 0)
+			return value;
 
-	writel((rate == 400000000) ? 0x4 : 0x1, priv->base + core->offset);
+		clrsetbits_le32(priv->base + core->offset,
+				GENMASK(9, 2), value << 2);
 
-	return 0;
-}
+		debug("%s[%i] SDH clk: parent=%i offset=%x div=%u rate=%lu => val=%i\n",
+		      __func__, __LINE__, core->parent, core->offset, div, rate, value);
+		break;
 
-static int gen3_clk_enable(struct clk *clk)
-{
-	struct gen3_clk_priv *priv = dev_get_priv(clk->dev);
+	case CLK_TYPE_GEN3_SD:
+		fallthrough;
+	case CLK_TYPE_GEN4_SD:
+		div = DIV_ROUND_CLOSEST(gen3_clk_get_rate64(&grandparent), rate);
+		value = rcar_clk_get_table_val(cpg_sd_div_table, div);
+		if (value < 0)
+			return value;
 
-	return renesas_clk_endisable(clk, priv->base, priv->info, true);
-}
+		clrsetbits_le32(priv->base + core->offset,
+				GENMASK(1, 0), value);
 
-static int gen3_clk_disable(struct clk *clk)
-{
-	struct gen3_clk_priv *priv = dev_get_priv(clk->dev);
+		debug("%s[%i] SD clk: parent=%i offset=%x div=%u rate=%lu => val=%i\n",
+		      __func__, __LINE__, core->parent, core->offset, div, rate, value);
+		break;
+	}
 
-	return renesas_clk_endisable(clk, priv->base, priv->info, false);
+	return 0;
 }
 
-static u64 gen3_clk_get_rate64(struct clk *clk);
-
 static u64 gen3_clk_get_rate64_pll_mul_reg(struct gen3_clk_priv *priv,
 					   struct clk *parent,
 					   u32 mul_reg, u32 mult, u32 div,
 
-static u64 gen3_clk_get_rate64(struct clk *clk);
-
 static u64 gen3_clk_get_rate64_pll_mul_reg(struct gen3_clk_priv *priv,
 					   struct clk *parent,
 					   u32 mul_reg, u32 mult, u32 div,
@@ -223,7 +241,7 @@
 					priv->cpg_pll_config;
 	u32 value, div;
 	u64 rate = 0;
-	int i, ret;
+	int ret;
 
 	debug("%s[%i] Clock: id=%lu\n", __func__, __LINE__, clk->id);
 
@@ -328,28 +346,26 @@
 	case CLK_TYPE_GEN3_SDH:	/* Fixed factor 1:1 */
 		fallthrough;
 	case CLK_TYPE_GEN4_SDH:	/* Fixed factor 1:1 */
-		return gen3_clk_get_rate64(&parent);
+		/*
+		 * This takes STPnHCK and STPnCK bits into consideration
+		 * in the table look up too, hence the inobvious GENMASK
+		 * below. Bits [7:5] always read zero, so this is OKish.
+		 */
+		return rcar_clk_get_rate64_div_table(core->parent,
+						     gen3_clk_get_rate64(&parent),
+						     priv->base + core->offset,
+						     CPG_SDCKCR_SRCFC_MASK |
+						     GENMASK(9, 5),
+						     cpg_sdh_div_table, "SDH");
 
-	case CLK_TYPE_GEN3_SD:		/* FIXME */
+	case CLK_TYPE_GEN3_SD:
 		fallthrough;
 	case CLK_TYPE_GEN4_SD:
-		value = readl(priv->base + core->offset);
-		value &= CPG_SD_STP_MASK | CPG_SD_FC_MASK;
-
-		for (i = 0; i < ARRAY_SIZE(cpg_sd_div_table); i++) {
-			if (cpg_sd_div_table[i].val != value)
-				continue;
-
-			rate = gen3_clk_get_rate64(&parent) /
-			       cpg_sd_div_table[i].div;
-			debug("%s[%i] SD clk: parent=%i div=%i => rate=%llu\n",
-			      __func__, __LINE__,
-			      core->parent, cpg_sd_div_table[i].div, rate);
-
-			return rate;
-		}
-
-		return -EINVAL;
+		return rcar_clk_get_rate64_div_table(core->parent,
+						     gen3_clk_get_rate64(&parent),
+						     priv->base + core->offset,
+						     CPG_SDCKCR_FC_MASK,
+						     cpg_sd_div_table, "SD");
 
 	case CLK_TYPE_GEN3_RPCSRC:
 		return rcar_clk_get_rate64_div_table(core->parent,
diff --git a/drivers/clk/renesas/rcar-gen3-cpg.h b/drivers/clk/renesas/rcar-gen3-cpg.h
index 41a30c5..008e892 100644
--- a/drivers/clk/renesas/rcar-gen3-cpg.h
+++ b/drivers/clk/renesas/rcar-gen3-cpg.h
@@ -111,6 +111,11 @@
 
 #define CPG_RST_MODEMR	0x060
 
+#define CPG_SDCKCR_STPnHCK		BIT(9)
+#define CPG_SDCKCR_STPnCK		BIT(8)
+#define CPG_SDCKCR_SRCFC_MASK		GENMASK(4, 2)
+#define CPG_SDCKCR_FC_MASK		GENMASK(1, 0)
+
 #define CPG_RPCCKCR	0x238
 #define CPG_RPCCKCR_DIV_POST_MASK	GENMASK(4, 3)
 #define CPG_RPCCKCR_DIV_PRE_MASK	GENMASK(2, 0)
diff --git a/drivers/mmc/renesas-sdhi.c b/drivers/mmc/renesas-sdhi.c
index f30d784..4a1acce 100644
--- a/drivers/mmc/renesas-sdhi.c
+++ b/drivers/mmc/renesas-sdhi.c
@@ -358,13 +358,21 @@
 	struct mmc *mmc = mmc_get_mmc_dev(dev);
 	bool hs400 = (mmc->selected_mode == MMC_HS_400);
 	int ret, taps = hs400 ? priv->nrtaps : 8;
+	const u32 sdn_rate = 200000000;
+	u32 sdnh_rate = 800000000;
 	unsigned long new_tap;
 	u32 reg;
 
-	if (taps == 4)	/* HS400 on 4tap SoC needs different clock */
-		ret = clk_set_rate(&priv->clk, 400000000);
-	else
-		ret = clk_set_rate(&priv->clk, 200000000);
+	if (clk_valid(&priv->clkh) && !priv->needs_clkh_fallback) {
+		/* HS400 on 4tap SoC => SDnH=400 MHz, SDn=200 MHz */
+		if (taps == 4)
+			sdnh_rate /= 2;
+		ret = clk_set_rate(&priv->clkh, sdnh_rate);
+		if (ret < 0)
+			return ret;
+	}
+
+	ret = clk_set_rate(&priv->clk, sdn_rate);
 	if (ret < 0)
 		return ret;
 
@@ -967,6 +975,11 @@
 		return ret;
 	}
 
+	/* optional SDnH clock */
+	ret = clk_get_by_name(dev, "clkh", &priv->clkh);
+	if (ret < 0)
+		dev_dbg(dev, "failed to get clkh\n");
+
 	/* set to max rate */
 	ret = clk_set_rate(&priv->clk, 200000000);
 	if (ret < 0) {
diff --git a/drivers/mmc/tmio-common.h b/drivers/mmc/tmio-common.h
index e517ed9..88244e8 100644
--- a/drivers/mmc/tmio-common.h
+++ b/drivers/mmc/tmio-common.h
@@ -138,6 +138,7 @@
 #endif
 #if CONFIG_IS_ENABLED(CLK)
 	struct clk			clk;
+	struct clk			clkh;
 #endif
 #if CONFIG_IS_ENABLED(RENESAS_SDHI)
 	unsigned int			smpcmp;