ti816x: Rework DDR initialization sequence

The ti816x/am389x SoC is the first generation in what U-Boot calls the
"am33xx" family.  In the first generation of this family the DDR
initialization sequence is quite different from all of the subsequent
generations.  Whereas with ti814x (second generation) we can easily work
around the minor differences between that and am33xx (third generation),
our attempts to do this for ti816x weren't sufficient.  Rather than add a
large amount of #ifdef logic to make this different sequence work, we add
a new file, ti816x_emif4.c, to handle the various required undocumented
register writes and their sequence, and still leverage what we can from
arch/arm/mach-omap2/am33xx/ddr.c.  As DDR2 has similar problems
today but I am unable to test it, we drop the DDR2 defines from the code
rather than imply that it works by leaving it.  We also remove a bunch
of other untested code about changing the speed the DDR runs at.

Signed-off-by: Tom Rini <trini@konsulko.com>
diff --git a/arch/arm/mach-omap2/am33xx/Makefile b/arch/arm/mach-omap2/am33xx/Makefile
index 05cc8a1..1e4c04e 100644
--- a/arch/arm/mach-omap2/am33xx/Makefile
+++ b/arch/arm/mach-omap2/am33xx/Makefile
@@ -15,7 +15,10 @@
 obj-$(CONFIG_TI816X)	+= clock_ti816x.o
 obj-y	+= sys_info.o
 obj-y	+= ddr.o
+ifeq ($(CONFIG_TI816X)$(CONFIG_SKIP_LOWLEVEL_INIT),)
 obj-y	+= emif4.o
+endif
+obj-$(CONFIG_TI816X)	+= ti816x_emif4.o
 obj-y	+= board.o
 obj-y	+= mux.o
 
diff --git a/arch/arm/mach-omap2/am33xx/board.c b/arch/arm/mach-omap2/am33xx/board.c
index a8b5d13..faf7d07 100644
--- a/arch/arm/mach-omap2/am33xx/board.c
+++ b/arch/arm/mach-omap2/am33xx/board.c
@@ -39,6 +39,27 @@
 
 DECLARE_GLOBAL_DATA_PTR;
 
+int dram_init(void)
+{
+#ifndef CONFIG_SKIP_LOWLEVEL_INIT
+	sdram_init();
+#endif
+
+	/* dram_init must store complete ramsize in gd->ram_size */
+	gd->ram_size = get_ram_size(
+			(void *)CONFIG_SYS_SDRAM_BASE,
+			CONFIG_MAX_RAM_BANK_SIZE);
+	return 0;
+}
+
+int dram_init_banksize(void)
+{
+	gd->bd->bi_dram[0].start = CONFIG_SYS_SDRAM_BASE;
+	gd->bd->bi_dram[0].size = gd->ram_size;
+
+	return 0;
+}
+
 #if !CONFIG_IS_ENABLED(OF_CONTROL)
 static const struct ns16550_platdata am33xx_serial[] = {
 	{ .base = CONFIG_SYS_NS16550_COM1, .reg_shift = 2,
diff --git a/arch/arm/mach-omap2/am33xx/clock_ti816x.c b/arch/arm/mach-omap2/am33xx/clock_ti816x.c
index 079ddd7..967623d 100644
--- a/arch/arm/mach-omap2/am33xx/clock_ti816x.c
+++ b/arch/arm/mach-omap2/am33xx/clock_ti816x.c
@@ -54,57 +54,8 @@
 #define MAIN_MDIV7		0x4
 
 /* DDR PLL */
-#if defined(CONFIG_TI816X_DDR_PLL_400) /* 400 MHz */
 #define DDR_N			59
 #define DDR_P			0x1
-#define DDR_MDIV1		0x4
-#define DDR_INTFREQ2		0x8
-#define DDR_FRACFREQ2		0xD99999
-#define DDR_MDIV2		0x1E
-#define DDR_INTFREQ3		0x8
-#define DDR_FRACFREQ3		0x0
-#define DDR_MDIV3		0x4
-#define DDR_INTFREQ4		0xE /* Expansion DDR clk */
-#define DDR_FRACFREQ4		0x0
-#define DDR_MDIV4		0x4
-#define DDR_INTFREQ5		0xE /* Expansion DDR clk */
-#define DDR_FRACFREQ5		0x0
-#define DDR_MDIV5		0x4
-#elif defined(CONFIG_TI816X_DDR_PLL_531) /* 531 MHz */
-#define DDR_N			59
-#define DDR_P			0x1
-#define DDR_MDIV1		0x3
-#define DDR_INTFREQ2		0x8
-#define DDR_FRACFREQ2		0xD99999
-#define DDR_MDIV2		0x1E
-#define DDR_INTFREQ3		0x8
-#define DDR_FRACFREQ3		0x0
-#define DDR_MDIV3		0x4
-#define DDR_INTFREQ4		0xE /* Expansion DDR clk */
-#define DDR_FRACFREQ4		0x0
-#define DDR_MDIV4		0x4
-#define DDR_INTFREQ5		0xE /* Expansion DDR clk */
-#define DDR_FRACFREQ5		0x0
-#define DDR_MDIV5		0x4
-#elif defined(CONFIG_TI816X_DDR_PLL_675) /* 675 MHz */
-#define DDR_N			50
-#define DDR_P			0x1
-#define DDR_MDIV1		0x2
-#define DDR_INTFREQ2		0x9
-#define DDR_FRACFREQ2		0x0
-#define DDR_MDIV2		0x19
-#define DDR_INTFREQ3		0x13
-#define DDR_FRACFREQ3		0x800000
-#define DDR_MDIV3		0x2
-#define DDR_INTFREQ4		0xE /* Expansion DDR clk */
-#define DDR_FRACFREQ4		0x0
-#define DDR_MDIV4		0x4
-#define DDR_INTFREQ5		0xE /* Expansion DDR clk */
-#define DDR_FRACFREQ5		0x0
-#define DDR_MDIV5		0x4
-#elif defined(CONFIG_TI816X_DDR_PLL_796) /* 796 MHz */
-#define DDR_N			59
-#define DDR_P			0x1
 #define DDR_MDIV1		0x2
 #define DDR_INTFREQ2		0x8
 #define DDR_FRACFREQ2		0xD99999
@@ -118,12 +69,10 @@
 #define DDR_INTFREQ5		0xE /* Expansion DDR clk */
 #define DDR_FRACFREQ5		0x0
 #define DDR_MDIV5		0x4
-#endif
 
 #define CONTROL_STATUS			(CTRL_BASE + 0x40)
 #define DDR_RCD				(CTRL_BASE + 0x070C)
 #define CM_TIMER1_CLKSEL		(PRCM_BASE + 0x390)
-#define DMM_PAT_BASE_ADDR		(DMM_BASE + 0x420)
 #define CM_ALWON_CUST_EFUSE_CLKCTRL	(PRCM_BASE + 0x1628)
 
 #define INTCPS_SYSCONFIG	0x48200010
@@ -187,6 +136,15 @@
 
 void enable_dmm_clocks(void)
 {
+	writel(PRCM_MOD_EN, &cmdef->dmmclkctrl);
+	/* Wait for dmm to be fully functional, including OCP */
+	while (((readl(&cmdef->dmmclkctrl) >> 17) & 0x3) != 0)
+		;
+}
+
+void enable_emif_clocks(void)
+{
+	writel(PRCM_MOD_EN, &cmdef->fwclkctrl);
 	writel(PRCM_MOD_EN, &cmdef->l3fastclkstctrl);
 	writel(PRCM_MOD_EN, &cmdef->emif0clkctrl);
 	writel(PRCM_MOD_EN, &cmdef->emif1clkctrl);
@@ -200,14 +158,6 @@
 	/* Wait for emif1 to be fully functional, including OCP */
 	while (((readl(&cmdef->emif1clkctrl) >> 17) & 0x3) != 0)
 		;
-
-	writel(PRCM_MOD_EN, &cmdef->dmmclkctrl);
-	/* Wait for dmm to be fully functional, including OCP */
-	while (((readl(&cmdef->dmmclkctrl) >> 17) & 0x3) != 0)
-		;
-
-	/* Enable Tiled Access */
-	writel(0x80000000, DMM_PAT_BASE_ADDR);
 }
 
 /* assume delay is aprox at least 1us */
diff --git a/arch/arm/mach-omap2/am33xx/ddr.c b/arch/arm/mach-omap2/am33xx/ddr.c
index 690487e..7bf19ed 100644
--- a/arch/arm/mach-omap2/am33xx/ddr.c
+++ b/arch/arm/mach-omap2/am33xx/ddr.c
@@ -163,6 +163,14 @@
  */
 void config_sdram(const struct emif_regs *regs, int nr)
 {
+#ifdef CONFIG_TI816X
+	writel(regs->sdram_config, &emif_reg[nr]->emif_sdram_config);
+	writel(regs->emif_ddr_phy_ctlr_1, &emif_reg[nr]->emif_ddr_phy_ctrl_1);
+	writel(regs->emif_ddr_phy_ctlr_1, &emif_reg[nr]->emif_ddr_phy_ctrl_1_shdw);
+	writel(0x0000613B, &emif_reg[nr]->emif_sdram_ref_ctrl);   /* initially a large refresh period */
+	writel(0x1000613B, &emif_reg[nr]->emif_sdram_ref_ctrl);   /* trigger initialization           */
+	writel(regs->ref_ctrl, &emif_reg[nr]->emif_sdram_ref_ctrl);
+#else
 	if (regs->zq_config) {
 		writel(regs->zq_config, &emif_reg[nr]->emif_zq_config);
 		writel(regs->sdram_config, &cstat->secure_emif_sdram_config);
@@ -184,6 +192,7 @@
 	/* Write REG_COS_COUNT_1, REG_COS_COUNT_2, and REG_PR_OLD_COUNT. */
 	if (regs->ocp_config)
 		writel(regs->ocp_config, &emif_reg[nr]->emif_l3_config);
+#endif
 }
 
 /**
diff --git a/arch/arm/mach-omap2/am33xx/emif4.c b/arch/arm/mach-omap2/am33xx/emif4.c
index 3a110f2..68c7705 100644
--- a/arch/arm/mach-omap2/am33xx/emif4.c
+++ b/arch/arm/mach-omap2/am33xx/emif4.c
@@ -17,40 +17,9 @@
 #include <asm/io.h>
 #include <asm/emif.h>
 
-DECLARE_GLOBAL_DATA_PTR;
-
-int dram_init(void)
-{
-#ifndef CONFIG_SKIP_LOWLEVEL_INIT
-	sdram_init();
-#endif
-
-	/* dram_init must store complete ramsize in gd->ram_size */
-	gd->ram_size = get_ram_size(
-			(void *)CONFIG_SYS_SDRAM_BASE,
-			CONFIG_MAX_RAM_BANK_SIZE);
-	return 0;
-}
-
-int dram_init_banksize(void)
-{
-	gd->bd->bi_dram[0].start = CONFIG_SYS_SDRAM_BASE;
-	gd->bd->bi_dram[0].size = gd->ram_size;
-
-	return 0;
-}
-
-
-#ifndef CONFIG_SKIP_LOWLEVEL_INIT
-#ifdef CONFIG_TI81XX
-static struct dmm_lisa_map_regs *hw_lisa_map_regs =
-				(struct dmm_lisa_map_regs *)DMM_BASE;
-#endif
-#ifndef CONFIG_TI816X
 static struct vtp_reg *vtpreg[2] = {
 				(struct vtp_reg *)VTP0_CTRL_ADDR,
 				(struct vtp_reg *)VTP1_CTRL_ADDR};
-#endif
 #ifdef CONFIG_AM33XX
 static struct ddr_ctrl *ddrctrl = (struct ddr_ctrl *)DDR_CTRL_ADDR;
 #endif
@@ -60,9 +29,12 @@
 				(struct cm_device_inst *)CM_DEVICE_INST;
 #endif
 
-#ifdef CONFIG_TI81XX
+#ifdef CONFIG_TI814X
 void config_dmm(const struct dmm_lisa_map_regs *regs)
 {
+	struct dmm_lisa_map_regs *hw_lisa_map_regs =
+				(struct dmm_lisa_map_regs *)DMM_BASE;
+
 	enable_dmm_clocks();
 
 	writel(0, &hw_lisa_map_regs->dmm_lisa_map_3);
@@ -77,7 +49,6 @@
 }
 #endif
 
-#ifndef CONFIG_TI816X
 static void config_vtp(int nr)
 {
 	writel(readl(&vtpreg[nr]->vtp0ctrlreg) | VTP_CTRL_ENABLE,
@@ -92,7 +63,6 @@
 			VTP_CTRL_READY)
 		;
 }
-#endif
 
 void __weak ddr_pll_config(unsigned int ddrpll_m)
 {
@@ -103,9 +73,7 @@
 		const struct emif_regs *regs, int nr)
 {
 	ddr_pll_config(pll);
-#ifndef CONFIG_TI816X
 	config_vtp(nr);
-#endif
 	config_cmd_ctrl(ctrl, nr);
 
 	config_ddr_data(data, nr);
@@ -139,4 +107,3 @@
 	else
 		config_sdram(regs, nr);
 }
-#endif
diff --git a/arch/arm/mach-omap2/am33xx/ti816x_emif4.c b/arch/arm/mach-omap2/am33xx/ti816x_emif4.c
new file mode 100644
index 0000000..2e7ea90
--- /dev/null
+++ b/arch/arm/mach-omap2/am33xx/ti816x_emif4.c
@@ -0,0 +1,165 @@
+/*
+ * ti816x_emif4.c
+ *
+ * TI816x emif4 configuration file
+ *
+ * Copyright (C) 2017, Konsulko Group
+ *
+ * SPDX-License-Identifier:	GPL-2.0+
+ */
+
+#include <common.h>
+#include <asm/arch/cpu.h>
+#include <asm/arch/ddr_defs.h>
+#include <asm/arch/hardware.h>
+#include <asm/arch/clock.h>
+#include <asm/arch/sys_proto.h>
+#include <asm/io.h>
+#include <asm/emif.h>
+
+/*********************************************************************
+ * Init DDR3 on TI816X EVM: DDR PHY register setup for one EMIF ('emif')
+ *********************************************************************/
+static void ddr_init_settings(const struct cmd_control *ctrl, int emif)
+{
+	/*
+	 * Set use_rank_delays to 1.  This is only necessary when
+	 * multiple ranks are in use.  Though the EVM does not have
+	 * multiple ranks, this is a good value to set.
+	 */
+	writel(1, DDRPHY_CONFIG_BASE + 0x134); /* DATA0_REG_PHY_USE_RANK0_DELAYS */
+	writel(1, DDRPHY_CONFIG_BASE + 0x1d8); /* DATA1_REG_PHY_USE_RANK0_DELAYS */
+	writel(1, DDRPHY_CONFIG_BASE + 0x27c); /* DATA2_REG_PHY_USE_RANK0_DELAYS */
+	writel(1, DDRPHY_CONFIG_BASE + 0x320); /* DATA3_REG_PHY_USE_RANK0_DELAYS */
+	/* NOTE(review): confirm DDRPHY_CONFIG_BASE also selects by 'emif' */
+	config_cmd_ctrl(ctrl, emif);
+
+	/* For DDR3 this needs to be set to 1 */
+	writel(0x1, DDRPHY_CONFIG_BASE + 0x0F8); /* init mode */
+	writel(0x1, DDRPHY_CONFIG_BASE + 0x104);
+	writel(0x1, DDRPHY_CONFIG_BASE + 0x19C);
+	writel(0x1, DDRPHY_CONFIG_BASE + 0x1A8);
+	writel(0x1, DDRPHY_CONFIG_BASE + 0x240);
+	writel(0x1, DDRPHY_CONFIG_BASE + 0x24C);
+	writel(0x1, DDRPHY_CONFIG_BASE + 0x2E4);
+	writel(0x1, DDRPHY_CONFIG_BASE + 0x2F0);
+
+	/*
+	 * This represents the initial value for the leveling process.  The
+	 * value is a ratio - so 0x100 represents one cycle.  The real delay
+	 * is determined through the leveling process.
+	 *
+	 * During the leveling process, 0x20 is subtracted from the value, so
+	 * we have added that to the value we want to set.  We also set the
+	 * values such that byte3 completes leveling after byte2 and byte1
+	 * after byte0.
+	 */
+	writel((0x20 << 10) | 0x20, DDRPHY_CONFIG_BASE + 0x0F0); /* data0 writelvl init ratio */
+	writel(0x0, DDRPHY_CONFIG_BASE + 0x0F4);
+	writel((0x20 << 10) | 0x20, DDRPHY_CONFIG_BASE + 0x194); /* data1 writelvl init ratio */
+	writel(0x0, DDRPHY_CONFIG_BASE + 0x198);
+	writel((0x20 << 10) | 0x20, DDRPHY_CONFIG_BASE + 0x238); /* data2 writelvl init ratio */
+	writel(0x0, DDRPHY_CONFIG_BASE + 0x23c);
+	writel((0x20 << 10) | 0x20, DDRPHY_CONFIG_BASE + 0x2dc); /* data3 writelvl init ratio */
+	writel(0x0, DDRPHY_CONFIG_BASE + 0x2e0);
+
+	/* The gate levelling registers get the same initial ratio value */
+	writel((0x20 << 10) | 0x20, DDRPHY_CONFIG_BASE + 0x0FC); /* data0 gatelvl init ratio */
+	writel(0x0, DDRPHY_CONFIG_BASE + 0x100);
+	writel((0x20 << 10) | 0x20, DDRPHY_CONFIG_BASE + 0x1A0); /* data1 gatelvl init ratio */
+	writel(0x0, DDRPHY_CONFIG_BASE + 0x1A4);
+	writel((0x20 << 10) | 0x20, DDRPHY_CONFIG_BASE + 0x244); /* data2 gatelvl init ratio */
+	writel(0x0, DDRPHY_CONFIG_BASE + 0x248);
+	writel((0x20 << 10) | 0x20, DDRPHY_CONFIG_BASE + 0x2E8); /* data3 gatelvl init ratio */
+	writel(0x0, DDRPHY_CONFIG_BASE + 0x2EC);
+
+	writel(0x5, DDRPHY_CONFIG_BASE + 0x00C);     /* cmd0 io config - output impedance of pad */
+	writel(0x5, DDRPHY_CONFIG_BASE + 0x010);     /* cmd0 io clk config - output impedance of pad */
+	writel(0x5, DDRPHY_CONFIG_BASE + 0x040);     /* cmd1 io config - output impedance of pad */
+	writel(0x5, DDRPHY_CONFIG_BASE + 0x044);     /* cmd1 io clk config - output impedance of pad */
+	writel(0x5, DDRPHY_CONFIG_BASE + 0x074);     /* cmd2 io config - output impedance of pad */
+	writel(0x5, DDRPHY_CONFIG_BASE + 0x078);     /* cmd2 io clk config - output impedance of pad */
+	writel(0x4, DDRPHY_CONFIG_BASE + 0x0A8);     /* data0 io config - output impedance of pad */
+	writel(0x4, DDRPHY_CONFIG_BASE + 0x0AC);     /* data0 io clk config - output impedance of pad */
+	writel(0x4, DDRPHY_CONFIG_BASE + 0x14C);     /* data1 io config - output impedance of pad */
+	writel(0x4, DDRPHY_CONFIG_BASE + 0x150);     /* data1 io clk config - output impedance of pad */
+	writel(0x4, DDRPHY_CONFIG_BASE + 0x1F0);     /* data2 io config - output impedance of pad */
+	writel(0x4, DDRPHY_CONFIG_BASE + 0x1F4);     /* data2 io clk config - output impedance of pad */
+	writel(0x4, DDRPHY_CONFIG_BASE + 0x294);     /* data3 io config - output impedance of pad */
+	writel(0x4, DDRPHY_CONFIG_BASE + 0x298);     /* data3 io clk config - output impedance of pad */
+}
+
+static void ddr3_sw_levelling(const struct ddr_data *data, int emif)
+{
+	/* Set the correct value to DDR_VTP_CTRL_0 */
+	writel(0x6, (DDRPHY_CONFIG_BASE + 0x358));
+	/* NOTE(review): 'emif' is unused here unless DDRPHY_CONFIG_BASE reads it */
+	writel(data->datafwsratio0, (DDRPHY_CONFIG_BASE + 0x108));
+	writel(data->datafwsratio0, (DDRPHY_CONFIG_BASE + 0x1AC));
+	writel(data->datafwsratio0, (DDRPHY_CONFIG_BASE + 0x250));
+	writel(data->datafwsratio0, (DDRPHY_CONFIG_BASE + 0x2F4));
+
+	writel(data->datawdsratio0, (DDRPHY_CONFIG_BASE + 0x0DC));
+	writel(data->datawdsratio0, (DDRPHY_CONFIG_BASE + 0x180));
+	writel(data->datawdsratio0, (DDRPHY_CONFIG_BASE + 0x224));
+	writel(data->datawdsratio0, (DDRPHY_CONFIG_BASE + 0x2C8));
+
+	writel(data->datawrsratio0, (DDRPHY_CONFIG_BASE + 0x120));
+	writel(data->datawrsratio0, (DDRPHY_CONFIG_BASE + 0x1C4));
+	writel(data->datawrsratio0, (DDRPHY_CONFIG_BASE + 0x268));
+	writel(data->datawrsratio0, (DDRPHY_CONFIG_BASE + 0x30C));
+
+	writel(data->datardsratio0, (DDRPHY_CONFIG_BASE + 0x0C8));
+	writel(data->datardsratio0, (DDRPHY_CONFIG_BASE + 0x16C));
+	writel(data->datardsratio0, (DDRPHY_CONFIG_BASE + 0x210));
+	writel(data->datardsratio0, (DDRPHY_CONFIG_BASE + 0x2B4));
+}
+
+static struct dmm_lisa_map_regs *hw_lisa_map_regs =
+				(struct dmm_lisa_map_regs *)DMM_BASE;
+
+#define DMM_PAT_BASE_ADDR		(DMM_BASE + 0x420) /* written last by config_dmm() */
+void config_dmm(const struct dmm_lisa_map_regs *regs)
+{
+	writel(0, &hw_lisa_map_regs->dmm_lisa_map_3); /* zero all four maps first */
+	writel(0, &hw_lisa_map_regs->dmm_lisa_map_2);
+	writel(0, &hw_lisa_map_regs->dmm_lisa_map_1);
+	writel(0, &hw_lisa_map_regs->dmm_lisa_map_0);
+	/* Then program the values from 'regs', map_3 first and map_0 last */
+	writel(regs->dmm_lisa_map_3, &hw_lisa_map_regs->dmm_lisa_map_3);
+	writel(regs->dmm_lisa_map_2, &hw_lisa_map_regs->dmm_lisa_map_2);
+	writel(regs->dmm_lisa_map_1, &hw_lisa_map_regs->dmm_lisa_map_1);
+	writel(regs->dmm_lisa_map_0, &hw_lisa_map_regs->dmm_lisa_map_0);
+
+	/* Enable Tiled Access */
+	writel(0x80000000, DMM_PAT_BASE_ADDR);
+}
+
+void config_ddr(const struct ddr_data *data, const struct cmd_control *ctrl,
+		const struct emif_regs *regs,
+		const struct dmm_lisa_map_regs *lisa_regs, int nrs)
+{
+	int i;
+
+	enable_emif_clocks();
+	/* Apply the PHY init settings to each of the 'nrs' EMIFs (0..nrs-1) */
+	for (i = 0; i < nrs; i++)
+		ddr_init_settings(ctrl, i);
+
+	enable_dmm_clocks();
+
+	/* Program the DMM for non-interleaved configuration */
+	config_dmm(lisa_regs);
+
+	/* Program EMIF CFG Registers */
+	for (i = 0; i < nrs; i++) {
+		set_sdram_timings(regs, i);
+		config_sdram(regs, i);
+	}
+	/* Wait 1000us, then write the levelling ratios from 'data' per EMIF */
+	udelay(1000);
+	for (i = 0; i < nrs; i++)
+		ddr3_sw_levelling(data, i);
+
+	udelay(50000);	/* Some delay needed (50000us) */
+}