ARM: tegra: implement RAM repair

RAM repair has a few pre-requisites:
1) PMIC power supply (rail) enabled.
2) PMC CRAIL power partition powered.
3) Fuse clock active (it's the default).
4) PLLP reshift branch enabled (it's the default, when PLLP is active).

RAM repair also only need run whenever specific partitions are powered
(main SoC and CCPLEX respectively); RAM repair does not need to be
triggered when any other partition changes state.

start_cpu() needs to be re-ordered slightly to match these requirements.
Note that C0NC and CE0 aren't required for RAM repair to
operate, but they also do no harm, so the entire of powerup_cpus() is
moved rather than splitting it up. The call to remove_cpu_resets() is
moved last to ensure that all other actions complete before releasing
reset; since the PMC power partitions are now enabled early, releasing
reset is what causes the CPUs to start executing code, and RAM repair must
complete before the CPU boots.

Note that this commit is the result of squashing a numbmer of commits
in NVIDIA's downstream L4T branch, hence the multiple signoffs below.

Signed-off-by: Bibek Basu <bbasu@nvidia.com>
Signed-off-by: Sandipan Patra <spatra@nvidia.com>
Signed-off-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Tom Warren <twarren@nvidia.com>
diff --git a/arch/arm/include/asm/arch-tegra124/flow.h b/arch/arm/include/asm/arch-tegra124/flow.h
index 2b330dc..62947bf 100644
--- a/arch/arm/include/asm/arch-tegra124/flow.h
+++ b/arch/arm/include/asm/arch-tegra124/flow.h
@@ -25,6 +25,12 @@
 	u32 cpu_pwr_csr;	/* offset 0x38 */
 	u32 mpid;		/* offset 0x3c */
 	u32 ram_repair;		/* offset 0x40 */
+	u32 flow_dbg_sel;	/* offset 0x44 */
+	u32 flow_dbg_cnt0;	/* offset 0x48 */
+	u32 flow_dbg_cnt1;	/* offset 0x4c */
+	u32 flow_dbg_qual;	/* offset 0x50 */
+	u32 flow_ctrl_spare;	/* offset 0x54 */
+	u32 ram_repair_cluster1;/* offset 0x58 */
 };
 
 /* HALT_COP_EVENTS_0, 0x04 */
@@ -42,4 +48,8 @@
 #define CSR_WAIT_WFI_SHIFT	8
 #define CSR_PWR_OFF_STS		(1 << 16)
 
+#define RAM_REPAIR_REQ		BIT(0)
+#define RAM_REPAIR_STS		BIT(1)
+#define RAM_REPAIR_BYPASS_EN	BIT(2)
+
 #endif	/*  _TEGRA124_FLOW_H_ */
diff --git a/arch/arm/mach-tegra/tegra124/cpu.c b/arch/arm/mach-tegra/tegra124/cpu.c
index 204d6e9..992c0be 100644
--- a/arch/arm/mach-tegra/tegra124/cpu.c
+++ b/arch/arm/mach-tegra/tegra124/cpu.c
@@ -104,6 +104,43 @@
 	writel(reg, &clkrst->crc_rst_cpug_cmplx_clr);
 }
 
+static void tegra124_ram_repair(void)
+{
+	struct flow_ctlr *flow = (struct flow_ctlr *)NV_PA_FLOW_BASE;
+	u32 ram_repair_timeout; /*usec*/
+	u32 val;
+
+	/*
+	 * Request the Flow Controller perform RAM repair whenever it turns on
+	 * a power rail that requires RAM repair.
+	 */
+	clrbits_le32(&flow->ram_repair, RAM_REPAIR_BYPASS_EN);
+
+	/* Request SW trigerred RAM repair by setting req  bit */
+	/* cluster 0 */
+	setbits_le32(&flow->ram_repair, RAM_REPAIR_REQ);
+	/* Wait for completion (status == 0) */
+	ram_repair_timeout = 500;
+	do {
+		udelay(1);
+		val = readl(&flow->ram_repair);
+	} while (!(val & RAM_REPAIR_STS) && ram_repair_timeout--);
+	if (!ram_repair_timeout)
+		debug("Ram Repair cluster0 failed\n");
+
+	/* cluster 1 */
+	setbits_le32(&flow->ram_repair_cluster1, RAM_REPAIR_REQ);
+	/* Wait for completion (status == 0) */
+	ram_repair_timeout = 500;
+	do {
+		udelay(1);
+		val = readl(&flow->ram_repair_cluster1);
+	} while (!(val & RAM_REPAIR_STS) && ram_repair_timeout--);
+
+	if (!ram_repair_timeout)
+		debug("Ram Repair cluster1 failed\n");
+}
+
 /**
  * Tegra124 requires some special clock initialization, including setting up
  * the DVC I2C, turning on MSELECT and selecting the G CPU cluster
@@ -254,10 +291,11 @@
 	       &pmc->pmc_pwrgate_timer_mult);
 
 	enable_cpu_power_rail();
+	powerup_cpus();
+	tegra124_ram_repair();
 	enable_cpu_clocks();
 	clock_enable_coresight(1);
-	remove_cpu_resets();
 	writel(reset_vector, EXCEP_VECTOR_CPU_RESET_VECTOR);
-	powerup_cpus();
+	remove_cpu_resets();
 	debug("%s exit, should continue @ reset_vector\n", __func__);
 }