armv8/fsl-lsch3: Update code to release secondary cores

The NXP ARMv8 SoC LS2080A releases all secondary cores in one go,
but newer SoCs such as LS2088A and LS1088A release secondary
cores one by one.

Update the code to release secondary cores based on the SoC SVR,
and add code to release cores one by one on non-LS2080A SoCs.

Signed-off-by: Priyanka Jain <priyanka.jain@nxp.com>
Signed-off-by: Raghav Dogra <raghav.dogra@nxp.com>
Signed-off-by: Prabhakar Kushwaha <prabhakar.kushwaha@nxp.com>
[YS: remove "inline" from declaration of initiator_type]
Reviewed-by: York Sun <york.sun@nxp.com>
diff --git a/arch/arm/cpu/armv8/fsl-layerscape/cpu.c b/arch/arm/cpu/armv8/fsl-layerscape/cpu.c
index eb03bf4..d6ee546 100644
--- a/arch/arm/cpu/armv8/fsl-layerscape/cpu.c
+++ b/arch/arm/cpu/armv8/fsl-layerscape/cpu.c
@@ -191,7 +191,7 @@
 }
 #endif
 
-static inline u32 initiator_type(u32 cluster, int init_id)
+u32 initiator_type(u32 cluster, int init_id)
 {
 	struct ccsr_gur *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
 	u32 idx = (cluster >> (init_id * 8)) & TP_CLUSTER_INIT_MASK;
diff --git a/arch/arm/cpu/armv8/fsl-layerscape/cpu.h b/arch/arm/cpu/armv8/fsl-layerscape/cpu.h
index 8072f3c..a05f8aa 100644
--- a/arch/arm/cpu/armv8/fsl-layerscape/cpu.h
+++ b/arch/arm/cpu/armv8/fsl-layerscape/cpu.h
@@ -5,4 +5,5 @@
  */
 
 int fsl_qoriq_core_to_cluster(unsigned int core);
+u32 initiator_type(u32 cluster, int init_id);
 u32 cpu_mask(void);
diff --git a/arch/arm/cpu/armv8/fsl-layerscape/mp.c b/arch/arm/cpu/armv8/fsl-layerscape/mp.c
index f607c39..97c6269 100644
--- a/arch/arm/cpu/armv8/fsl-layerscape/mp.c
+++ b/arch/arm/cpu/armv8/fsl-layerscape/mp.c
@@ -9,6 +9,8 @@
 #include <asm/system.h>
 #include <asm/arch/mp.h>
 #include <asm/arch/soc.h>
+#include "cpu.h"
+#include <asm/arch-fsl-layerscape/soc.h>
 
 DECLARE_GLOBAL_DATA_PTR;
 
@@ -22,11 +24,39 @@
 	return (phys_addr_t)&secondary_boot_code;
 }
 
+#ifdef CONFIG_FSL_LSCH3
+void wake_secondary_core_n(int cluster, int core, int cluster_cores)
+{
+	struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
+	struct ccsr_reset __iomem *rst = (void *)(CONFIG_SYS_FSL_RST_ADDR);
+	u32 mpidr = 0;
+
+	mpidr = ((cluster << 8) | core);
+	/*
+	 * Write the mpidr_el1 value of the core to be released to
+	 * scratchrw[6], then set its brrl bit (1U: unsigned shift).
+	 */
+	gur_out32(&gur->scratchrw[6], mpidr);
+	asm volatile("dsb st" : : : "memory");
+	rst->brrl |= 1U << ((cluster * cluster_cores) + core);
+	asm volatile("dsb st" : : : "memory");
+	/*
+	 * Poll scratchrw[6] until it reads back as zero. A zero value
+	 * means that this core is up and running, so the next core can
+	 * be released now.
+	 */
+	while (gur_in32(&gur->scratchrw[6]) != 0)
+		;
+}
+#endif
+
 int fsl_layerscape_wake_seconday_cores(void)
 {
 	struct ccsr_gur __iomem *gur = (void *)(CONFIG_SYS_FSL_GUTS_ADDR);
 #ifdef CONFIG_FSL_LSCH3
 	struct ccsr_reset __iomem *rst = (void *)(CONFIG_SYS_FSL_RST_ADDR);
+	u32 svr, ver, cluster, type;
+	int j = 0, cluster_cores = 0;
 #elif defined(CONFIG_FSL_LSCH2)
 	struct ccsr_scfg __iomem *scfg = (void *)(CONFIG_SYS_FSL_SCFG_ADDR);
 #endif
@@ -55,10 +85,40 @@
 #ifdef CONFIG_FSL_LSCH3
 	gur_out32(&gur->bootlocptrh, (u32)(gd->relocaddr >> 32));
 	gur_out32(&gur->bootlocptrl, (u32)gd->relocaddr);
-	gur_out32(&gur->scratchrw[6], 1);
-	asm volatile("dsb st" : : : "memory");
-	rst->brrl = cores;
-	asm volatile("dsb st" : : : "memory");
+
+	svr = gur_in32(&gur->svr);
+	ver = SVR_SOC_VER(svr);
+	if (ver == SVR_LS2080A || ver == SVR_LS2085A) {
+		gur_out32(&gur->scratchrw[6], 1);
+		asm volatile("dsb st" : : : "memory");
+		rst->brrl = cores;
+		asm volatile("dsb st" : : : "memory");
+	} else {
+		/*
+		 * Release the cores out of reset one-at-a-time to avoid
+		 * power spikes
+		 */
+		i = 0;
+		cluster = in_le32(&gur->tp_cluster[i].lower);
+		for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
+			type = initiator_type(cluster, j);
+			if (type &&
+			    TP_ITYP_TYPE(type) == TP_ITYP_TYPE_ARM)
+				cluster_cores++;
+		}
+
+		do {
+			cluster = in_le32(&gur->tp_cluster[i].lower);
+			for (j = 0; j < TP_INIT_PER_CLUSTER; j++) {
+				type = initiator_type(cluster, j);
+				if (type &&
+				    TP_ITYP_TYPE(type) == TP_ITYP_TYPE_ARM)
+					wake_secondary_core_n(i, j,
+							      cluster_cores);
+			}
+		i++;
+		} while ((cluster & TP_CLUSTER_EOC) != TP_CLUSTER_EOC);
+	}
 #elif defined(CONFIG_FSL_LSCH2)
 	scfg_out32(&scfg->scratchrw[0], (u32)(gd->relocaddr >> 32));
 	scfg_out32(&scfg->scratchrw[1], (u32)gd->relocaddr);