refactor(msm8916): handle multiple CPU clusters

Some Qualcomm platforms similar to MSM8916 have multiple CPU clusters.
In this case, some of the hardware blocks are duplicated and must be
configured separately.

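Refactor the code to handle additional clusters: configure the APCS
blocks in a loop over all clusters, and take the MPIDR cluster affinity
into account when computing the core position and when selecting the
APCS region used to power on a CPU.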

No functional change for existing single-cluster platforms.

Change-Id: I5b4b1ad2a1adde559d5b79b7698afe73733b2e90
Signed-off-by: Stephan Gerhold <stephan@gerhold.net>
diff --git a/plat/qti/msm8916/aarch32/msm8916_helpers.S b/plat/qti/msm8916/aarch32/msm8916_helpers.S
index ea39663..c3acba7 100644
--- a/plat/qti/msm8916/aarch32/msm8916_helpers.S
+++ b/plat/qti/msm8916/aarch32/msm8916_helpers.S
@@ -6,6 +6,7 @@
 
 #include <arch.h>
 #include <asm_macros.S>
+#include <platform_def.h>
 
 #include <msm8916_mmap.h>
 
@@ -79,9 +80,13 @@
 	 * -------------------------------------------------
 	 */
 func plat_my_core_pos
-	/* There is just a single cluster so this is very simple */
-	ldcopr	r0, MPIDR
-	and	r0, r0, #MPIDR_CPU_MASK
+	ldcopr	r1, MPIDR
+	and	r0, r1, #MPIDR_CPU_MASK
+	.if PLATFORM_CLUSTER_COUNT > 1
+		and	r1, r1, #MPIDR_CLUSTER_MASK
+		orr	r0, r0, r1, LSR #(MPIDR_AFFINITY_BITS - \
+					  PLATFORM_CPU_PER_CLUSTER_SHIFT)
+	.endif
 	bx	lr
 endfunc plat_my_core_pos
 
@@ -102,7 +107,7 @@
 	 * Cold boot: Disable TCM redirect to L2 cache as early as
 	 * possible to avoid crashes when making use of the cache.
 	 */
-	ldr	r1, =APCS_CFG
+	ldr	r1, =APCS_CFG(0)
 	ldr	r2, [r1, #APCS_TCM_START_ADDR]
 	and	r2, r2, #~APCS_TCM_REDIRECT_EN_0
 	str	r2, [r1, #APCS_TCM_START_ADDR]
diff --git a/plat/qti/msm8916/aarch64/msm8916_helpers.S b/plat/qti/msm8916/aarch64/msm8916_helpers.S
index bccc5e5..c2d0813 100644
--- a/plat/qti/msm8916/aarch64/msm8916_helpers.S
+++ b/plat/qti/msm8916/aarch64/msm8916_helpers.S
@@ -6,6 +6,7 @@
 
 #include <arch.h>
 #include <asm_macros.S>
+#include <platform_def.h>
 
 #include <msm8916_mmap.h>
 
@@ -78,9 +79,13 @@
 	 * -------------------------------------------------
 	 */
 func plat_my_core_pos
-	/* There is just a single cluster so this is very simple */
-	mrs	x0, mpidr_el1
-	and	x0, x0, #MPIDR_CPU_MASK
+	mrs	x1, mpidr_el1
+	and	x0, x1, #MPIDR_CPU_MASK
+	.if PLATFORM_CLUSTER_COUNT > 1
+		and	x1, x1, #MPIDR_CLUSTER_MASK
+		orr	x0, x0, x1, LSR #(MPIDR_AFFINITY_BITS - \
+					  PLATFORM_CPU_PER_CLUSTER_SHIFT)
+	.endif
 	ret
 endfunc plat_my_core_pos
 
@@ -100,7 +105,7 @@
 	 * Cold boot: Disable TCM redirect to L2 cache as early as
 	 * possible to avoid crashes when making use of the cache.
 	 */
-	mov_imm	x1, APCS_CFG
+	mov_imm	x1, APCS_CFG(0)
 	ldr	w2, [x1, #APCS_TCM_START_ADDR]
 	and	w2, w2, #~APCS_TCM_REDIRECT_EN_0
 	str	w2, [x1, #APCS_TCM_START_ADDR]
diff --git a/plat/qti/msm8916/include/msm8916_mmap.h b/plat/qti/msm8916/include/msm8916_mmap.h
index dc420fc..5cb2f44 100644
--- a/plat/qti/msm8916/include/msm8916_mmap.h
+++ b/plat/qti/msm8916/include/msm8916_mmap.h
@@ -33,11 +33,14 @@
 #define APCS_QGIC2_GICC		(APCS_QGIC2_BASE + 0x2000)
 #define APCS_BANKED_ACS		(APCS_BASE + 0x08000)
 #define APCS_BANKED_SAW2	(APCS_BASE + 0x09000)
-#define APCS_CFG		(APCS_BASE + 0x10000)
-#define APCS_GLB		(APCS_BASE + 0x11000)
-#define APCS_L2_SAW2		(APCS_BASE + 0x12000)
-#define APCS_QTMR		(APCS_BASE + 0x20000)
-#define APCS_ALIAS_ACS(cpu)	(APCS_BASE + 0x88000 + ((cpu) * 0x10000))
-#define APCS_ALIAS_SAW2(cpu)	(APCS_BASE + 0x89000 + ((cpu) * 0x10000))
+
+#define _APCS_CLUSTER(cluster)	(APCS_BASE + ((cluster) * 0x100000))
+#define _APCS_CPU(cluster, cpu)	(_APCS_CLUSTER(cluster) + ((cpu) * 0x10000))
+#define APCS_CFG(cluster)	(_APCS_CLUSTER(cluster) + 0x10000)
+#define APCS_GLB(cluster)	(_APCS_CLUSTER(cluster) + 0x11000)
+#define APCS_L2_SAW2(cluster)	(_APCS_CLUSTER(cluster) + 0x12000)
+#define APCS_QTMR(cluster)	(_APCS_CLUSTER(cluster) + 0x20000)
+#define APCS_ALIAS_ACS(cluster, cpu)	(_APCS_CPU(cluster, cpu) + 0x88000)
+#define APCS_ALIAS_SAW2(cluster, cpu)	(_APCS_CPU(cluster, cpu) + 0x89000)
 
 #endif /* MSM8916_MMAP_H */
diff --git a/plat/qti/msm8916/include/platform_def.h b/plat/qti/msm8916/include/platform_def.h
index f6ba1cc..7b31917 100644
--- a/plat/qti/msm8916/include/platform_def.h
+++ b/plat/qti/msm8916/include/platform_def.h
@@ -27,9 +27,10 @@
 
 /* CPU topology: single cluster with 4 cores */
 #define PLATFORM_CLUSTER_COUNT		U(1)
-#define PLATFORM_MAX_CPUS_PER_CLUSTER	U(4)
+#define PLATFORM_CPU_PER_CLUSTER_SHIFT	U(2)	/* 4 */
+#define PLATFORM_CPUS_PER_CLUSTER	(1 << PLATFORM_CPU_PER_CLUSTER_SHIFT)
 #define PLATFORM_CORE_COUNT		(PLATFORM_CLUSTER_COUNT * \
-					 PLATFORM_MAX_CPUS_PER_CLUSTER)
+					 PLATFORM_CPUS_PER_CLUSTER)
 
 /* Power management */
 #define PLATFORM_SYSTEM_COUNT		U(1)
diff --git a/plat/qti/msm8916/msm8916_config.c b/plat/qti/msm8916/msm8916_config.c
index 8ae07a0..47eede2 100644
--- a/plat/qti/msm8916/msm8916_config.c
+++ b/plat/qti/msm8916/msm8916_config.c
@@ -14,13 +14,13 @@
 #include <msm8916_mmap.h>
 #include <platform_def.h>
 
-static void msm8916_configure_timer(void)
+static void msm8916_configure_timer(uintptr_t base)
 {
 	/* Set timer frequency */
-	mmio_write_32(APCS_QTMR + CNTCTLBASE_CNTFRQ, PLAT_SYSCNT_FREQ);
+	mmio_write_32(base + CNTCTLBASE_CNTFRQ, PLAT_SYSCNT_FREQ);
 
 	/* Make all timer frames available to non-secure world */
-	mmio_write_32(APCS_QTMR + CNTNSAR, GENMASK_32(7, 0));
+	mmio_write_32(base + CNTNSAR, GENMASK_32(7, 0));
 }
 
 /*
@@ -30,16 +30,17 @@
  */
 #define APCS_GLB_SECURE_STS_NS		BIT_32(0)
 #define APCS_GLB_SECURE_PWR_NS		BIT_32(1)
-#define APCS_BOOT_START_ADDR_SEC	(APCS_CFG + 0x04)
+#define APCS_BOOT_START_ADDR_SEC	0x04
 #define REMAP_EN			BIT_32(0)
-#define APCS_AA64NAA32_REG		(APCS_CFG + 0x0c)
+#define APCS_AA64NAA32_REG		0x0c
 
-static void msm8916_configure_cpu_pm(void)
+static void msm8916_configure_apcs_cluster(unsigned int cluster)
 {
+	uintptr_t cfg = APCS_CFG(cluster);
 	unsigned int cpu;
 
 	/* Disallow non-secure access to boot remapper / TCM registers */
-	mmio_write_32(APCS_CFG, 0);
+	mmio_write_32(cfg, 0);
 
 	/*
 	 * Disallow non-secure access to power management registers.
@@ -47,27 +48,39 @@
 	 * to CPU frequency related registers (e.g. APCS_CMD_RCGR). If these
 	 * bits are not set, CPU frequency control fails in the non-secure world.
 	 */
-	mmio_write_32(APCS_GLB, APCS_GLB_SECURE_STS_NS | APCS_GLB_SECURE_PWR_NS);
+	mmio_write_32(APCS_GLB(cluster),
+		      APCS_GLB_SECURE_STS_NS | APCS_GLB_SECURE_PWR_NS);
 
 	/* Disallow non-secure access to L2 SAW2 */
-	mmio_write_32(APCS_L2_SAW2, 0);
+	mmio_write_32(APCS_L2_SAW2(cluster), 0);
 
 	/* Disallow non-secure access to CPU ACS and SAW2 */
-	for (cpu = 0; cpu < PLATFORM_CORE_COUNT; cpu++) {
-		mmio_write_32(APCS_ALIAS_ACS(cpu), 0);
-		mmio_write_32(APCS_ALIAS_SAW2(cpu), 0);
+	for (cpu = 0; cpu < PLATFORM_CPUS_PER_CLUSTER; cpu++) {
+		mmio_write_32(APCS_ALIAS_ACS(cluster, cpu), 0);
+		mmio_write_32(APCS_ALIAS_SAW2(cluster, cpu), 0);
 	}
 
 #ifdef __aarch64__
 	/* Make sure all further warm boots end up in BL31 and aarch64 state */
 	CASSERT((BL31_BASE & 0xffff) == 0, assert_bl31_base_64k_aligned);
-	mmio_write_32(APCS_BOOT_START_ADDR_SEC, BL31_BASE | REMAP_EN);
-	mmio_write_32(APCS_AA64NAA32_REG, 1);
+	mmio_write_32(cfg + APCS_BOOT_START_ADDR_SEC, BL31_BASE | REMAP_EN);
+	mmio_write_32(cfg + APCS_AA64NAA32_REG, 1);
 #else
 	/* Make sure all further warm boots end up in BL32 */
 	CASSERT((BL32_BASE & 0xffff) == 0, assert_bl32_base_64k_aligned);
-	mmio_write_32(APCS_BOOT_START_ADDR_SEC, BL32_BASE | REMAP_EN);
+	mmio_write_32(cfg + APCS_BOOT_START_ADDR_SEC, BL32_BASE | REMAP_EN);
 #endif
+
+	msm8916_configure_timer(APCS_QTMR(cluster));
+}
+
+static void msm8916_configure_apcs(void)
+{
+	unsigned int cluster;
+
+	for (cluster = 0; cluster < PLATFORM_CLUSTER_COUNT; cluster++) {
+		msm8916_configure_apcs_cluster(cluster);
+	}
 }
 
 /*
@@ -142,7 +155,6 @@
 void msm8916_configure(void)
 {
 	msm8916_gicv2_configure();
-	msm8916_configure_timer();
-	msm8916_configure_cpu_pm();
+	msm8916_configure_apcs();
 	msm8916_configure_smmu();
 }
diff --git a/plat/qti/msm8916/msm8916_cpu_boot.c b/plat/qti/msm8916/msm8916_cpu_boot.c
index b3f51f6..3668903 100644
--- a/plat/qti/msm8916/msm8916_cpu_boot.c
+++ b/plat/qti/msm8916/msm8916_cpu_boot.c
@@ -1,14 +1,14 @@
 /*
- * Copyright (c) 2021, Stephan Gerhold <stephan@gerhold.net>
+ * Copyright (c) 2021-2022, Stephan Gerhold <stephan@gerhold.net>
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
 #include <arch_helpers.h>
+#include <common/debug.h>
 #include <drivers/delay_timer.h>
 #include <lib/mmio.h>
 
-#include <msm8916_mmap.h>
 #include "msm8916_pm.h"
 
 #define CPU_PWR_CTL			0x4
@@ -27,11 +27,12 @@
 #define APC_PWR_GATE_CTL_GHDS_CNT(cnt)	((cnt) << 24)
 
 /* Boot a secondary CPU core for the first time. */
-void msm8916_cpu_boot(unsigned int core)
+void msm8916_cpu_boot(uintptr_t acs)
 {
-	uintptr_t acs = APCS_ALIAS_ACS(core);
 	uint32_t pwr_ctl;
 
+	VERBOSE("PSCI: Powering on CPU @ 0x%08lx\n", acs);
+
 	pwr_ctl = CPU_PWR_CTL_CLAMP | CPU_PWR_CTL_CORE_MEM_CLAMP |
 		  CPU_PWR_CTL_CORE_RST | CPU_PWR_CTL_COREPOR_RST;
 	mmio_write_32(acs + CPU_PWR_CTL, pwr_ctl);
diff --git a/plat/qti/msm8916/msm8916_pm.c b/plat/qti/msm8916/msm8916_pm.c
index 792a096..6267344 100644
--- a/plat/qti/msm8916/msm8916_pm.c
+++ b/plat/qti/msm8916/msm8916_pm.c
@@ -16,13 +16,23 @@
 #include <msm8916_mmap.h>
 #include "msm8916_pm.h"
 
+/*
+ * On platforms with two clusters the index of the APCS memory region is swapped
+ * compared to the MPIDR cluster affinity level: APCS cluster 0 manages CPUs
+ * with cluster affinity level 1, while APCS cluster 1 manages CPUs with level 0.
+ *
+ * On platforms with a single cluster there is only one APCS memory region.
+ */
+#if PLATFORM_CLUSTER_COUNT == 2
+#define MPIDR_APCS_CLUSTER(mpidr)	!MPIDR_AFFLVL1_VAL(mpidr)
+#else
+#define MPIDR_APCS_CLUSTER(mpidr)	0
+#endif
+
 static int msm8916_pwr_domain_on(u_register_t mpidr)
 {
-	unsigned int core = MPIDR_AFFLVL0_VAL(mpidr);
-
-	VERBOSE("PSCI: Booting CPU %d\n", core);
-	msm8916_cpu_boot(core);
-
+	msm8916_cpu_boot(APCS_ALIAS_ACS(MPIDR_APCS_CLUSTER(mpidr),
+					MPIDR_AFFLVL0_VAL(mpidr)));
 	return PSCI_E_SUCCESS;
 }
 
diff --git a/plat/qti/msm8916/msm8916_pm.h b/plat/qti/msm8916/msm8916_pm.h
index 5473bfa..38190a3 100644
--- a/plat/qti/msm8916/msm8916_pm.h
+++ b/plat/qti/msm8916/msm8916_pm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, Stephan Gerhold <stephan@gerhold.net>
+ * Copyright (c) 2021-2022, Stephan Gerhold <stephan@gerhold.net>
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -7,6 +7,6 @@
 #ifndef MSM8916_PM_H
 #define MSM8916_PM_H
 
-void msm8916_cpu_boot(unsigned int core);
+void msm8916_cpu_boot(uintptr_t acs);
 
 #endif /* MSM8916_PM_H */
diff --git a/plat/qti/msm8916/msm8916_topology.c b/plat/qti/msm8916/msm8916_topology.c
index 4d0ed8f..d8cdc0e 100644
--- a/plat/qti/msm8916/msm8916_topology.c
+++ b/plat/qti/msm8916/msm8916_topology.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2022, Stephan Gerhold <stephan@gerhold.net>
  * Copyright (c) 2017-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
@@ -9,24 +10,27 @@
 
 #include <platform_def.h>
 
-static const unsigned char plat_power_domain_tree_desc[PLAT_MAX_PWR_LVL + 1] = {
+static const unsigned char plat_power_domain_tree_desc[] = {
 	PLATFORM_SYSTEM_COUNT,
 	PLATFORM_CLUSTER_COUNT,
-	PLATFORM_MAX_CPUS_PER_CLUSTER,
+	PLATFORM_CPUS_PER_CLUSTER,
+#if PLATFORM_CLUSTER_COUNT > 1
+	PLATFORM_CPUS_PER_CLUSTER,
+#endif
 };
 
 int plat_core_pos_by_mpidr(u_register_t mpidr)
 {
+	unsigned int cluster = MPIDR_AFFLVL1_VAL(mpidr);
 	unsigned int core = MPIDR_AFFLVL0_VAL(mpidr);
 
 	if (MPIDR_AFFLVL3_VAL(mpidr) > 0 ||
 	    MPIDR_AFFLVL2_VAL(mpidr) > 0 ||
-	    MPIDR_AFFLVL1_VAL(mpidr) > 0 ||
-	    core >= PLATFORM_MAX_CPUS_PER_CLUSTER) {
+	    cluster >= PLATFORM_CLUSTER_COUNT ||
+	    core >= PLATFORM_CPUS_PER_CLUSTER) {
 		return -1;
 	}
-
-	return core;
+	return core | (cluster << PLATFORM_CPU_PER_CLUSTER_SHIFT);
 }
 
 const unsigned char *plat_get_power_domain_tree_desc(void)