arm_fpga: Add support for topology self-discovery

As secondary cores show up, they populate an array to
announce themselves so plat_core_pos_by_mpidr() can
return an invalid COREID code for any non-existing
MPIDR that it is queried about.

The Power Domain Tree Description is populated with
a topology based on the maximum harcoded values.

Signed-off-by: Javier Almansa Sobrino <javier.almansasobrino@arm.com>
Change-Id: I8fd64761a2296714ce0f37c46544f3e6f13b5f61
diff --git a/plat/arm/board/arm_fpga/aarch64/fpga_helpers.S b/plat/arm/board/arm_fpga/aarch64/fpga_helpers.S
index aeed310..20120c9 100644
--- a/plat/arm/board/arm_fpga/aarch64/fpga_helpers.S
+++ b/plat/arm/board/arm_fpga/aarch64/fpga_helpers.S
@@ -7,6 +7,8 @@
 #include <arch.h>
 #include <asm_macros.S>
 #include <common/bl_common.h>
+#include "../fpga_private.h"
+
 #include <platform_def.h>
 
 	.globl	plat_get_my_entrypoint
@@ -14,10 +16,10 @@
 	.globl	plat_is_my_cpu_primary
 	.globl	platform_mem_init
 	.globl	plat_my_core_pos
-	.globl	plat_fpga_calc_core_pos
 	.globl	plat_crash_console_init
 	.globl	plat_crash_console_putc
 	.globl	plat_crash_console_flush
+	.globl	plat_fpga_calc_core_pos
 
 /* -----------------------------------------------------------------------
  * Indicate a cold boot for every CPU - warm boot is unsupported for the
@@ -34,23 +36,59 @@
  * -----------------------------------------------------------------------
  */
 func plat_secondary_cold_boot_setup
+
+	/*
+	 * Wait for the primary processor to initialise the .BSS segment
+	 * to avoid a race condition that would erase fpga_valid_mpids
+	 * if it is populated before the C runtime is ready.
+	 *
+	 * We cannot use the current spin-lock implementation until the
+	 * runtime is up and we should not rely on sevl/wfe instructions as
+	 * it is optional whether they are implemented or not, so we use
+	 * a global variable as lock and wait for the primary processor to
+	 * finish the C runtime bring-up.
+	 */
+
+	ldr	w0, =C_RUNTIME_READY_KEY
+	adrp	x1, secondary_core_spinlock
+	add	x1, x1, :lo12:secondary_core_spinlock
+1:
+	wfe
+	ldr	w2, [x1]
+	cmp	w2, w0
+	b.ne	1b
+	/* Prevent reordering of the store into fpga_valid_mpids below */
+	dmb	ish
+
+	mov	x10, x30
+	bl	plat_my_core_pos
+	mov	x30, x10
+
+	adrp	x4, fpga_valid_mpids
+	add	x4, x4, :lo12:fpga_valid_mpids
+	mov	x5, #VALID_MPID
+	strb	w5, [x4, x0]
+
 	/*
 	 * Poll the CPU's hold entry until it indicates to jump
 	 * to the entrypoint address.
 	 */
-	bl	plat_my_core_pos
-	lsl	x0, x0, #PLAT_FPGA_HOLD_ENTRY_SHIFT
-	ldr	x1, =hold_base
-	ldr	x2, =fpga_sec_entrypoint
+
+	adrp	x1, hold_base
+	add	x1, x1, :lo12:hold_base
 poll_hold_entry:
-	ldr	x3, [x1, x0]
+	ldr	x3, [x1, x0, LSL #PLAT_FPGA_HOLD_ENTRY_SHIFT]
 	cmp	x3, #PLAT_FPGA_HOLD_STATE_GO
 	b.ne	1f
+
+	adrp	x2, fpga_sec_entrypoint
+	add	x2, x2, :lo12:fpga_sec_entrypoint
 	ldr	x3, [x2]
 	br	x3
 1:
 	wfe
 	b	poll_hold_entry
+
 endfunc plat_secondary_cold_boot_setup
 
 /* -----------------------------------------------------------------------
@@ -73,12 +111,16 @@
 endfunc platform_mem_init
 
 func plat_my_core_pos
+	ldr	x1, =(MPID_MASK & ~(MPIDR_AFFLVL_MASK << MPIDR_AFF3_SHIFT))
 	mrs	x0, mpidr_el1
+	and	x0, x0, x1
 	b	plat_fpga_calc_core_pos
+
 endfunc plat_my_core_pos
 
 /* -----------------------------------------------------------------------
- * unsigned int plat_fpga_calc_core_pos(u_register_t mpidr)
+ * unsigned int plat_fpga_calc_core_pos (uint32_t mpid)
+ * Clobber registers: x0 to x5
  * -----------------------------------------------------------------------
  */
 func plat_fpga_calc_core_pos
@@ -88,6 +130,7 @@
 	 *
 	 * If not set, shift MPIDR to left to make it look as if in a
 	 * multi-threaded implementation.
+	 *
 	 */
 	tst	x0, #MPIDR_MT_MASK
 	lsl	x3, x0, #MPIDR_AFFINITY_BITS
@@ -98,11 +141,13 @@
 	ubfx	x1, x3, #MPIDR_AFF1_SHIFT, #MPIDR_AFFINITY_BITS
 	ubfx	x2, x3, #MPIDR_AFF2_SHIFT, #MPIDR_AFFINITY_BITS
 
-	/* Compute linear position */
 	mov	x4, #FPGA_MAX_CPUS_PER_CLUSTER
-	madd	x1, x2, x4, x1
 	mov	x5, #FPGA_MAX_PE_PER_CPU
+
+	/* Compute linear position */
+	madd	x1, x2, x4, x1
 	madd	x0, x1, x5, x0
+
 	ret
 endfunc plat_fpga_calc_core_pos
 
diff --git a/plat/arm/board/arm_fpga/fpga_bl31_setup.c b/plat/arm/board/arm_fpga/fpga_bl31_setup.c
index e4b9767..6eeff45 100644
--- a/plat/arm/board/arm_fpga/fpga_bl31_setup.c
+++ b/plat/arm/board/arm_fpga/fpga_bl31_setup.c
@@ -7,23 +7,23 @@
 #include <assert.h>
 
 #include <common/fdt_wrappers.h>
+#include <drivers/delay_timer.h>
 #include <drivers/generic_delay_timer.h>
-#include <lib/mmio.h>
 #include <libfdt.h>
 
+#include "fpga_private.h"
 #include <plat/common/platform.h>
 #include <platform_def.h>
 
-#include "fpga_private.h"
-
 static entry_point_info_t bl33_image_ep_info;
+volatile uint32_t secondary_core_spinlock;
 
 uintptr_t plat_get_ns_image_entrypoint(void)
 {
 #ifdef PRELOADED_BL33_BASE
 	return PRELOADED_BL33_BASE;
 #else
-	return 0;
+	return 0ULL;
 #endif
 }
 
@@ -35,6 +35,17 @@
 void bl31_early_platform_setup2(u_register_t arg0, u_register_t arg1,
 				u_register_t arg2, u_register_t arg3)
 {
+	/* Add this core to the VALID mpids list */
+	fpga_valid_mpids[plat_my_core_pos()] = VALID_MPID;
+
+	/*
+	 * Notify the secondary CPUs that the C runtime is ready
+	 * so they can announce themselves.
+	 */
+	secondary_core_spinlock = C_RUNTIME_READY_KEY;
+	dsbish();
+	sev();
+
 	fpga_console_init();
 
 	bl33_image_ep_info.pc = plat_get_ns_image_entrypoint();
@@ -54,11 +65,37 @@
 
 void bl31_platform_setup(void)
 {
-	/* Initialize the GIC driver, cpu and distributor interfaces */
-	plat_fpga_gic_init();
-
 	/* Write frequency to CNTCRL and initialize timer */
 	generic_delay_timer_init();
+
+	/*
+	 * Before doing anything else, wait for some time to ensure that
+	 * the secondary CPUs have populated the fpga_valid_mpids array.
+	 * As the number of secondary cores is unknown and can even be 0,
+	 * it is not possible to rely on any signal from them, so use a
+	 * delay instead.
+	 */
+	mdelay(5);
+
+	/*
+	 * On the event of a cold reset issued by, for instance, a reset pin
+	 * assertion, we cannot guarantee memory to be initialized to zero.
+	 * In such scenario, if the secondary cores reached
+	 * plat_secondary_cold_boot_setup before the primary one initialized
+	 * .BSS, we could end up having a race condition if the spinlock
+	 * was not cleared before.
+	 *
+	 * Similarly, if there were a reset before the spinlock had been
+	 * cleared, the secondary cores would find the lock opened before
+	 * .BSS is cleared, causing another race condition.
+	 *
+	 * So clean the spinlock as soon as we think it is safe to reduce the
+	 * chances of any race condition on a reset.
+	 */
+	secondary_core_spinlock = 0UL;
+
+	/* Initialize the GIC driver, cpu and distributor interfaces */
+	plat_fpga_gic_init();
 }
 
 entry_point_info_t *bl31_plat_get_next_image_ep_info(uint32_t type)
diff --git a/plat/arm/board/arm_fpga/fpga_def.h b/plat/arm/board/arm_fpga/fpga_def.h
index 5f1951f..2884ea6 100644
--- a/plat/arm/board/arm_fpga/fpga_def.h
+++ b/plat/arm/board/arm_fpga/fpga_def.h
@@ -18,6 +18,7 @@
  * that are present will still be indexed appropriately regardless of any empty
  * entries in the array used to represent the topology.
  */
+
 #define FPGA_MAX_CLUSTER_COUNT			4
 #define FPGA_MAX_CPUS_PER_CLUSTER		8
 #define FPGA_MAX_PE_PER_CPU			4
diff --git a/plat/arm/board/arm_fpga/fpga_private.h b/plat/arm/board/arm_fpga/fpga_private.h
index 7545bd1..46287ad 100644
--- a/plat/arm/board/arm_fpga/fpga_private.h
+++ b/plat/arm/board/arm_fpga/fpga_private.h
@@ -7,12 +7,23 @@
 #ifndef FPGA_PRIVATE_H
 #define FPGA_PRIVATE_H
 
-unsigned int plat_fpga_calc_core_pos(u_register_t mpidr);
+#include "../fpga_def.h"
+#include <platform_def.h>
+
+#define C_RUNTIME_READY_KEY	(0xaa55aa55)
+#define VALID_MPID		(1U)
+
+#ifndef __ASSEMBLER__
+
+extern unsigned char fpga_valid_mpids[PLATFORM_CORE_COUNT];
 
 void fpga_console_init(void);
 
 void plat_fpga_gic_init(void);
 void fpga_pwr_gic_on_finish(void);
 void fpga_pwr_gic_off(void);
+unsigned int plat_fpga_calc_core_pos(uint32_t mpid);
+
+#endif /* __ASSEMBLER__ */
 
-#endif
+#endif /* FPGA_PRIVATE_H */
diff --git a/plat/arm/board/arm_fpga/fpga_topology.c b/plat/arm/board/arm_fpga/fpga_topology.c
index a2908d7..7fead86 100644
--- a/plat/arm/board/arm_fpga/fpga_topology.c
+++ b/plat/arm/board/arm_fpga/fpga_topology.c
@@ -5,15 +5,20 @@
  */
 
 #include <arch_helpers.h>
+#include <common/debug.h>
+#include <lib/spinlock.h>
 
 #include "fpga_private.h"
+#include <plat/common/platform.h>
 #include <platform_def.h>
 
-static unsigned char fpga_power_domain_tree_desc[FPGA_MAX_CLUSTER_COUNT + 2];
+unsigned char fpga_power_domain_tree_desc[FPGA_MAX_CLUSTER_COUNT + 2];
+unsigned char fpga_valid_mpids[PLATFORM_CORE_COUNT];
 
 const unsigned char *plat_get_power_domain_tree_desc(void)
 {
-	int i;
+	unsigned int i;
+
 	/*
 	* The highest level is the system level. The next level is constituted
 	* by clusters and then cores in clusters.
@@ -21,12 +26,26 @@
 	* This description of the power domain topology is aligned with the CPU
 	* indices returned by the plat_core_pos_by_mpidr() and plat_my_core_pos()
 	* APIs.
+	*
+	* A description of the topology tree can be found at
+	* https://trustedfirmware-a.readthedocs.io/en/latest/design/psci-pd-tree.html#design
 	*/
-	fpga_power_domain_tree_desc[0] = 1;
-	fpga_power_domain_tree_desc[1] = FPGA_MAX_CLUSTER_COUNT;
 
-	for (i = 0; i < FPGA_MAX_CLUSTER_COUNT; i++) {
-		fpga_power_domain_tree_desc[i + 2] = FPGA_MAX_CPUS_PER_CLUSTER * FPGA_MAX_PE_PER_CPU;
+	if (fpga_power_domain_tree_desc[0] == 0U) {
+		/*
+		 * As fpga_power_domain_tree_desc[0] == 0, assume that the
+		 * Power Domain Topology Tree has not been initialized, so
+		 * perform the initialization here.
+		 */
+
+		fpga_power_domain_tree_desc[0] = 1U;
+		fpga_power_domain_tree_desc[1] = FPGA_MAX_CLUSTER_COUNT;
+
+		for (i = 0U; i < FPGA_MAX_CLUSTER_COUNT; i++) {
+			fpga_power_domain_tree_desc[2 + i] =
+				(FPGA_MAX_CPUS_PER_CLUSTER *
+				 FPGA_MAX_PE_PER_CPU);
+		}
 	}
 
 	return fpga_power_domain_tree_desc;
@@ -34,40 +53,25 @@
 
 int plat_core_pos_by_mpidr(u_register_t mpidr)
 {
-	unsigned int cluster_id, cpu_id, thread_id;
+	unsigned int core_pos;
 
-	/*
-	 * The image running on the FPGA may or may not implement
-	 * multithreading, and it shouldn't be assumed this is consistent
-	 * across all CPUs.
-	 * This ensures that any passed mpidr values reflect the status of the
-	 * primary CPU's MT bit.
-	 */
+	mpidr &= (MPID_MASK & ~(MPIDR_AFFLVL_MASK << MPIDR_AFF3_SHIFT));
 	mpidr |= (read_mpidr_el1() & MPIDR_MT_MASK);
-	mpidr &= MPID_MASK;
 
-	if (mpidr & MPIDR_MT_MASK) {
-		thread_id = MPIDR_AFFLVL0_VAL(mpidr);
-		cpu_id = MPIDR_AFFLVL1_VAL(mpidr);
-		cluster_id = MPIDR_AFFLVL2_VAL(mpidr);
-	} else {
-		thread_id = 0;
-		cpu_id = MPIDR_AFFLVL0_VAL(mpidr);
-		cluster_id = MPIDR_AFFLVL1_VAL(mpidr);
+	if ((MPIDR_AFFLVL2_VAL(mpidr) >= FPGA_MAX_CLUSTER_COUNT) ||
+	    (MPIDR_AFFLVL1_VAL(mpidr) >= FPGA_MAX_CPUS_PER_CLUSTER) ||
+	    (MPIDR_AFFLVL0_VAL(mpidr) >= FPGA_MAX_PE_PER_CPU)) {
+		ERROR ("Invalid mpidr: 0x%08x\n", (uint32_t)mpidr);
+		panic();
 	}
 
-	if (cluster_id >= FPGA_MAX_CLUSTER_COUNT) {
-		return -1;
-	}
-
-	if (cpu_id >= FPGA_MAX_CPUS_PER_CLUSTER) {
-		return -1;
-	}
+	/* Calculate the core position, based on the maximum topology. */
+	core_pos = plat_fpga_calc_core_pos(mpidr);
 
-	if (thread_id >= FPGA_MAX_PE_PER_CPU) {
+	/* Check whether this core is actually present. */
+	if (fpga_valid_mpids[core_pos] != VALID_MPID) {
 		return -1;
 	}
 
-	/* Calculate the correct core, catering for multi-threaded images */
-	return (int) plat_fpga_calc_core_pos(mpidr);
+	return core_pos;
 }
diff --git a/plat/arm/board/arm_fpga/include/platform_def.h b/plat/arm/board/arm_fpga/include/platform_def.h
index 31fc987..411b936 100644
--- a/plat/arm/board/arm_fpga/include/platform_def.h
+++ b/plat/arm/board/arm_fpga/include/platform_def.h
@@ -21,11 +21,12 @@
 #define CACHE_WRITEBACK_SHIFT		U(6)
 #define CACHE_WRITEBACK_GRANULE		(U(1) << CACHE_WRITEBACK_SHIFT)
 
-#define PLATFORM_CORE_COUNT \
-	(FPGA_MAX_CLUSTER_COUNT * FPGA_MAX_CPUS_PER_CLUSTER * FPGA_MAX_PE_PER_CPU)
+#define PLATFORM_CORE_COUNT		\
+	(FPGA_MAX_CLUSTER_COUNT *	\
+	 FPGA_MAX_CPUS_PER_CLUSTER *	\
+	 FPGA_MAX_PE_PER_CPU)
 
-#define PLAT_NUM_PWR_DOMAINS		(FPGA_MAX_CLUSTER_COUNT + \
-					PLATFORM_CORE_COUNT) + 1
+#define PLAT_NUM_PWR_DOMAINS (FPGA_MAX_CLUSTER_COUNT + PLATFORM_CORE_COUNT + 1)
 
 #if !ENABLE_PIE
 #define BL31_BASE			UL(0x80000000)