FVP: Add support for multi-threaded CPUs

ARM CPUs with multi-threading implementation has more than one
Processing Element in a single physical CPU. Such an implementation will
reflect the following changes in the MPIDR register:

  - The MT bit set;

  - Affinity levels pertaining to cluster and CPUs occupy one level
    higher than in a single-threaded implementation, and the lowest
    affinity level pertains to hardware threads. MPIDR affinity level
    fields essentially appear shifted to left than otherwise.

The FVP port henceforth assumes that both properties above to be
concomitant on a given FVP platform.

To accommodate for varied MPIDR formats at run time, this patch
re-implements the FVP platform-specific functions that translates MPIDR
values to a linear indices, along with required validation. The same
treatment is applied for GICv3 MPIDR hashing function as well.

An FVP-specific build option FVP_MAX_PE_PER_CPU is introduced which
specifies the maximum number of threads implemented per CPU. For
backwards compatibility, its value defaults to 1.

Change-Id: I729b00d3e121d16ce9a03de4f9db36dfac580e3f
Signed-off-by: Jeenu Viswambharan <jeenu.viswambharan@arm.com>
diff --git a/docs/user-guide.rst b/docs/user-guide.rst
index ec8c233..9577269 100644
--- a/docs/user-guide.rst
+++ b/docs/user-guide.rst
@@ -231,10 +231,10 @@
    This build option is deprecated.
 
 -  ``ARM_PLAT_MT``: This flag determines whether the ARM platform layer has to
-   cater for the multi-threading ``MT`` bit when accessing MPIDR. When this
-   flag is set, the functions which deal with MPIDR assume that the ``MT`` bit
-   in MPIDR is set and access the bit-fields in MPIDR accordingly. Default
-   value of this flag is 0.
+   cater for the multi-threading ``MT`` bit when accessing MPIDR. When this flag
+   is set, the functions which deal with MPIDR assume that the ``MT`` bit in
+   MPIDR is set and access the bit-fields in MPIDR accordingly. Default value of
+   this flag is 0. Note that this option is not used on FVP platforms.
 
 -  ``BL2``: This is an optional build option which specifies the path to BL2
    image for the ``fip`` target. In this case, the BL2 in the ARM Trusted
@@ -677,6 +677,10 @@
    -  ``FVP_CCN`` : The CCN driver is selected. This is the default
       if ``FVP_CLUSTER_COUNT`` > 2.
 
+-  ``FVP_MAX_PE_PER_CPU``: Sets the maximum number of PEs implemented on any CPU
+   in the system. This option defaults to 1. Note that the build option
+   ``ARM_PLAT_MT`` doesn't have any effect on FVP platforms.
+
 -  ``FVP_USE_GIC_DRIVER`` : Selects the GIC driver to be built. Options:
 
    -  ``FVP_GIC600`` : The GIC600 implementation of GICv3 is selected
diff --git a/plat/arm/board/fvp/aarch32/fvp_helpers.S b/plat/arm/board/fvp/aarch32/fvp_helpers.S
index e80e199..143972d 100644
--- a/plat/arm/board/fvp/aarch32/fvp_helpers.S
+++ b/plat/arm/board/fvp/aarch32/fvp_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -13,6 +13,7 @@
 	.globl	plat_secondary_cold_boot_setup
 	.globl	plat_get_my_entrypoint
 	.globl	plat_is_my_cpu_primary
+	.globl	plat_arm_calc_core_pos
 
 	/* --------------------------------------------------------------------
 	 * void plat_secondary_cold_boot_setup (void);
@@ -95,10 +96,43 @@
 	 */
 func plat_is_my_cpu_primary
 	ldcopr	r0, MPIDR
-	ldr	r1, =(MPIDR_CLUSTER_MASK | MPIDR_CPU_MASK)
+	ldr	r1, =MPIDR_AFFINITY_MASK
 	and	r0, r1
 	cmp	r0, #FVP_PRIMARY_CPU
 	moveq	r0, #1
 	movne	r0, #0
 	bx	lr
 endfunc plat_is_my_cpu_primary
+
+	/* -----------------------------------------------------
+	 * unsigned int plat_arm_calc_core_pos(u_register_t mpidr)
+	 *
+	 * Function to calculate the core position on FVP.
+	 *
+	 * (ClusterId * FVP_MAX_CPUS_PER_CLUSTER) +
+	 * (CPUId * FVP_MAX_PE_PER_CPU) +
+	 * ThreadId
+	 * -----------------------------------------------------
+	 */
+func plat_arm_calc_core_pos
+	mov	r3, r0
+
+	/*
+	 * Check for MT bit in MPIDR. If not set, shift MPIDR to left to make it
+	 * look as if in a multi-threaded implementation
+	 */
+	tst	r0, #MPIDR_MT_MASK
+	lsleq	r3, r0, #MPIDR_AFFINITY_BITS
+
+	/* Extract individual affinity fields from MPIDR */
+	mov	r2, #FVP_MAX_PE_PER_CPU
+	ubfx	r0, r3, #MPIDR_AFF0_SHIFT, #MPIDR_AFFINITY_BITS
+	ubfx	r1, r3, #MPIDR_AFF1_SHIFT, #MPIDR_AFFINITY_BITS
+	mla	r0, r1, r2, r0
+
+	mov	r1, #FVP_MAX_CPUS_PER_CLUSTER
+	ubfx	r2, r3, #MPIDR_AFF2_SHIFT, #MPIDR_AFFINITY_BITS
+	mla	r0, r1, r2, r0
+
+	bx	lr
+endfunc plat_arm_calc_core_pos
diff --git a/plat/arm/board/fvp/aarch64/fvp_helpers.S b/plat/arm/board/fvp/aarch64/fvp_helpers.S
index f4107de..6ea4585 100644
--- a/plat/arm/board/fvp/aarch64/fvp_helpers.S
+++ b/plat/arm/board/fvp/aarch64/fvp_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -16,6 +16,7 @@
 	.globl	plat_secondary_cold_boot_setup
 	.globl	plat_get_my_entrypoint
 	.globl	plat_is_my_cpu_primary
+	.globl	plat_arm_calc_core_pos
 
 	.macro	fvp_choose_gicmmap  param1, param2, x_tmp, w_tmp, res
 	ldr	\x_tmp, =V2M_SYSREGS_BASE + V2M_SYS_ID
@@ -170,8 +171,43 @@
 	 */
 func plat_is_my_cpu_primary
 	mrs	x0, mpidr_el1
-	and	x0, x0, #(MPIDR_CLUSTER_MASK | MPIDR_CPU_MASK)
+	ldr	x1, =MPIDR_AFFINITY_MASK
+	and	x0, x0, x1
 	cmp	x0, #FVP_PRIMARY_CPU
 	cset	w0, eq
 	ret
 endfunc plat_is_my_cpu_primary
+
+	/* -----------------------------------------------------
+	 * unsigned int plat_arm_calc_core_pos(u_register_t mpidr)
+	 *
+	 * Function to calculate the core position on FVP.
+	 *
+	 * (ClusterId * FVP_MAX_CPUS_PER_CLUSTER) +
+	 * (CPUId * FVP_MAX_PE_PER_CPU) +
+	 * ThreadId
+	 * -----------------------------------------------------
+	 */
+func plat_arm_calc_core_pos
+	mov	x3, x0
+
+	/*
+	 * Check for MT bit in MPIDR. If not set, shift MPIDR to left to make it
+	 * look as if in a multi-threaded implementation.
+	 */
+	tst	x0, #MPIDR_MT_MASK
+	lsl	x3, x0, #MPIDR_AFFINITY_BITS
+	csel	x3, x3, x0, eq
+
+	/* Extract individual affinity fields from MPIDR */
+	ubfx	x0, x3, #MPIDR_AFF0_SHIFT, #MPIDR_AFFINITY_BITS
+	ubfx	x1, x3, #MPIDR_AFF1_SHIFT, #MPIDR_AFFINITY_BITS
+	ubfx	x2, x3, #MPIDR_AFF2_SHIFT, #MPIDR_AFFINITY_BITS
+
+	/* Compute linear position */
+	mov	x4, #FVP_MAX_PE_PER_CPU
+	madd	x0, x1, x4, x0
+	mov	x5, #FVP_MAX_CPUS_PER_CLUSTER
+	madd	x0, x2, x5, x0
+	ret
+endfunc plat_arm_calc_core_pos
diff --git a/plat/arm/board/fvp/fvp_def.h b/plat/arm/board/fvp/fvp_def.h
index d4f9d92..84e790b 100644
--- a/plat/arm/board/fvp/fvp_def.h
+++ b/plat/arm/board/fvp/fvp_def.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2014-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -12,6 +12,10 @@
 #endif
 #define FVP_MAX_CPUS_PER_CLUSTER	4
 
+#ifndef FVP_MAX_PE_PER_CPU
+# define FVP_MAX_PE_PER_CPU		1
+#endif
+
 #define FVP_PRIMARY_CPU			0x0
 
 /* Defines for the Interconnect build selection */
diff --git a/plat/arm/board/fvp/fvp_topology.c b/plat/arm/board/fvp/fvp_topology.c
index 055b985..848aaf8 100644
--- a/plat/arm/board/fvp/fvp_topology.c
+++ b/plat/arm/board/fvp/fvp_topology.c
@@ -1,10 +1,11 @@
 /*
- * Copyright (c) 2013-2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
 
 #include <arch.h>
+#include <arm_config.h>
 #include <cassert.h>
 #include <plat_arm.h>
 #include <platform_def.h>
@@ -55,11 +56,18 @@
  ******************************************************************************/
 int plat_core_pos_by_mpidr(u_register_t mpidr)
 {
-	if (arm_check_mpidr(mpidr) == -1)
-		return -1;
-
 	if (fvp_pwrc_read_psysr(mpidr) == PSYSR_INVALID)
 		return -1;
 
+	/*
+	 * Core position calculation for FVP platform depends on the MT bit in
+	 * MPIDR. This function cannot assume that the supplied MPIDR has the MT
+	 * bit set even if the implementation has. For example, PSCI clients
+	 * might supply MPIDR values without the MT bit set. Therefore, we
+	 * inject the current PE's MT bit so as to get the calculation correct.
+	 * This of course assumes that none or all CPUs on the platform has MT
+	 * bit set.
+	 */
+	mpidr |= (read_mpidr_el1() & MPIDR_MT_MASK);
 	return plat_arm_calc_core_pos(mpidr);
 }
diff --git a/plat/arm/board/fvp/include/platform_def.h b/plat/arm/board/fvp/include/platform_def.h
index f13fc8e..7a7cf9e 100644
--- a/plat/arm/board/fvp/include/platform_def.h
+++ b/plat/arm/board/fvp/include/platform_def.h
@@ -16,10 +16,13 @@
 #include "../fvp_def.h"
 
 /* Required platform porting definitions */
+#define PLATFORM_CORE_COUNT \
+	(FVP_CLUSTER_COUNT * FVP_MAX_CPUS_PER_CLUSTER * FVP_MAX_PE_PER_CPU)
+
 #define PLAT_NUM_PWR_DOMAINS		(FVP_CLUSTER_COUNT + \
 					PLATFORM_CORE_COUNT)
+
 #define PLAT_MAX_PWR_LVL		ARM_PWR_LVL1
-#define PLATFORM_CORE_COUNT		(FVP_CLUSTER_COUNT * FVP_MAX_CPUS_PER_CLUSTER)
 
 /*
  * Other platform porting definitions are provided by included headers
diff --git a/plat/arm/board/fvp/platform.mk b/plat/arm/board/fvp/platform.mk
index d6e8ced..d9c624c 100644
--- a/plat/arm/board/fvp/platform.mk
+++ b/plat/arm/board/fvp/platform.mk
@@ -6,19 +6,28 @@
 
 # Use the GICv3 driver on the FVP by default
 FVP_USE_GIC_DRIVER	:= FVP_GICV3
+
 # Use the SP804 timer instead of the generic one
 FVP_USE_SP804_TIMER	:= 0
 
+# Default cluster count for FVP
+FVP_CLUSTER_COUNT	:= 2
+
+# Default number of threads per CPU on FVP
+FVP_MAX_PE_PER_CPU	:= 1
+
 $(eval $(call assert_boolean,FVP_USE_SP804_TIMER))
 $(eval $(call add_define,FVP_USE_SP804_TIMER))
 
 # The FVP platform depends on this macro to build with correct GIC driver.
 $(eval $(call add_define,FVP_USE_GIC_DRIVER))
 
-# Define default FVP_CLUSTER_COUNT to 2 and pass it into the build system.
-FVP_CLUSTER_COUNT	:= 2
+# Pass FVP_CLUSTER_COUNT to the build system.
 $(eval $(call add_define,FVP_CLUSTER_COUNT))
 
+# Pass FVP_MAX_PE_PER_CPU to the build system.
+$(eval $(call add_define,FVP_MAX_PE_PER_CPU))
+
 # Sanity check the cluster count and if FVP_CLUSTER_COUNT <= 2,
 # choose the CCI driver , else the CCN driver
 ifeq ($(FVP_CLUSTER_COUNT), 0)
diff --git a/plat/arm/common/arm_gicv3.c b/plat/arm/common/arm_gicv3.c
index 960f691..c9bba09 100644
--- a/plat/arm/common/arm_gicv3.c
+++ b/plat/arm/common/arm_gicv3.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -35,6 +35,26 @@
 	PLAT_ARM_G0_IRQS
 };
 
+/*
+ * MPIDR hashing function for translating MPIDRs read from GICR_TYPER register
+ * to core position.
+ *
+ * Calculating core position is dependent on MPIDR_EL1.MT bit. However, affinity
+ * values read from GICR_TYPER don't have an MT field. To reuse the same
+ * translation used for CPUs, we insert MT bit read from the PE's MPIDR into
+ * that read from GICR_TYPER.
+ *
+ * Assumptions:
+ *
+ *   - All CPUs implemented in the system have MPIDR_EL1.MT bit set;
+ *   - No CPUs implemented in the system use affinity level 3.
+ */
+static unsigned int arm_gicv3_mpidr_hash(u_register_t mpidr)
+{
+	mpidr |= (read_mpidr_el1() & MPIDR_MT_MASK);
+	return plat_arm_calc_core_pos(mpidr);
+}
+
 const gicv3_driver_data_t arm_gic_data = {
 	.gicd_base = PLAT_ARM_GICD_BASE,
 	.gicr_base = PLAT_ARM_GICR_BASE,
@@ -44,7 +64,7 @@
 	.g1s_interrupt_array = g1s_interrupt_array,
 	.rdistif_num = PLATFORM_CORE_COUNT,
 	.rdistif_base_addrs = rdistif_base_addrs,
-	.mpidr_to_core_pos = plat_arm_calc_core_pos
+	.mpidr_to_core_pos = arm_gicv3_mpidr_hash
 };
 
 void plat_arm_gic_driver_init(void)