Merge "feat(cm): context switch MDCR_EL3 register" into integration
diff --git a/common/feat_detect.c b/common/feat_detect.c
index 09088c9..8e3e3dd 100644
--- a/common/feat_detect.c
+++ b/common/feat_detect.c
@@ -289,7 +289,7 @@
 	 * revisions so that we catch them as they come along
 	 */
 	check_feature(FEAT_STATE_ALWAYS, read_feat_pmuv3_id_field(),
-		      "PMUv3", 1, ID_AA64DFR0_PMUVER_PMUV3P7);
+		      "PMUv3", 1, ID_AA64DFR0_PMUVER_PMUV3P8);
 
 	/* v8.1 features */
 	check_feature(ENABLE_FEAT_PAN, read_feat_pan_id_field(), "PAN", 1, 3);
diff --git a/drivers/auth/mbedtls/mbedtls_psa_crypto.c b/drivers/auth/mbedtls/mbedtls_psa_crypto.c
index 99242e3..2da97dc 100644
--- a/drivers/auth/mbedtls/mbedtls_psa_crypto.c
+++ b/drivers/auth/mbedtls/mbedtls_psa_crypto.c
@@ -446,7 +446,9 @@
 	 */
 	if (pk_alg == MBEDTLS_PK_RSASSA_PSS) {
 		rc = pk_bytes_from_subpubkey((unsigned char **) &pk_ptr, &pk_len);
-		goto end2;
+		if (rc != 0) {
+			goto end2;
+		}
 	}
 
 	/* Get the key_id using import API */
diff --git a/include/arch/aarch64/arch.h b/include/arch/aarch64/arch.h
index 4eb54ed..df0dcc3 100644
--- a/include/arch/aarch64/arch.h
+++ b/include/arch/aarch64/arch.h
@@ -238,7 +238,7 @@
 #define ID_AA64DFR0_PMUVER_SHIFT	U(8)
 #define ID_AA64DFR0_PMUVER_MASK		U(0xf)
 #define ID_AA64DFR0_PMUVER_PMUV3	U(1)
-#define ID_AA64DFR0_PMUVER_PMUV3P7	U(7)
+#define ID_AA64DFR0_PMUVER_PMUV3P8	U(8)
 #define ID_AA64DFR0_PMUVER_IMP_DEF	U(0xf)
 
 /* ID_AA64DFR0_EL1.SEBEP definitions */
diff --git a/include/drivers/cadence/cdns_sdmmc.h b/include/drivers/cadence/cdns_sdmmc.h
index 6452725..8bf3b78 100644
--- a/include/drivers/cadence/cdns_sdmmc.h
+++ b/include/drivers/cadence/cdns_sdmmc.h
@@ -280,9 +280,6 @@
 #define SDMMC_CDN(_reg)				(SDMMC_CDN_REG_BASE + \
 								(SDMMC_CDN_##_reg))
 
-/* Refer to atf/tools/cert_create/include/debug.h */
-#define BIT_32(nr)					(U(1) << (nr))
-
 /* MMC Peripheral Definition */
 #define SOCFPGA_MMC_BLOCK_SIZE		U(8192)
 #define SOCFPGA_MMC_BLOCK_MASK		(SOCFPGA_MMC_BLOCK_SIZE - U(1))
diff --git a/include/lib/utils_def.h b/include/lib/utils_def.h
index 8a03c7d..c3f767e 100644
--- a/include/lib/utils_def.h
+++ b/include/lib/utils_def.h
@@ -19,8 +19,13 @@
 
 #define SIZE_FROM_LOG2_WORDS(n)		(U(4) << (n))
 
+#if defined(__LINKER__) || defined(__ASSEMBLER__)
 #define BIT_32(nr)			(U(1) << (nr))
 #define BIT_64(nr)			(ULL(1) << (nr))
+#else
+#define BIT_32(nr)			(((uint32_t)(1U)) << (nr))
+#define BIT_64(nr)			(((uint64_t)(1ULL)) << (nr))
+#endif
 
 #ifdef __aarch64__
 #define BIT				BIT_64
@@ -29,22 +34,22 @@
 #endif
 
 /*
- * Create a contiguous bitmask starting at bit position @l and ending at
- * position @h. For example
+ * Create a contiguous bitmask starting at bit position @low and ending at
+ * position @high. For example
  * GENMASK_64(39, 21) gives us the 64bit vector 0x000000ffffe00000.
  */
 #if defined(__LINKER__) || defined(__ASSEMBLER__)
-#define GENMASK_32(h, l) \
-	(((0xFFFFFFFF) << (l)) & (0xFFFFFFFF >> (32 - 1 - (h))))
+#define GENMASK_32(high, low) \
+	(((0xFFFFFFFF) << (low)) & (0xFFFFFFFF >> (32 - 1 - (high))))
 
-#define GENMASK_64(h, l) \
-	((~0 << (l)) & (~0 >> (64 - 1 - (h))))
+#define GENMASK_64(high, low) \
+	((~0 << (low)) & (~0 >> (64 - 1 - (high))))
 #else
-#define GENMASK_32(h, l) \
-	(((~UINT32_C(0)) << (l)) & (~UINT32_C(0) >> (32 - 1 - (h))))
+#define GENMASK_32(high, low) \
+	((~UINT32_C(0) >> (32U - 1U - (high))) ^ ((BIT_32(low) - 1U)))
 
-#define GENMASK_64(h, l) \
-	(((~UINT64_C(0)) << (l)) & (~UINT64_C(0) >> (64 - 1 - (h))))
+#define GENMASK_64(high, low) \
+	((~UINT64_C(0) >> (64U - 1U - (high))) ^ ((BIT_64(low) - 1U)))
 #endif
 
 #ifdef __aarch64__
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index 058f7fa..981fddc 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -262,13 +262,9 @@
 #if CTX_INCLUDE_EL2_REGS
 
 	/*
-	 * Initialize SCTLR_EL2 context register using Endianness value
-	 * taken from the entrypoint attribute.
+	 * Initialize SCTLR_EL2 context register with reset value.
 	 */
-	u_register_t sctlr_el2_val = (EP_GET_EE(ep->h.attr) != 0U) ? SCTLR_EE_BIT : 0UL;
-	sctlr_el2_val |= SCTLR_EL2_RES1;
-	write_el2_ctx_common(get_el2_sysregs_ctx(ctx), sctlr_el2, sctlr_el2_val);
-
+	write_el2_ctx_common(get_el2_sysregs_ctx(ctx), sctlr_el2, SCTLR_EL2_RES1);
 
 	if (is_feat_hcx_supported()) {
 		/*
@@ -1011,7 +1007,7 @@
  ******************************************************************************/
 void cm_prepare_el3_exit(uint32_t security_state)
 {
-	u_register_t sctlr_elx, scr_el3;
+	u_register_t sctlr_el2, scr_el3;
 	cpu_context_t *ctx = cm_get_context(security_state);
 
 	assert(ctx != NULL);
@@ -1052,20 +1048,17 @@
 
 			/* Condition to ensure EL2 is being used. */
 			if ((scr_el3 & SCR_HCE_BIT) != 0U) {
-				/* Use SCTLR_EL1.EE value to initialise sctlr_el2 */
-				sctlr_elx = read_ctx_reg(get_el1_sysregs_ctx(ctx),
-								CTX_SCTLR_EL1);
-				sctlr_elx &= SCTLR_EE_BIT;
-				sctlr_elx |= SCTLR_EL2_RES1;
+				/* Initialize SCTLR_EL2 register with reset value. */
+				sctlr_el2 = SCTLR_EL2_RES1;
 #if ERRATA_A75_764081
 				/*
 				 * If workaround of errata 764081 for Cortex-A75
 				 * is used then set SCTLR_EL2.IESB to enable
 				 * Implicit Error Synchronization Barrier.
 				 */
-				sctlr_elx |= SCTLR_IESB_BIT;
-#endif /* ERRATA_A75_764081 */
-				write_sctlr_el2(sctlr_elx);
+				sctlr_el2 |= SCTLR_IESB_BIT;
+#endif
+				write_sctlr_el2(sctlr_el2);
 			} else {
 				/*
 				 * (scr_el3 & SCR_HCE_BIT==0)
diff --git a/lib/gpt_rme/gpt_rme.c b/lib/gpt_rme/gpt_rme.c
index ee502de..d028fce 100644
--- a/lib/gpt_rme/gpt_rme.c
+++ b/lib/gpt_rme/gpt_rme.c
@@ -847,7 +847,7 @@
 	assert(GPT_L0_IDX(first) == GPT_L0_IDX(last));
 
 #if (RME_GPT_MAX_BLOCK != 0)
-	while (first < last) {
+	while (first <= last) {
 		/* Region length */
 		size_t length = last - first + GPT_PGS_ACTUAL_SIZE(gpt_config.p);
 
diff --git a/plat/arm/board/arm_fpga/platform.mk b/plat/arm/board/arm_fpga/platform.mk
index bf7c59b..82401db 100644
--- a/plat/arm/board/arm_fpga/platform.mk
+++ b/plat/arm/board/arm_fpga/platform.mk
@@ -41,8 +41,12 @@
 ENABLE_FEAT_ECV			:= 2
 ENABLE_FEAT_FGT			:= 2
 ENABLE_FEAT_HCX			:= 2
+ENABLE_FEAT_MTE2		:= 2
+ENABLE_FEAT_TCR2		:= 2
 ENABLE_SYS_REG_TRACE_FOR_NS	:= 2
 ENABLE_TRF_FOR_NS		:= 2
+ENABLE_SME_FOR_NS		:= 2
+ENABLE_SME2_FOR_NS		:= 2
 
 # Treating this as a memory-constrained port for now
 USE_COHERENT_MEM	:=	0
diff --git a/plat/arm/common/arm_common.mk b/plat/arm/common/arm_common.mk
index 7377a01..f5919ab 100644
--- a/plat/arm/common/arm_common.mk
+++ b/plat/arm/common/arm_common.mk
@@ -26,7 +26,7 @@
   else ifeq (${ARM_TSP_RAM_LOCATION}, dram)
     ARM_TSP_RAM_LOCATION_ID = ARM_DRAM_ID
   else
-    $(error "Unsupported ARM_TSP_RAM_LOCATION value")
+    $(error Unsupported ARM_TSP_RAM_LOCATION value)
   endif
 
   # Process flags
@@ -83,7 +83,7 @@
 # memory. This means we must not run BL31 from TZC-protected DRAM.
 ifeq (${ARM_BL31_IN_DRAM},1)
   ifeq (${ENABLE_RME},1)
-    $(error "BL31 must not run from DRAM on RME-systems. Please set ARM_BL31_IN_DRAM to 0")
+    $(error BL31 must not run from DRAM on RME-systems. Please set ARM_BL31_IN_DRAM to 0)
   endif
 endif
 
@@ -105,16 +105,15 @@
 ifeq (${ARM_LINUX_KERNEL_AS_BL33},1)
   ifneq (${ARCH},aarch64)
     ifneq (${RESET_TO_SP_MIN},1)
-      $(error "ARM_LINUX_KERNEL_AS_BL33 is only available if RESET_TO_SP_MIN=1.")
+      $(error ARM_LINUX_KERNEL_AS_BL33 is only available if RESET_TO_SP_MIN=1.)
     endif
   endif
   ifndef PRELOADED_BL33_BASE
-    $(error "PRELOADED_BL33_BASE must be set if ARM_LINUX_KERNEL_AS_BL33 is used.")
+    $(error PRELOADED_BL33_BASE must be set if ARM_LINUX_KERNEL_AS_BL33 is used.)
   endif
   ifeq (${RESET_TO_BL31},1)
     ifndef ARM_PRELOADED_DTB_BASE
-      $(error "ARM_PRELOADED_DTB_BASE must be set if ARM_LINUX_KERNEL_AS_BL33 is
-       used with RESET_TO_BL31.")
+      $(error ARM_PRELOADED_DTB_BASE must be set if ARM_LINUX_KERNEL_AS_BL33 is used with RESET_TO_BL31.)
     endif
     $(eval $(call add_define,ARM_PRELOADED_DTB_BASE))
   endif
@@ -456,6 +455,6 @@
 
 ifeq (${RECLAIM_INIT_CODE}, 1)
     ifeq (${ARM_XLAT_TABLES_LIB_V1}, 1)
-        $(error "To reclaim init code xlat tables v2 must be used")
+        $(error To reclaim init code xlat tables v2 must be used)
     endif
 endif
diff --git a/plat/qemu/qemu_sbsa/sbsa_sip_svc.c b/plat/qemu/qemu_sbsa/sbsa_sip_svc.c
index 535f0eb..83e66f3 100644
--- a/plat/qemu/qemu_sbsa/sbsa_sip_svc.c
+++ b/plat/qemu/qemu_sbsa/sbsa_sip_svc.c
@@ -30,6 +30,7 @@
 #define SIP_SVC_GET_GIC_ITS SIP_FUNCTION_ID(101)
 #define SIP_SVC_GET_CPU_COUNT SIP_FUNCTION_ID(200)
 #define SIP_SVC_GET_CPU_NODE SIP_FUNCTION_ID(201)
+#define SIP_SVC_GET_CPU_TOPOLOGY SIP_FUNCTION_ID(202)
 #define SIP_SVC_GET_MEMORY_NODE_COUNT SIP_FUNCTION_ID(300)
 #define SIP_SVC_GET_MEMORY_NODE SIP_FUNCTION_ID(301)
 
@@ -46,10 +47,24 @@
 	uint64_t addr_size;
 } memory_data;
 
+/*
+ * sockets: the number of sockets on sbsa-ref platform.
+ * clusters: the number of clusters in one socket.
+ * cores: the number of cores in one cluster.
+ * threads: the number of threads in one core.
+ */
+typedef struct {
+	uint32_t sockets;
+	uint32_t clusters;
+	uint32_t cores;
+	uint32_t threads;
+} cpu_topology;
+
 static struct {
 	uint32_t num_cpus;
 	uint32_t num_memnodes;
 	cpu_data cpu[PLATFORM_CORE_COUNT];
+	cpu_topology cpu_topo;
 	memory_data memory[PLAT_MAX_MEM_NODES];
 } dynamic_platform_info;
 
@@ -71,12 +86,46 @@
  * cope.
  */
 
+static void read_cpu_topology_from_dt(void *dtb)
+{
+	int node;
+
+	/*
+	 * QEMU gives us this DeviceTree node when we config:
+	 * -smp 16,sockets=2,clusters=2,cores=2,threads=2
+	 *
+	 * topology {
+	 *	threads = <0x02>;
+	 *	cores = <0x02>;
+	 *	clusters = <0x02>;
+	 *	sockets = <0x02>;
+	 * };
+	 */
+
+	node = fdt_path_offset(dtb, "/cpus/topology");
+	if (node > 0) {
+		dynamic_platform_info.cpu_topo.sockets =
+			fdt_read_uint32_default(dtb, node, "sockets", 0);
+		dynamic_platform_info.cpu_topo.clusters =
+			fdt_read_uint32_default(dtb, node, "clusters", 0);
+		dynamic_platform_info.cpu_topo.cores =
+			fdt_read_uint32_default(dtb, node, "cores", 0);
+		dynamic_platform_info.cpu_topo.threads =
+			fdt_read_uint32_default(dtb, node, "threads", 0);
+	}
+
+	INFO("Cpu topology: sockets: %d, clusters: %d, cores: %d, threads: %d\n",
+		dynamic_platform_info.cpu_topo.sockets,
+		dynamic_platform_info.cpu_topo.clusters,
+		dynamic_platform_info.cpu_topo.cores,
+		dynamic_platform_info.cpu_topo.threads);
+}
+
 void read_cpuinfo_from_dt(void *dtb)
 {
 	int node;
 	int prev;
 	int cpu = 0;
-	uint32_t nodeid = 0;
 	uintptr_t mpidr;
 
 	/*
@@ -118,14 +167,12 @@
 			panic();
 		}
 
-		if (fdt_getprop(dtb, node, "numa-node-id", NULL))  {
-			fdt_read_uint32(dtb, node, "numa-node-id", &nodeid);
-		}
-
-		dynamic_platform_info.cpu[cpu].nodeid = nodeid;
 		dynamic_platform_info.cpu[cpu].mpidr = mpidr;
+		dynamic_platform_info.cpu[cpu].nodeid =
+			fdt_read_uint32_default(dtb, node, "numa-node-id", 0);
 
-		INFO("CPU %d: node-id: %d, mpidr: %ld\n", cpu, nodeid, mpidr);
+		INFO("CPU %d: node-id: %d, mpidr: %ld\n", cpu,
+				dynamic_platform_info.cpu[cpu].nodeid, mpidr);
 
 		cpu++;
 
@@ -135,6 +182,8 @@
 
 	dynamic_platform_info.num_cpus = cpu;
 	INFO("Found %d cpus\n", dynamic_platform_info.num_cpus);
+
+	read_cpu_topology_from_dt(dtb);
 }
 
 void read_meminfo_from_dt(void *dtb)
@@ -143,7 +192,6 @@
 	const char *type;
 	int prev, node;
 	int len;
-	uint32_t nodeid = 0;
 	uint32_t memnode = 0;
 	uint32_t higher_value, lower_value;
 	uint64_t cur_base, cur_size;
@@ -172,11 +220,8 @@
 
 		type = fdt_getprop(dtb, node, "device_type", &len);
 		if (type && strncmp(type, "memory", len) == 0) {
-			if (fdt_getprop(dtb, node, "numa-node-id", NULL)) {
-				fdt_read_uint32(dtb, node, "numa-node-id", &nodeid);
-			}
-
-			dynamic_platform_info.memory[memnode].nodeid = nodeid;
+			dynamic_platform_info.memory[memnode].nodeid =
+				fdt_read_uint32_default(dtb, node, "numa-node-id", 0);
 
 			/*
 			 * Get the 'reg' property of this node and
@@ -274,10 +319,10 @@
 
 	node = fdt_path_offset(dtb, "/");
 	if (node >= 0) {
-		platform_version_major = fdt32_ld(fdt_getprop(dtb, node,
-							      "machine-version-major", NULL));
-		platform_version_minor = fdt32_ld(fdt_getprop(dtb, node,
-							      "machine-version-minor", NULL));
+		platform_version_major =
+			fdt_read_uint32_default(dtb, node, "machine-version-major", 0);
+		platform_version_minor =
+			fdt_read_uint32_default(dtb, node, "machine-version-minor", 0);
 	}
 }
 
@@ -354,6 +399,18 @@
 			SMC_RET1(handle, SMC_ARCH_CALL_INVAL_PARAM);
 		}
 
+	case SIP_SVC_GET_CPU_TOPOLOGY:
+		if (dynamic_platform_info.cpu_topo.cores > 0) {
+			SMC_RET5(handle, NULL,
+			dynamic_platform_info.cpu_topo.sockets,
+			dynamic_platform_info.cpu_topo.clusters,
+			dynamic_platform_info.cpu_topo.cores,
+			dynamic_platform_info.cpu_topo.threads);
+		} else {
+			/* we do not know topology so we report SMC as unknown */
+			SMC_RET1(handle, SMC_UNK);
+		}
+
 	case SIP_SVC_GET_MEMORY_NODE_COUNT:
 		SMC_RET2(handle, NULL, dynamic_platform_info.num_memnodes);
 
diff --git a/plat/xilinx/versal/bl31_versal_setup.c b/plat/xilinx/versal/bl31_versal_setup.c
index 08c0205..594784f 100644
--- a/plat/xilinx/versal/bl31_versal_setup.c
+++ b/plat/xilinx/versal/bl31_versal_setup.c
@@ -118,19 +118,6 @@
 		panic();
 	} else {
 		INFO("BL31: PLM to TF-A handover success %u\n", ret);
-
-		/*
-		 * The BL32 load address is indicated as 0x0 in the handoff
-		 * parameters, which is different from the default/user-provided
-		 * load address of 0x60000000 but the flags are correctly
-		 * configured. Consequently, in this scenario, set the PC
-		 * to the requested BL32_BASE address.
-		 */
-
-		/* TODO: Remove the following check once this is fixed from PLM */
-		if (bl32_image_ep_info.pc == 0 && bl32_image_ep_info.spsr != 0) {
-			bl32_image_ep_info.pc = (uintptr_t)BL32_BASE;
-		}
 	}
 
 	NOTICE("BL31: Secure code at 0x%lx\n", bl32_image_ep_info.pc);
diff --git a/plat/xilinx/versal_net/bl31_versal_net_setup.c b/plat/xilinx/versal_net/bl31_versal_net_setup.c
index d6390e2..5af2b1d 100644
--- a/plat/xilinx/versal_net/bl31_versal_net_setup.c
+++ b/plat/xilinx/versal_net/bl31_versal_net_setup.c
@@ -153,18 +153,6 @@
 
 		INFO("BL31: PLM to TF-A handover success\n");
 
-		/*
-		 * The BL32 load address is indicated as 0x0 in the handoff
-		 * parameters, which is different from the default/user-provided
-		 * load address of 0x60000000 but the flags are correctly
-		 * configured. Consequently, in this scenario, set the PC
-		 * to the requested BL32_BASE address.
-		 */
-
-		/* TODO: Remove the following check once this is fixed from PLM */
-		if (bl32_image_ep_info.pc == 0 && bl32_image_ep_info.spsr != 0) {
-			bl32_image_ep_info.pc = (uintptr_t)BL32_BASE;
-		}
 	} else {
 		INFO("BL31: setting up default configs\n");