feat(pmu): introduce pmuv3 lib/extensions folder

The enablement code for the PMU is scattered and difficult to track
down. Factor out the feature into its own lib/extensions folder and
consolidate the implementation. Treat it as an architecturally
mandatory feature, as is done currently.

Additionally, do some cleanup on AArch64. Setting overflow bits in
PMCR_EL0 is irrelevant for firmware, so don't do it. Then delay the PMU
initialisation until the context management stage, which simplifies the
early environment assembly. One side effect is that the PMU might count
before this happens, so reset all counters to 0 to prevent any leakage.
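
The reset boils down to the PMCR_EL0 write in the new pmuv3_disable_el3()
(sketched here with the arch_helpers.h accessors; C and P wipe the cycle
and event counters while E keeps counting disabled):

    /* Stop counting and discard anything counted before this point. */
    write_pmcr_el0((read_pmcr_el0() | PMCR_EL0_DP_BIT | PMCR_EL0_C_BIT |
                    PMCR_EL0_P_BIT) & ~(PMCR_EL0_X_BIT | PMCR_EL0_E_BIT));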

Finally, add an enable call to manage_extensions_realm(), as realm world
uses the PMU. This introduces the HPMN fixup to realm world.
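
When CTX_INCLUDE_EL2_REGS is set, the fixup amounts to roughly the
following (a sketch of the pmuv3_enable()/init_mdcr_el2_hpmn() pair added
below, applied to the realm context):

    u_register_t mdcr_el2;

    /* Seed the saved MDCR_EL2.HPMN with PMCR_EL0.N, its hardware reset value. */
    mdcr_el2 = read_ctx_reg(get_el2_sysregs_ctx(ctx), CTX_MDCR_EL2);
    mdcr_el2 &= ~MDCR_EL2_HPMN_MASK;
    mdcr_el2 |= (read_pmcr_el0() >> PMCR_EL0_N_SHIFT) & PMCR_EL0_N_MASK;
    write_ctx_reg(get_el2_sysregs_ctx(ctx), CTX_MDCR_EL2, mdcr_el2);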

Signed-off-by: Boyan Karatotev <boyan.karatotev@arm.com>
Change-Id: Ie13a8625820ecc5fbfa467dc6ca18025bf6a9cd3
diff --git a/Makefile b/Makefile
index 8d3ffe1..7fa54ca 100644
--- a/Makefile
+++ b/Makefile
@@ -523,6 +523,7 @@
 				drivers/console/multi_console.c		\
 				lib/${ARCH}/cache_helpers.S		\
 				lib/${ARCH}/misc_helpers.S		\
+				lib/extensions/pmuv3/${ARCH}/pmuv3.c	\
 				plat/common/plat_bl_common.c		\
 				plat/common/plat_log_common.c		\
 				plat/common/${ARCH}/plat_common.c	\
diff --git a/common/feat_detect.c b/common/feat_detect.c
index 50b74d0..871b52d 100644
--- a/common/feat_detect.c
+++ b/common/feat_detect.c
@@ -144,6 +144,14 @@
 	check_feature(ENABLE_FEAT_SB, read_feat_sb_id_field(), "SB", 1, 1);
 	check_feature(ENABLE_FEAT_CSV2_2, read_feat_csv2_id_field(),
 		      "CSV2_2", 2, 3);
+	/*
+	 * Even though PMUv3 is an OPTIONAL feature, it is always implemented
+	 * and Arm prescribes that it should be, so assume it is present and do
+	 * away with a build flag for it. This check covers the minor PMUv3px
+	 * revisions so that we catch new ones as they come along.
+	 */
+	check_feature(FEAT_STATE_ALWAYS, read_feat_pmuv3_id_field(),
+		      "PMUv3", 1, ID_AA64DFR0_PMUVER_PMUV3P7);
 
 	/* v8.1 features */
 	check_feature(ENABLE_FEAT_PAN, read_feat_pan_id_field(), "PAN", 1, 3);
diff --git a/include/arch/aarch32/arch.h b/include/arch/aarch32/arch.h
index c8a6334..979e43a 100644
--- a/include/arch/aarch32/arch.h
+++ b/include/arch/aarch32/arch.h
@@ -104,7 +104,11 @@
 /* CSSELR definitions */
 #define LEVEL_SHIFT		U(1)
 
-/* ID_DFR0_EL1 definitions */
+/* ID_DFR0 definitions */
+#define ID_DFR0_PERFMON_SHIFT		U(24)
+#define ID_DFR0_PERFMON_MASK		U(0xf)
+#define ID_DFR0_PERFMON_PMUV3		U(3)
+#define ID_DFR0_PERFMON_PMUV3P5		U(6)
 #define ID_DFR0_COPTRC_SHIFT		U(12)
 #define ID_DFR0_COPTRC_MASK		U(0xf)
 #define ID_DFR0_COPTRC_SUPPORTED	U(1)
@@ -464,6 +468,10 @@
 #define PMCR_LP_BIT		(U(1) << 7)
 #define PMCR_LC_BIT		(U(1) << 6)
 #define PMCR_DP_BIT		(U(1) << 5)
+#define PMCR_X_BIT		(U(1) << 4)
+#define PMCR_C_BIT		(U(1) << 2)
+#define PMCR_P_BIT		(U(1) << 1)
+#define PMCR_E_BIT		(U(1) << 0)
 #define	PMCR_RESET_VAL		U(0x0)
 
 /*******************************************************************************
diff --git a/include/arch/aarch32/arch_features.h b/include/arch/aarch32/arch_features.h
index 99e3fd0..a6fe1a5 100644
--- a/include/arch/aarch32/arch_features.h
+++ b/include/arch/aarch32/arch_features.h
@@ -162,4 +162,9 @@
 static inline bool is_feat_s1pie_supported(void) { return false; }
 static inline bool is_feat_sxpie_supported(void) { return false; }
 
+static inline unsigned int read_feat_pmuv3_id_field(void)
+{
+	return ISOLATE_FIELD(read_id_dfr0(), ID_DFR0_PERFMON);
+}
+
 #endif /* ARCH_FEATURES_H */
diff --git a/include/arch/aarch32/arch_helpers.h b/include/arch/aarch32/arch_helpers.h
index ca5a44b..d30c1de 100644
--- a/include/arch/aarch32/arch_helpers.h
+++ b/include/arch/aarch32/arch_helpers.h
@@ -290,7 +290,7 @@
 DEFINE_COPROCR_RW_FUNCS(sdcr, SDCR)
 DEFINE_COPROCR_RW_FUNCS(hdcr, HDCR)
 DEFINE_COPROCR_RW_FUNCS(cnthp_ctl, CNTHP_CTL)
-DEFINE_COPROCR_READ_FUNC(pmcr, PMCR)
+DEFINE_COPROCR_RW_FUNCS(pmcr, PMCR)
 
 /*
  * Address translation
diff --git a/include/arch/aarch64/arch.h b/include/arch/aarch64/arch.h
index f3b1ca5..0af7c83 100644
--- a/include/arch/aarch64/arch.h
+++ b/include/arch/aarch64/arch.h
@@ -221,6 +221,12 @@
 #define ID_AA64DFR0_TRACEFILT_MASK	U(0xf)
 #define ID_AA64DFR0_TRACEFILT_SUPPORTED	U(1)
 #define ID_AA64DFR0_TRACEFILT_LENGTH	U(4)
+#define ID_AA64DFR0_PMUVER_LENGTH	U(4)
+#define ID_AA64DFR0_PMUVER_SHIFT	U(8)
+#define ID_AA64DFR0_PMUVER_MASK		U(0xf)
+#define ID_AA64DFR0_PMUVER_PMUV3	U(1)
+#define ID_AA64DFR0_PMUVER_PMUV3P7	U(7)
+#define ID_AA64DFR0_PMUVER_IMP_DEF	U(0xf)
 
 /* ID_AA64DFR0_EL1.PMS definitions (for ARMv8.2+) */
 #define ID_AA64DFR0_PMS_SHIFT		U(32)
@@ -599,12 +605,12 @@
 
 /* MDCR_EL2 definitions */
 #define MDCR_EL2_MTPME		(U(1) << 28)
-#define MDCR_EL2_HLP		(U(1) << 26)
+#define MDCR_EL2_HLP_BIT	(U(1) << 26)
 #define MDCR_EL2_E2TB(x)	((x) << 24)
 #define MDCR_EL2_E2TB_EL1	U(0x3)
-#define MDCR_EL2_HCCD		(U(1) << 23)
+#define MDCR_EL2_HCCD_BIT	(U(1) << 23)
 #define MDCR_EL2_TTRF		(U(1) << 19)
-#define MDCR_EL2_HPMD		(U(1) << 17)
+#define MDCR_EL2_HPMD_BIT	(U(1) << 17)
 #define MDCR_EL2_TPMS		(U(1) << 14)
 #define MDCR_EL2_E2PB(x)	((x) << 12)
 #define MDCR_EL2_E2PB_EL1	U(0x3)
@@ -615,6 +621,7 @@
 #define MDCR_EL2_HPME_BIT	(U(1) << 7)
 #define MDCR_EL2_TPM_BIT	(U(1) << 6)
 #define MDCR_EL2_TPMCR_BIT	(U(1) << 5)
+#define MDCR_EL2_HPMN_MASK	U(0x1f)
 #define MDCR_EL2_RESET_VAL	U(0x0)
 
 /* HSTR_EL2 definitions */
diff --git a/include/arch/aarch64/arch_features.h b/include/arch/aarch64/arch_features.h
index 609a95b..6d0ce52 100644
--- a/include/arch/aarch64/arch_features.h
+++ b/include/arch/aarch64/arch_features.h
@@ -639,6 +639,7 @@
 	return read_feat_trbe_id_field() != 0U;
 
 }
+
 /*******************************************************************************
  * Function to identify the presence of FEAT_SMEx (Scalar Matrix Extension)
  ******************************************************************************/
@@ -699,4 +700,9 @@
 			     ID_AA64MMFR0_EL1_TGRAN64);
 }
 
+static inline unsigned int read_feat_pmuv3_id_field(void)
+{
+	return ISOLATE_FIELD(read_id_aa64dfr0_el1(), ID_AA64DFR0_PMUVER);
+}
+
 #endif /* ARCH_FEATURES_H */
diff --git a/include/arch/aarch64/el2_common_macros.S b/include/arch/aarch64/el2_common_macros.S
index dcaea3d..ce6c7e6 100644
--- a/include/arch/aarch64/el2_common_macros.S
+++ b/include/arch/aarch64/el2_common_macros.S
@@ -103,7 +103,7 @@
 	 */
 	mov_imm	x0, ((MDCR_EL2_RESET_VAL | \
 		      MDCR_SPD32(MDCR_SPD32_DISABLE)) \
-		      & ~(MDCR_EL2_HPMD | MDCR_TDOSA_BIT | \
+		      & ~(MDCR_EL2_HPMD_BIT | MDCR_TDOSA_BIT | \
 		      MDCR_TDA_BIT | MDCR_TPM_BIT))
 
 	msr	mdcr_el2, x0
diff --git a/include/arch/aarch64/el3_common_macros.S b/include/arch/aarch64/el3_common_macros.S
index 2dee07d..88b0cd2 100644
--- a/include/arch/aarch64/el3_common_macros.S
+++ b/include/arch/aarch64/el3_common_macros.S
@@ -119,22 +119,6 @@
 	 * MDCR_EL3.TPM: Set to zero so that EL0, EL1, and EL2 System register
 	 *  accesses to all Performance Monitors registers do not trap to EL3.
 	 *
-	 * MDCR_EL3.SCCD: Set to one so that cycle counting by PMCCNTR_EL0 is
-	 *  prohibited in Secure state. This bit is RES0 in versions of the
-	 *  architecture with FEAT_PMUv3p5 not implemented, setting it to 1
-	 *  doesn't have any effect on them.
-	 *
-	 * MDCR_EL3.MCCD: Set to one so that cycle counting by PMCCNTR_EL0 is
-	 *  prohibited in EL3. This bit is RES0 in versions of the
-	 *  architecture with FEAT_PMUv3p7 not implemented, setting it to 1
-	 *  doesn't have any effect on them.
-	 *
-	 * MDCR_EL3.SPME: Set to zero so that event counting by the programmable
-	 *  counters PMEVCNTR<n>_EL0 is prohibited in Secure state. If ARMv8.2
-	 *  Debug is not implemented this bit does not have any effect on the
-	 *  counters unless there is support for the implementation defined
-	 *  authentication interface ExternalSecureNoninvasiveDebugEnabled().
-	 *
 	 * MDCR_EL3.NSTB, MDCR_EL3.NSTBE: Set to zero so that Trace Buffer
 	 *  owning security state is Secure state. If FEAT_TRBE is implemented,
 	 *  accesses to Trace Buffer control registers at EL2 and EL1 in any
@@ -149,10 +133,9 @@
 	 * ---------------------------------------------------------------------
 	 */
 	mov_imm	x0, ((MDCR_EL3_RESET_VAL | MDCR_SDD_BIT | \
-		      MDCR_SPD32(MDCR_SPD32_DISABLE) | MDCR_SCCD_BIT | \
-		      MDCR_MCCD_BIT) & ~(MDCR_SPME_BIT | MDCR_TDOSA_BIT | \
-		      MDCR_TDA_BIT | MDCR_TPM_BIT | MDCR_NSTB(MDCR_NSTB_EL1) | \
-		      MDCR_NSTBE | MDCR_TTRF_BIT))
+		      MDCR_SPD32(MDCR_SPD32_DISABLE)) & \
+		    ~(MDCR_TDOSA_BIT | MDCR_TDA_BIT | MDCR_TPM_BIT | \
+		      MDCR_NSTB(MDCR_NSTB_EL1) | MDCR_NSTBE | MDCR_TTRF_BIT))
 
 	mrs	x1, id_aa64dfr0_el1
 	ubfx	x1, x1, #ID_AA64DFR0_TRACEFILT_SHIFT, #ID_AA64DFR0_TRACEFILT_LENGTH
@@ -162,36 +145,6 @@
 	msr	mdcr_el3, x0
 
 	/* ---------------------------------------------------------------------
-	 * Initialise PMCR_EL0 setting all fields rather than relying
-	 * on hw. Some fields are architecturally UNKNOWN on reset.
-	 *
-	 * PMCR_EL0.LP: Set to one so that event counter overflow, that
-	 *  is recorded in PMOVSCLR_EL0[0-30], occurs on the increment
-	 *  that changes PMEVCNTR<n>_EL0[63] from 1 to 0, when ARMv8.5-PMU
-	 *  is implemented. This bit is RES0 in versions of the architecture
-	 *  earlier than ARMv8.5, setting it to 1 doesn't have any effect
-	 *  on them.
-	 *
-	 * PMCR_EL0.LC: Set to one so that cycle counter overflow, that
-	 *  is recorded in PMOVSCLR_EL0[31], occurs on the increment
-	 *  that changes PMCCNTR_EL0[63] from 1 to 0.
-	 *
-	 * PMCR_EL0.DP: Set to one so that the cycle counter,
-	 *  PMCCNTR_EL0 does not count when event counting is prohibited.
-	 *
-	 * PMCR_EL0.X: Set to zero to disable export of events.
-	 *
-	 * PMCR_EL0.D: Set to zero so that, when enabled, PMCCNTR_EL0
-	 *  counts on every clock cycle.
-	 * ---------------------------------------------------------------------
-	 */
-	mov_imm	x0, ((PMCR_EL0_RESET_VAL | PMCR_EL0_LP_BIT | \
-		      PMCR_EL0_LC_BIT | PMCR_EL0_DP_BIT) & \
-		    ~(PMCR_EL0_X_BIT | PMCR_EL0_D_BIT))
-
-	msr	pmcr_el0, x0
-
-	/* ---------------------------------------------------------------------
 	 * Enable External Aborts and SError Interrupts now that the exception
 	 * vectors have been setup.
 	 * ---------------------------------------------------------------------
diff --git a/include/lib/extensions/pmuv3.h b/include/lib/extensions/pmuv3.h
new file mode 100644
index 0000000..5d5d055
--- /dev/null
+++ b/include/lib/extensions/pmuv3.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2023, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef PMUV3_H
+#define PMUV3_H
+
+#include <context.h>
+
+void pmuv3_disable_el3(void);
+
+#ifdef __aarch64__
+void pmuv3_enable(cpu_context_t *ctx);
+void pmuv3_init_el2_unused(void);
+#endif /* __aarch64__ */
+
+#endif /* PMUV3_H */
diff --git a/lib/el3_runtime/aarch32/context_mgmt.c b/lib/el3_runtime/aarch32/context_mgmt.c
index 62e30fc..6414aaa 100644
--- a/lib/el3_runtime/aarch32/context_mgmt.c
+++ b/lib/el3_runtime/aarch32/context_mgmt.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2022, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2023, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -17,6 +17,7 @@
 #include <context.h>
 #include <lib/el3_runtime/context_mgmt.h>
 #include <lib/extensions/amu.h>
+#include <lib/extensions/pmuv3.h>
 #include <lib/extensions/sys_reg_trace.h>
 #include <lib/extensions/trf.h>
 #include <lib/utils.h>
@@ -147,6 +148,12 @@
 	if (is_feat_trf_supported()) {
 		trf_enable();
 	}
+
+	/*
+	 * Also applies to PMU < v3. The PMU is only disabled for EL3 and Secure
+	 * state execution. This does not affect lower NS ELs.
+	 */
+	pmuv3_disable_el3();
 #endif
 }
 
diff --git a/lib/el3_runtime/aarch64/context.S b/lib/el3_runtime/aarch64/context.S
index 9922fb1..771fcdc 100644
--- a/lib/el3_runtime/aarch64/context.S
+++ b/lib/el3_runtime/aarch64/context.S
@@ -568,6 +568,8 @@
 	stp	x28, x29, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X28]
 	mrs	x18, sp_el0
 	str	x18, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_SP_EL0]
+
+	/* PMUv3 is presumed to be always present */
 	mrs	x9, pmcr_el0
 	str	x9, [sp, #CTX_EL3STATE_OFFSET + CTX_PMCR_EL0]
 	/* Disable cycle counter when event counting is prohibited */
@@ -651,6 +653,8 @@
 	msr	APGAKeyLo_EL1, x8
 	msr	APGAKeyHi_EL1, x9
 #endif /* CTX_INCLUDE_PAUTH_REGS */
+
+	/* PMUv3 is presumed to be always present */
 	ldr	x0, [sp, #CTX_EL3STATE_OFFSET + CTX_PMCR_EL0]
 	msr	pmcr_el0, x0
 	ldp	x0, x1, [sp, #CTX_GPREGS_OFFSET + CTX_GPREG_X0]
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index 44177fa..4a6598a 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -24,6 +24,7 @@
 #include <lib/extensions/amu.h>
 #include <lib/extensions/brbe.h>
 #include <lib/extensions/mpam.h>
+#include <lib/extensions/pmuv3.h>
 #include <lib/extensions/sme.h>
 #include <lib/extensions/spe.h>
 #include <lib/extensions/sve.h>
@@ -266,16 +267,6 @@
 	write_ctx_reg(get_el2_sysregs_ctx(ctx), CTX_ICC_SRE_EL2,
 			icc_sre_el2);
 
-	/*
-	 * Initialize MDCR_EL2.HPMN to its hardware reset value so we don't
-	 * throw anyone off who expects this to be sensible.
-	 * TODO: A similar thing happens in cm_prepare_el3_exit. They should be
-	 * unified with the proper PMU implementation
-	 */
-	u_register_t mdcr_el2 = ((read_pmcr_el0() >> PMCR_EL0_N_SHIFT) &
-			PMCR_EL0_N_MASK);
-	write_ctx_reg(get_el2_sysregs_ctx(ctx), CTX_MDCR_EL2, mdcr_el2);
-
 	if (is_feat_hcx_supported()) {
 		/*
 		 * Initialize register HCRX_EL2 with its init value.
@@ -561,6 +552,7 @@
 #if IMAGE_BL31
 void cm_manage_extensions_el3(void)
 {
+	pmuv3_disable_el3();
 }
 #endif /* IMAGE_BL31 */
 
@@ -570,6 +562,7 @@
 static void manage_extensions_nonsecure(cpu_context_t *ctx)
 {
 #if IMAGE_BL31
+	pmuv3_enable(ctx);
 #endif /* IMAGE_BL31 */
 }
 
@@ -580,6 +573,7 @@
 static void manage_extensions_nonsecure_el2_unused(void)
 {
 #if IMAGE_BL31
+	pmuv3_init_el2_unused();
 #endif /* IMAGE_BL31 */
 }
 
@@ -793,24 +787,11 @@
 			 * relying on hw. Some fields are architecturally
 			 * UNKNOWN on reset.
 			 *
-			 * MDCR_EL2.HLP: Set to one so that event counter
-			 *  overflow, that is recorded in PMOVSCLR_EL0[0-30],
-			 *  occurs on the increment that changes
-			 *  PMEVCNTR<n>_EL0[63] from 1 to 0, when ARMv8.5-PMU is
-			 *  implemented. This bit is RES0 in versions of the
-			 *  architecture earlier than ARMv8.5, setting it to 1
-			 *  doesn't have any effect on them.
-			 *
 			 * MDCR_EL2.TTRF: Set to zero so that access to Trace
 			 *  Filter Control register TRFCR_EL1 at EL1 is not
 			 *  trapped to EL2. This bit is RES0 in versions of
 			 *  the architecture earlier than ARMv8.4.
 			 *
-			 * MDCR_EL2.HPMD: Set to one so that event counting is
-			 *  prohibited at EL2. This bit is RES0 in versions of
-			 *  the architecture earlier than ARMv8.1, setting it
-			 *  to 1 doesn't have any effect on them.
-			 *
 			 * MDCR_EL2.TPMS: Set to zero so that accesses to
 			 *  Statistical Profiling control registers from EL1
 			 *  do not trap to EL2. This bit is RES0 when SPE is
@@ -830,35 +811,15 @@
 			 * MDCR_EL2.TDE: Set to zero so that debug exceptions
 			 *  are not routed to EL2.
 			 *
-			 * MDCR_EL2.HPME: Set to zero to disable EL2 Performance
-			 *  Monitors.
-			 *
-			 * MDCR_EL2.TPM: Set to zero so that Non-secure EL0 and
-			 *  EL1 accesses to all Performance Monitors registers
-			 *  are not trapped to EL2.
-			 *
-			 * MDCR_EL2.TPMCR: Set to zero so that Non-secure EL0
-			 *  and EL1 accesses to the PMCR_EL0 or PMCR are not
-			 *  trapped to EL2.
-			 *
-			 * MDCR_EL2.HPMN: Set to value of PMCR_EL0.N which is the
-			 *  architecturally-defined reset value.
-			 *
 			 * MDCR_EL2.E2TB: Set to zero so that the trace Buffer
 			 *  owning exception level is NS-EL1 and, tracing is
 			 *  prohibited at NS-EL2. These bits are RES0 when
 			 *  FEAT_TRBE is not implemented.
 			 */
-			mdcr_el2 = ((MDCR_EL2_RESET_VAL | MDCR_EL2_HLP |
-				     MDCR_EL2_HPMD) |
-				   ((read_pmcr_el0() & PMCR_EL0_N_BITS)
-				   >> PMCR_EL0_N_SHIFT)) &
-				   ~(MDCR_EL2_TTRF | MDCR_EL2_TPMS |
+			mdcr_el2 = ((MDCR_EL2_RESET_VAL) & ~(MDCR_EL2_TTRF |
 				     MDCR_EL2_TDRA_BIT | MDCR_EL2_TDOSA_BIT |
 				     MDCR_EL2_TDA_BIT | MDCR_EL2_TDE_BIT |
-				     MDCR_EL2_HPME_BIT | MDCR_EL2_TPM_BIT |
-				     MDCR_EL2_TPMCR_BIT |
-				     MDCR_EL2_E2TB(MDCR_EL2_E2TB_EL1));
+				     MDCR_EL2_E2TB(MDCR_EL2_E2TB_EL1)));
 
 			write_mdcr_el2(mdcr_el2);
 
diff --git a/lib/extensions/pmuv3/aarch32/pmuv3.c b/lib/extensions/pmuv3/aarch32/pmuv3.c
new file mode 100644
index 0000000..a4fdb3b
--- /dev/null
+++ b/lib/extensions/pmuv3/aarch32/pmuv3.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2023, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <arch_features.h>
+#include <arch_helpers.h>
+#include <lib/extensions/pmuv3.h>
+
+/*
+ * Applies to all PMU versions. The name is PMUv3 for compatibility with
+ * AArch64 and to avoid clashing with platforms which reuse the PMU name.
+ */
+void pmuv3_disable_el3(void)
+{
+	u_register_t sdcr = read_sdcr();
+
+	/* ---------------------------------------------------------------------
+	 * Initialise SDCR, setting all the fields rather than relying on hw.
+	 *
+	 * SDCR.SCCD: Set to one so that cycle counting by PMCCNTR is prohibited
+	 *  in Secure state. This bit is RES0 in versions of the architecture
+	 *  earlier than ARMv8.5.
+	 *
+	 * SDCR.SPME: Set to zero so that event counting is prohibited in Secure
+	 *  state (and explicitly EL3 with later revisions). If ARMv8.2 Debug is
+	 *  not implemented this bit does not have any effect on the counters
+	 *  unless there is support for the implementation defined
+	 *  authentication interface ExternalSecureNoninvasiveDebugEnabled().
+	 * ---------------------------------------------------------------------
+	 */
+	sdcr = (sdcr | SDCR_SCCD_BIT) & ~SDCR_SPME_BIT;
+	write_sdcr(sdcr);
+
+	/* ---------------------------------------------------------------------
+	 * Initialise PMCR, setting all fields rather than relying
+	 * on hw. Some fields are architecturally UNKNOWN on reset.
+	 *
+	 * PMCR.DP: Set to one to prohibit cycle counting whilst in Secure mode.
+	 *
+	 * PMCR.X: Set to zero to disable export of events.
+	 *
+	 * PMCR.C: Set to one to reset PMCCNTR.
+	 *
+	 * PMCR.P: Set to one to reset each event counter PMEVCNTR<n> to zero.
+	 *
+	 * PMCR.E: Set to zero to disable cycle and event counters.
+	 * ---------------------------------------------------------------------
+	 */
+
+	write_pmcr((read_pmcr() | PMCR_DP_BIT | PMCR_C_BIT | PMCR_P_BIT) &
+		 ~(PMCR_X_BIT | PMCR_E_BIT));
+}
diff --git a/lib/extensions/pmuv3/aarch64/pmuv3.c b/lib/extensions/pmuv3/aarch64/pmuv3.c
new file mode 100644
index 0000000..107d12e
--- /dev/null
+++ b/lib/extensions/pmuv3/aarch64/pmuv3.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2023, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <arch_features.h>
+#include <arch_helpers.h>
+#include <lib/extensions/pmuv3.h>
+
+static u_register_t init_mdcr_el2_hpmn(u_register_t mdcr_el2)
+{
+	/*
+	 * Initialize MDCR_EL2.HPMN to its hardware reset value so we don't
+	 * throw anyone off who expects this to be sensible.
+	 */
+	mdcr_el2 &= ~MDCR_EL2_HPMN_MASK;
+	mdcr_el2 |= ((read_pmcr_el0() >> PMCR_EL0_N_SHIFT) & PMCR_EL0_N_MASK);
+
+	return mdcr_el2;
+}
+
+void pmuv3_enable(cpu_context_t *ctx)
+{
+#if CTX_INCLUDE_EL2_REGS
+	u_register_t mdcr_el2;
+
+	mdcr_el2 = read_ctx_reg(get_el2_sysregs_ctx(ctx), CTX_MDCR_EL2);
+	mdcr_el2 = init_mdcr_el2_hpmn(mdcr_el2);
+	write_ctx_reg(get_el2_sysregs_ctx(ctx), CTX_MDCR_EL2, mdcr_el2);
+#endif /* CTX_INCLUDE_EL2_REGS */
+}
+
+void pmuv3_disable_el3(void)
+{
+	u_register_t mdcr_el3 = read_mdcr_el3();
+
+	/* ---------------------------------------------------------------------
+	 * Initialise MDCR_EL3, setting all fields rather than relying on hw.
+	 * Some fields are architecturally UNKNOWN on reset.
+	 *
+	 * MDCR_EL3.MPMX: Set to zero so that event counters are not affected
+	 *  (when SPME = 0).
+	 *
+	 * MDCR_EL3.MCCD: Set to one so that cycle counting by PMCCNTR_EL0 is
+	 *  prohibited in EL3. This bit is RES0 in versions of the
+	 *  architecture with FEAT_PMUv3p7 not implemented.
+	 *
+	 * MDCR_EL3.SCCD: Set to one so that cycle counting by PMCCNTR_EL0 is
+	 *  prohibited in Secure state. This bit is RES0 in versions of the
+	 *  architecture with FEAT_PMUv3p5 not implemented.
+	 *
+	 * MDCR_EL3.SPME: Set to zero so that event counting is prohibited in
+	 *  Secure state (and explicitly EL3 with later revisions). If ARMv8.2
+	 *  Debug is not implemented this bit does not have any effect on the
+	 *  counters unless there is support for the implementation defined
+	 *  authentication interface ExternalSecureNoninvasiveDebugEnabled().
+	 *
+	 * The SPME/MPMX combination is a little tricky. Below is a small
+	 * summary if another combination is ever needed:
+	 * SPME | MPMX | secure world |   EL3
+	 * -------------------------------------
+	 *   0  |  0   |    disabled  | disabled
+	 *   1  |  0   |    enabled   | enabled
+	 *   0  |  1   |    enabled   | disabled
+	 *   1  |  1   |    enabled   | disabled only for counters 0 to
+	 *                              MDCR_EL2.HPMN - 1. Enabled for the rest
+	 */
+	mdcr_el3 = (mdcr_el3 | MDCR_SCCD_BIT | MDCR_MCCD_BIT) &
+		  ~(MDCR_MPMX_BIT | MDCR_SPME_BIT);
+	write_mdcr_el3(mdcr_el3);
+
+	/* ---------------------------------------------------------------------
+	 * Initialise PMCR_EL0 setting all fields rather than relying
+	 * on hw. Some fields are architecturally UNKNOWN on reset.
+	 *
+	 * PMCR_EL0.DP: Set to one so that the cycle counter,
+	 *  PMCCNTR_EL0 does not count when event counting is prohibited.
+	 *  Necessary on PMUv3 <= p7 where MDCR_EL3.{SCCD,MCCD} are not
+	 *  available.
+	 *
+	 * PMCR_EL0.X: Set to zero to disable export of events.
+	 *
+	 * PMCR_EL0.C: Set to one to reset PMCCNTR_EL0 to zero.
+	 *
+	 * PMCR_EL0.P: Set to one to reset each event counter PMEVCNTR<n>_EL0 to
+	 *  zero.
+	 *
+	 * PMCR_EL0.E: Set to zero to disable cycle and event counters.
+	 * ---------------------------------------------------------------------
+	 */
+	write_pmcr_el0((read_pmcr_el0() | PMCR_EL0_DP_BIT | PMCR_EL0_C_BIT |
+			PMCR_EL0_P_BIT) & ~(PMCR_EL0_X_BIT | PMCR_EL0_E_BIT));
+}
+
+void pmuv3_init_el2_unused(void)
+{
+	u_register_t mdcr_el2 = read_mdcr_el2();
+
+	/*
+	 * Initialise MDCR_EL2, setting all fields rather than
+	 * relying on hw. Some fields are architecturally
+	 * UNKNOWN on reset.
+	 *
+	 * MDCR_EL2.HLP: Set to one so that event counter overflow, that is
+	 *  recorded in PMOVSCLR_EL0[0-30], occurs on the increment that changes
+	 *  PMEVCNTR<n>_EL0[63] from 1 to 0, when ARMv8.5-PMU is implemented.
+	 *  This bit is RES0 in versions of the architecture earlier than
+	 *  ARMv8.5, setting it to 1 doesn't have any effect on them.
+	 *
+	 * MDCR_EL2.HCCD: Set to one to prohibit cycle counting at EL2. This bit
+	 *  is RES0 in versions of the architecture with FEAT_PMUv3p5 not
+	 *  implemented.
+	 *
+	 * MDCR_EL2.HPMD: Set to one so that event counting is
+	 *  prohibited at EL2 for counters n < MDCR_EL2.HPMN. This bit is RES0
+	 *  in versions of the architecture with FEAT_PMUv3p1 not implemented.
+	 *
+	 * MDCR_EL2.HPME: Set to zero to disable event counters for counters
+	 *  n >= MDCR_EL2.HPMN.
+	 *
+	 * MDCR_EL2.TPM: Set to zero so that Non-secure EL0 and
+	 *  EL1 accesses to all Performance Monitors registers
+	 *  are not trapped to EL2.
+	 *
+	 * MDCR_EL2.TPMCR: Set to zero so that Non-secure EL0
+	 *  and EL1 accesses to the PMCR_EL0 or PMCR are not
+	 *  trapped to EL2.
+	 */
+	mdcr_el2 = (mdcr_el2 | MDCR_EL2_HLP_BIT | MDCR_EL2_HPMD_BIT |
+		    MDCR_EL2_HCCD_BIT) &
+		  ~(MDCR_EL2_HPME_BIT | MDCR_EL2_TPM_BIT | MDCR_EL2_TPMCR_BIT);
+	mdcr_el2 = init_mdcr_el2_hpmn(mdcr_el2);
+	write_mdcr_el2(mdcr_el2);
+}
diff --git a/services/std_svc/rmmd/rmmd_main.c b/services/std_svc/rmmd/rmmd_main.c
index 24f6c41..c80b524 100644
--- a/services/std_svc/rmmd/rmmd_main.c
+++ b/services/std_svc/rmmd/rmmd_main.c
@@ -18,6 +18,8 @@
 #include <context.h>
 #include <lib/el3_runtime/context_mgmt.h>
 #include <lib/el3_runtime/pubsub.h>
+#include <lib/extensions/pmuv3.h>
+#include <lib/extensions/sys_reg_trace.h>
 #include <lib/gpt_rme/gpt_rme.h>
 
 #include <lib/spinlock.h>
@@ -125,6 +127,8 @@
 	 */
 		sve_enable(ctx);
 	}
+
+	pmuv3_enable(ctx);
 }
 
 /*******************************************************************************