Enable v8.6 AMU enhancements (FEAT_AMUv1p1)

ARMv8.6 adds virtual offset registers to support virtualization of the
event counters in EL1 and EL0.  This patch enables support for this
feature in EL3 firmware.

Signed-off-by: John Powell <john.powell@arm.com>
Change-Id: I7ee1f3d9f554930bf5ef6f3d492e932e6d95b217
diff --git a/lib/extensions/amu/aarch64/amu.c b/lib/extensions/amu/aarch64/amu.c
index 4997363..24c3737 100644
--- a/lib/extensions/amu/aarch64/amu.c
+++ b/lib/extensions/amu/aarch64/amu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -8,6 +8,7 @@
 #include <stdbool.h>
 
 #include <arch.h>
+#include <arch_features.h>
 #include <arch_helpers.h>
 
 #include <lib/el3_runtime/pubsub_events.h>
@@ -18,13 +19,17 @@
 
 static struct amu_ctx amu_ctxs[PLATFORM_CORE_COUNT];
 
-/* Check if AMUv1 for Armv8.4 or 8.6 is implemented */
-bool amu_supported(void)
+/*
+ * Get AMU version value from aa64pfr0.
+ * Return values
+ *   ID_AA64PFR0_AMU_V1: FEAT_AMUv1 supported (introduced in ARM v8.4)
+ *   ID_AA64PFR0_AMU_V1P1: FEAT_AMUv1p1 supported (introduced in ARM v8.6)
+ *   ID_AA64PFR0_AMU_NOT_SUPPORTED: not supported
+ */
+unsigned int amu_get_version(void)
 {
-	uint64_t features = read_id_aa64pfr0_el1() >> ID_AA64PFR0_AMU_SHIFT;
-
-	features &= ID_AA64PFR0_AMU_MASK;
-	return ((features == 1U) || (features == 2U));
+	return (unsigned int)(read_id_aa64pfr0_el1() >> ID_AA64PFR0_AMU_SHIFT) &
+		ID_AA64PFR0_AMU_MASK;
 }
 
 #if AMU_GROUP1_NR_COUNTERS
@@ -44,8 +49,9 @@
 void amu_enable(bool el2_unused)
 {
 	uint64_t v;
+	unsigned int amu_version = amu_get_version();
 
-	if (!amu_supported()) {
+	if (amu_version == ID_AA64PFR0_AMU_NOT_SUPPORTED) {
 		return;
 	}
 
@@ -96,12 +102,36 @@
 	/* Enable group 1 counters */
 	write_amcntenset1_el0(AMU_GROUP1_COUNTERS_MASK);
 #endif
+
+	/* Initialize FEAT_AMUv1p1 features if present. */
+	if (amu_version < ID_AA64PFR0_AMU_V1P1) {
+		return;
+	}
+
+	if (el2_unused) {
+		/* Make sure virtual offsets are disabled if EL2 not used. */
+		write_hcr_el2(read_hcr_el2() & ~HCR_AMVOFFEN_BIT);
+	}
+
+#if AMU_RESTRICT_COUNTERS
+	/*
+	 * FEAT_AMUv1p1 adds a register field to restrict access to group 1
+	 * counters at all but the highest implemented EL.  This is controlled
+	 * with the AMU_RESTRICT_COUNTERS compile time flag, when set, system
+	 * register reads at lower ELs return zero.  Reads from the memory
+	 * mapped view are unaffected.
+	 */
+	VERBOSE("AMU group 1 counter access restricted.\n");
+	write_amcr_el0(read_amcr_el0() | AMCR_CG1RZ_BIT);
+#else
+	write_amcr_el0(read_amcr_el0() & ~AMCR_CG1RZ_BIT);
+#endif
 }
 
 /* Read the group 0 counter identified by the given `idx`. */
 uint64_t amu_group0_cnt_read(unsigned int idx)
 {
-	assert(amu_supported());
+	assert(amu_get_version() != ID_AA64PFR0_AMU_NOT_SUPPORTED);
 	assert(idx < AMU_GROUP0_NR_COUNTERS);
 
 	return amu_group0_cnt_read_internal(idx);
@@ -110,18 +140,49 @@
 /* Write the group 0 counter identified by the given `idx` with `val` */
 void amu_group0_cnt_write(unsigned  int idx, uint64_t val)
 {
-	assert(amu_supported());
+	assert(amu_get_version() != ID_AA64PFR0_AMU_NOT_SUPPORTED);
 	assert(idx < AMU_GROUP0_NR_COUNTERS);
 
 	amu_group0_cnt_write_internal(idx, val);
 	isb();
 }
 
+/*
+ * Read the group 0 offset register for a given index. Index must be 0, 2,
+ * or 3, the register for 1 does not exist.
+ *
+ * Using this function requires FEAT_AMUv1p1 support.
+ */
+uint64_t amu_group0_voffset_read(unsigned int idx)
+{
+	assert(amu_get_version() >= ID_AA64PFR0_AMU_V1P1);
+	assert(idx < AMU_GROUP0_NR_COUNTERS);
+	assert(idx != 1U);
+
+	return amu_group0_voffset_read_internal(idx);
+}
+
+/*
+ * Write the group 0 offset register for a given index. Index must be 0, 2, or
+ * 3, the register for 1 does not exist.
+ *
+ * Using this function requires FEAT_AMUv1p1 support.
+ */
+void amu_group0_voffset_write(unsigned int idx, uint64_t val)
+{
+	assert(amu_get_version() >= ID_AA64PFR0_AMU_V1P1);
+	assert(idx < AMU_GROUP0_NR_COUNTERS);
+	assert(idx != 1U);
+
+	amu_group0_voffset_write_internal(idx, val);
+	isb();
+}
+
 #if AMU_GROUP1_NR_COUNTERS
 /* Read the group 1 counter identified by the given `idx` */
-uint64_t amu_group1_cnt_read(unsigned  int idx)
+uint64_t amu_group1_cnt_read(unsigned int idx)
 {
-	assert(amu_supported());
+	assert(amu_get_version() != ID_AA64PFR0_AMU_NOT_SUPPORTED);
 	assert(amu_group1_supported());
 	assert(idx < AMU_GROUP1_NR_COUNTERS);
 
@@ -129,9 +190,9 @@
 }
 
 /* Write the group 1 counter identified by the given `idx` with `val` */
-void amu_group1_cnt_write(unsigned  int idx, uint64_t val)
+void amu_group1_cnt_write(unsigned int idx, uint64_t val)
 {
-	assert(amu_supported());
+	assert(amu_get_version() != ID_AA64PFR0_AMU_NOT_SUPPORTED);
 	assert(amu_group1_supported());
 	assert(idx < AMU_GROUP1_NR_COUNTERS);
 
@@ -140,12 +201,45 @@
 }
 
 /*
+ * Read the group 1 offset register for a given index.
+ *
+ * Using this function requires FEAT_AMUv1p1 support.
+ */
+uint64_t amu_group1_voffset_read(unsigned int idx)
+{
+	assert(amu_get_version() >= ID_AA64PFR0_AMU_V1P1);
+	assert(amu_group1_supported());
+	assert(idx < AMU_GROUP1_NR_COUNTERS);
+	assert(((read_amcg1idr_el0() >> AMCG1IDR_VOFF_SHIFT) &
+		(1ULL << idx)) != 0ULL);
+
+	return amu_group1_voffset_read_internal(idx);
+}
+
+/*
+ * Write the group 1 offset register for a given index.
+ *
+ * Using this function requires FEAT_AMUv1p1 support.
+ */
+void amu_group1_voffset_write(unsigned int idx, uint64_t val)
+{
+	assert(amu_get_version() >= ID_AA64PFR0_AMU_V1P1);
+	assert(amu_group1_supported());
+	assert(idx < AMU_GROUP1_NR_COUNTERS);
+	assert(((read_amcg1idr_el0() >> AMCG1IDR_VOFF_SHIFT) &
+		(1ULL << idx)) != 0ULL);
+
+	amu_group1_voffset_write_internal(idx, val);
+	isb();
+}
+
+/*
  * Program the event type register for the given `idx` with
  * the event number `val`
  */
 void amu_group1_set_evtype(unsigned int idx, unsigned int val)
 {
-	assert(amu_supported());
+	assert(amu_get_version() != ID_AA64PFR0_AMU_NOT_SUPPORTED);
 	assert(amu_group1_supported());
 	assert(idx < AMU_GROUP1_NR_COUNTERS);
 
@@ -159,7 +253,7 @@
 	struct amu_ctx *ctx = &amu_ctxs[plat_my_core_pos()];
 	unsigned int i;
 
-	if (!amu_supported()) {
+	if (amu_get_version() == ID_AA64PFR0_AMU_NOT_SUPPORTED) {
 		return (void *)-1;
 	}
 
@@ -190,13 +284,37 @@
 		ctx->group0_cnts[i] = amu_group0_cnt_read(i);
 	}
 
+	/* Save group 0 virtual offsets if supported and enabled. */
+	if ((amu_get_version() >= ID_AA64PFR0_AMU_V1P1) &&
+			((read_hcr_el2() & HCR_AMVOFFEN_BIT) != 0ULL)) {
+		/* Not using a loop because count is fixed and index 1 DNE. */
+		ctx->group0_voffsets[0U] = amu_group0_voffset_read(0U);
+		ctx->group0_voffsets[1U] = amu_group0_voffset_read(2U);
+		ctx->group0_voffsets[2U] = amu_group0_voffset_read(3U);
+	}
+
 #if AMU_GROUP1_NR_COUNTERS
 	/* Save group 1 counters */
 	for (i = 0U; i < AMU_GROUP1_NR_COUNTERS; i++) {
-		if ((AMU_GROUP1_COUNTERS_MASK & (1U << i)) != 0U) {
+		if ((AMU_GROUP1_COUNTERS_MASK & (1UL << i)) != 0U) {
 			ctx->group1_cnts[i] = amu_group1_cnt_read(i);
 		}
 	}
+
+	/* Save group 1 virtual offsets if supported and enabled. */
+	if ((amu_get_version() >= ID_AA64PFR0_AMU_V1P1) &&
+			((read_hcr_el2() & HCR_AMVOFFEN_BIT) != 0ULL)) {
+		u_register_t amcg1idr = read_amcg1idr_el0() >>
+			AMCG1IDR_VOFF_SHIFT;
+		amcg1idr = amcg1idr & AMU_GROUP1_COUNTERS_MASK;
+
+		for (i = 0U; i < AMU_GROUP1_NR_COUNTERS; i++) {
+			if (((amcg1idr >> i) & 1ULL) != 0ULL) {
+				ctx->group1_voffsets[i] =
+					amu_group1_voffset_read(i);
+			}
+		}
+	}
 #endif
 	return (void *)0;
 }
@@ -206,7 +324,7 @@
 	struct amu_ctx *ctx = &amu_ctxs[plat_my_core_pos()];
 	unsigned int i;
 
-	if (!amu_supported()) {
+	if (amu_get_version() == ID_AA64PFR0_AMU_NOT_SUPPORTED) {
 		return (void *)-1;
 	}
 
@@ -227,17 +345,41 @@
 		amu_group0_cnt_write(i, ctx->group0_cnts[i]);
 	}
 
+	/* Restore group 0 virtual offsets if supported and enabled. */
+	if ((amu_get_version() >= ID_AA64PFR0_AMU_V1P1) &&
+			((read_hcr_el2() & HCR_AMVOFFEN_BIT) != 0ULL)) {
+		/* Not using a loop because count is fixed and index 1 DNE. */
+		amu_group0_voffset_write(0U, ctx->group0_voffsets[0U]);
+		amu_group0_voffset_write(2U, ctx->group0_voffsets[1U]);
+		amu_group0_voffset_write(3U, ctx->group0_voffsets[2U]);
+	}
+
 	/* Restore group 0 counter configuration */
 	write_amcntenset0_el0(AMU_GROUP0_COUNTERS_MASK);
 
 #if AMU_GROUP1_NR_COUNTERS
 	/* Restore group 1 counters */
 	for (i = 0U; i < AMU_GROUP1_NR_COUNTERS; i++) {
-		if ((AMU_GROUP1_COUNTERS_MASK & (1U << i)) != 0U) {
+		if ((AMU_GROUP1_COUNTERS_MASK & (1UL << i)) != 0U) {
 			amu_group1_cnt_write(i, ctx->group1_cnts[i]);
 		}
 	}
 
+	/* Restore group 1 virtual offsets if supported and enabled. */
+	if ((amu_get_version() >= ID_AA64PFR0_AMU_V1P1) &&
+			((read_hcr_el2() & HCR_AMVOFFEN_BIT) != 0ULL)) {
+		u_register_t amcg1idr = read_amcg1idr_el0() >>
+			AMCG1IDR_VOFF_SHIFT;
+		amcg1idr = amcg1idr & AMU_GROUP1_COUNTERS_MASK;
+
+		for (i = 0U; i < AMU_GROUP1_NR_COUNTERS; i++) {
+			if (((amcg1idr >> i) & 1ULL) != 0ULL) {
+				amu_group1_voffset_write(i,
+					ctx->group1_voffsets[i]);
+			}
+		}
+	}
+
 	/* Restore group 1 counter configuration */
 	write_amcntenset1_el0(AMU_GROUP1_COUNTERS_MASK);
 #endif
diff --git a/lib/extensions/amu/aarch64/amu_helpers.S b/lib/extensions/amu/aarch64/amu_helpers.S
index 89007a3..9989abd 100644
--- a/lib/extensions/amu/aarch64/amu_helpers.S
+++ b/lib/extensions/amu/aarch64/amu_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -14,6 +14,12 @@
 	.globl	amu_group1_cnt_write_internal
 	.globl	amu_group1_set_evtype_internal
 
+	/* FEAT_AMUv1p1 virtualisation offset register functions */
+	.globl	amu_group0_voffset_read_internal
+	.globl	amu_group0_voffset_write_internal
+	.globl	amu_group1_voffset_read_internal
+	.globl	amu_group1_voffset_write_internal
+
 /*
  * uint64_t amu_group0_cnt_read_internal(int idx);
  *
@@ -211,3 +217,169 @@
 	write	AMEVTYPER1E_EL0		/* index 14 */
 	write	AMEVTYPER1F_EL0		/* index 15 */
 endfunc amu_group1_set_evtype_internal
+
+/*
+ * Accessor functions for virtual offset registers added with FEAT_AMUv1p1
+ */
+
+/*
+ * uint64_t amu_group0_voffset_read_internal(int idx);
+ *
+ * Given `idx`, read the corresponding AMU virtual offset register
+ * and return it in `x0`.
+ */
+func amu_group0_voffset_read_internal
+	adr	x1, 1f
+#if ENABLE_ASSERTIONS
+	/*
+	 * It can be dangerous to call this function with an
+	 * out of bounds index.  Ensure `idx` is valid.
+	 */
+	tst	x0, #~3
+	ASM_ASSERT(eq)
+	/* Make sure idx != 1 since AMEVCNTVOFF01_EL2 does not exist */
+	cmp	x0, #1
+	ASM_ASSERT(ne)
+#endif
+	/*
+	 * Given `idx` calculate address of mrs/ret instruction pair
+	 * in the table below.
+	 */
+	add	x1, x1, x0, lsl #3	/* each mrs/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x1, x1, x0, lsl #2	/* + "bti j" instruction */
+#endif
+	br	x1
+
+1:	read	AMEVCNTVOFF00_EL2	/* index 0 */
+	.skip	8			/* AMEVCNTVOFF01_EL2 does not exist */
+#if ENABLE_BTI
+	.skip	4
+#endif
+	read	AMEVCNTVOFF02_EL2	/* index 2 */
+	read	AMEVCNTVOFF03_EL2	/* index 3 */
+endfunc amu_group0_voffset_read_internal
+
+/*
+ * void amu_group0_voffset_write_internal(int idx, uint64_t val);
+ *
+ * Given `idx`, write `val` to the corresponding AMU virtual offset register.
+ */
+func amu_group0_voffset_write_internal
+	adr	x2, 1f
+#if ENABLE_ASSERTIONS
+	/*
+	 * It can be dangerous to call this function with an
+	 * out of bounds index.  Ensure `idx` is valid.
+	 */
+	tst	x0, #~3
+	ASM_ASSERT(eq)
+	/* Make sure idx != 1 since AMEVCNTVOFF01_EL2 does not exist */
+	cmp	x0, #1
+	ASM_ASSERT(ne)
+#endif
+	/*
+	 * Given `idx` calculate address of mrs/ret instruction pair
+	 * in the table below.
+	 */
+	add	x2, x2, x0, lsl #3	/* each msr/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x2, x2, x0, lsl #2	/* + "bti j" instruction */
+#endif
+	br	x2
+
+1:	write	AMEVCNTVOFF00_EL2	/* index 0 */
+	.skip	8			/* AMEVCNTVOFF01_EL2 does not exist */
+#if ENABLE_BTI
+	.skip	4
+#endif
+	write	AMEVCNTVOFF02_EL2	/* index 2 */
+	write	AMEVCNTVOFF03_EL2	/* index 3 */
+endfunc amu_group0_voffset_write_internal
+
+/*
+ * uint64_t amu_group1_voffset_read_internal(int idx);
+ *
+ * Given `idx`, read the corresponding AMU virtual offset register
+ * and return it in `x0`.
+ */
+func amu_group1_voffset_read_internal
+	adr	x1, 1f
+#if ENABLE_ASSERTIONS
+	/*
+	 * It can be dangerous to call this function with an
+	 * out of bounds index.  Ensure `idx` is valid.
+	 */
+	tst	x0, #~0xF
+	ASM_ASSERT(eq)
+#endif
+	/*
+	 * Given `idx` calculate address of mrs/ret instruction pair
+	 * in the table below.
+	 */
+	add	x1, x1, x0, lsl #3	/* each mrs/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x1, x1, x0, lsl #2	/* + "bti j" instruction */
+#endif
+	br	x1
+
+1:	read	AMEVCNTVOFF10_EL2	/* index 0 */
+	read	AMEVCNTVOFF11_EL2	/* index 1 */
+	read	AMEVCNTVOFF12_EL2	/* index 2 */
+	read	AMEVCNTVOFF13_EL2	/* index 3 */
+	read	AMEVCNTVOFF14_EL2	/* index 4 */
+	read	AMEVCNTVOFF15_EL2	/* index 5 */
+	read	AMEVCNTVOFF16_EL2	/* index 6 */
+	read	AMEVCNTVOFF17_EL2	/* index 7 */
+	read	AMEVCNTVOFF18_EL2	/* index 8 */
+	read	AMEVCNTVOFF19_EL2	/* index 9 */
+	read	AMEVCNTVOFF1A_EL2	/* index 10 */
+	read	AMEVCNTVOFF1B_EL2	/* index 11 */
+	read	AMEVCNTVOFF1C_EL2	/* index 12 */
+	read	AMEVCNTVOFF1D_EL2	/* index 13 */
+	read	AMEVCNTVOFF1E_EL2	/* index 14 */
+	read	AMEVCNTVOFF1F_EL2	/* index 15 */
+endfunc amu_group1_voffset_read_internal
+
+/*
+ * void amu_group1_voffset_write_internal(int idx, uint64_t val);
+ *
+ * Given `idx`, write `val` to the corresponding AMU virtual offset register.
+ */
+func amu_group1_voffset_write_internal
+	adr	x2, 1f
+#if ENABLE_ASSERTIONS
+	/*
+	 * It can be dangerous to call this function with an
+	 * out of bounds index.  Ensure `idx` is valid.
+	 */
+	tst	x0, #~0xF
+	ASM_ASSERT(eq)
+#endif
+	/*
+	 * Given `idx` calculate address of mrs/ret instruction pair
+	 * in the table below.
+	 */
+	add	x2, x2, x0, lsl #3	/* each msr/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x2, x2, x0, lsl #2	/* + "bti j" instruction */
+#endif
+	br	x2
+
+1:	write	AMEVCNTVOFF10_EL2	/* index 0 */
+	write	AMEVCNTVOFF11_EL2	/* index 1 */
+	write	AMEVCNTVOFF12_EL2	/* index 2 */
+	write	AMEVCNTVOFF13_EL2	/* index 3 */
+	write	AMEVCNTVOFF14_EL2	/* index 4 */
+	write	AMEVCNTVOFF15_EL2	/* index 5 */
+	write	AMEVCNTVOFF16_EL2	/* index 6 */
+	write	AMEVCNTVOFF17_EL2	/* index 7 */
+	write	AMEVCNTVOFF18_EL2	/* index 8 */
+	write	AMEVCNTVOFF19_EL2	/* index 9 */
+	write	AMEVCNTVOFF1A_EL2	/* index 10 */
+	write	AMEVCNTVOFF1B_EL2	/* index 11 */
+	write	AMEVCNTVOFF1C_EL2	/* index 12 */
+	write	AMEVCNTVOFF1D_EL2	/* index 13 */
+	write	AMEVCNTVOFF1E_EL2	/* index 14 */
+	write	AMEVCNTVOFF1F_EL2	/* index 15 */
+endfunc amu_group1_voffset_write_internal