feat(d128): add support for FEAT_D128

This patch sets the SCR_EL3.D128En bit so that accesses to the
FEAT_D128-specific registers are no longer trapped to EL3.

If FEAT_D128 is implemented, then FEAT_SYSREG128 is implemented.
With FEAT_SYSREG128, certain system registers are treated as 128-bit,
so the context save and restore must handle 128 bits instead of
64 bits when FEAT_D128 is enabled.

FEAT_SYSREG128 adds the MRRS and MSRR instructions, which are used
to read and write 128-bit system registers.
Refer to the Arm Architecture Reference Manual for further details.

Change the FVP platform to default to handling this as a dynamic option
so the right decision can be made by the code at runtime.

Change-Id: I1a53db5eac29e56c8fbdcd4961ede3abfcb2411a
Signed-off-by: Jayanth Dodderi Chidanand <jayanthdodderi.chidanand@arm.com>
Signed-off-by: Govindraj Raja <govindraj.raja@arm.com>
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index 003cb25..d2222fa 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -33,6 +33,7 @@
 #include <lib/extensions/sme.h>
 #include <lib/extensions/spe.h>
 #include <lib/extensions/sve.h>
+#include <lib/extensions/sysreg128.h>
 #include <lib/extensions/sys_reg_trace.h>
 #include <lib/extensions/tcr2.h>
 #include <lib/extensions/trbe.h>
@@ -275,6 +276,14 @@
 		scr_el3 |= SCR_SCTLR2En_BIT;
 	}
 
+	if (is_feat_d128_supported()) {
+		/* Set the D128En bit in SCR_EL3 to enable access to 128-bit
+		 * versions of TTBR0_EL1, TTBR1_EL1, RCWMASK_EL1, RCWSMASK_EL1,
+		 * PAR_EL1, TTBR0_EL2, TTBR1_EL2 and VTTBR_EL2 registers.
+		 */
+		scr_el3 |= SCR_D128En_BIT;
+	}
+
 	write_ctx_reg(state, CTX_SCR_EL3, scr_el3);
 
 	/* Initialize EL2 context registers */
@@ -1322,12 +1331,13 @@
 	write_el2_ctx_common(ctx, sp_el2, read_sp_el2());
 	write_el2_ctx_common(ctx, tcr_el2, read_tcr_el2());
 	write_el2_ctx_common(ctx, tpidr_el2, read_tpidr_el2());
-	write_el2_ctx_common(ctx, ttbr0_el2, read_ttbr0_el2());
 	write_el2_ctx_common(ctx, vbar_el2, read_vbar_el2());
 	write_el2_ctx_common(ctx, vmpidr_el2, read_vmpidr_el2());
 	write_el2_ctx_common(ctx, vpidr_el2, read_vpidr_el2());
 	write_el2_ctx_common(ctx, vtcr_el2, read_vtcr_el2());
-	write_el2_ctx_common(ctx, vttbr_el2, read_vttbr_el2());
+
+	write_el2_ctx_sysreg128(ctx, ttbr0_el2, read_ttbr0_el2());
+	write_el2_ctx_sysreg128(ctx, vttbr_el2, read_vttbr_el2());
 }
 
 static void el2_sysregs_context_restore_common(el2_sysregs_t *ctx)
@@ -1403,7 +1413,7 @@
 	if (is_feat_vhe_supported()) {
 		write_el2_ctx_vhe(el2_sysregs_ctx, contextidr_el2,
 					read_contextidr_el2());
-		write_el2_ctx_vhe(el2_sysregs_ctx, ttbr1_el2, read_ttbr1_el2());
+		write_el2_ctx_vhe_sysreg128(el2_sysregs_ctx, ttbr1_el2, read_ttbr1_el2());
 	}
 
 	if (is_feat_ras_supported()) {
diff --git a/lib/extensions/sysreg128/sysreg128.S b/lib/extensions/sysreg128/sysreg128.S
new file mode 100644
index 0000000..08cff2f
--- /dev/null
+++ b/lib/extensions/sysreg128/sysreg128.S
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2024, Arm Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <asm_macros.S>
+#include <lib/extensions/sysreg128.h>
+
+        .global read_par_el1
+        .global write_par_el1
+        .global read_ttbr0_el1
+        .global write_ttbr0_el1
+        .global read_ttbr1_el1
+        .global write_ttbr1_el1
+        .global read_ttbr0_el2
+        .global write_ttbr0_el2
+        .global read_ttbr1_el2
+        .global write_ttbr1_el2
+        .global read_vttbr_el2
+        .global write_vttbr_el2
+        .global read_rcwmask_el1
+        .global write_rcwmask_el1
+        .global read_rcwsmask_el1
+        .global write_rcwsmask_el1
+
+/*
+ * _mrrs - Read a system register into x0 (and x1 for the 128-bit form),
+ * then return to the caller.
+ * Instruction: MRRS <Xt>, <Xt+1>, (<systemreg>|S<op0>_<op1>_<Cn>_<Cm>_<op2>)
+ *
+ * Arguments/Opcode bit field:
+ * regins: System register encoding, ORed into the MRS/MRRS opcode.
+ *
+ * Clobbers: x0, x1 (result) and, if ENABLE_FEAT_D128 == 2, condition flags
+ */
+.macro _mrrs regins:req
+#if ENABLE_FEAT_D128 == 2
+        mrs     x0, ID_AA64MMFR3_EL1
+        tst     x0, #(ID_AA64MMFR3_EL1_D128_MASK << ID_AA64MMFR3_EL1_D128_SHIFT)
+        bne     1f      /* D128 field is non-zero: take the MRRS path */
+        /* FEAT_D128 is not implemented: use a 64-bit mrs, only x0 is written */
+        .inst   0xD5300000 | (\regins)
+        ret
+#endif
+1:
+        .inst   0xD5700000 | (\regins)  /* mrrs x0, x1, <regins> */
+        ret
+.endm
+
+/*
+ * _msrr - Write x0 (and x1 for the 128-bit form) to a system register,
+ * then return to the caller.
+ * Instruction: MSRR (<systemreg>|S<op0>_<op1>_<Cn>_<Cm>_<op2>), <Xt>, <Xt+1>
+ *
+ * Arguments/Opcode bit field:
+ * regins: System register encoding, ORed into the MSR/MSRR opcode.
+ * Clobbers: x2 and the condition flags, only when ENABLE_FEAT_D128 == 2
+ */
+.macro _msrr regins:req
+        /* If FEAT_D128 is not implemented use msr. Do not tamper with
+         * x0, x1 as they hold the value to be written by msr/msrr */
+#if ENABLE_FEAT_D128 == 2
+        mrs     x2, ID_AA64MMFR3_EL1
+        tst     x2, #(ID_AA64MMFR3_EL1_D128_MASK << ID_AA64MMFR3_EL1_D128_SHIFT)
+        bne     1f      /* D128 field is non-zero: take the MSRR path */
+        /* FEAT_D128 is not implemented: use a 64-bit msr, only x0 is read */
+        .inst   0xD5100000 | (\regins)
+        ret
+#endif
+1:
+        .inst   0xD5500000 | (\regins)  /* msrr <regins>, x0, x1 */
+        ret
+.endm
+
+func read_par_el1
+        _mrrs   0x87400 /* PAR_EL1: S3_0_C7_C4_0 */
+endfunc read_par_el1
+
+func write_par_el1
+        _msrr   0x87400 /* PAR_EL1: S3_0_C7_C4_0 */
+endfunc write_par_el1
+
+func read_ttbr0_el1
+        _mrrs   0x82000 /* TTBR0_EL1: S3_0_C2_C0_0 */
+endfunc read_ttbr0_el1
+
+func write_ttbr0_el1
+        _msrr 0x82000 /* TTBR0_EL1: S3_0_C2_C0_0 */
+endfunc write_ttbr0_el1
+
+func read_ttbr1_el1
+        _mrrs 0x82020 /* TTBR1_EL1: S3_0_C2_C0_1 */
+endfunc read_ttbr1_el1
+
+func write_ttbr1_el1
+        _msrr 0x82020 /* TTBR1_EL1: S3_0_C2_C0_1 */
+endfunc write_ttbr1_el1
+
+func read_ttbr0_el2
+        _mrrs 0xC2000 /* TTBR0_EL2: S3_4_C2_C0_0 */
+endfunc read_ttbr0_el2
+
+func write_ttbr0_el2
+        _msrr 0xC2000 /* TTBR0_EL2: S3_4_C2_C0_0 */
+endfunc write_ttbr0_el2
+
+func read_ttbr1_el2
+        _mrrs 0xC2020 /* TTBR1_EL2: S3_4_C2_C0_1 */
+endfunc read_ttbr1_el2
+
+func write_ttbr1_el2
+        _msrr 0xC2020 /* TTBR1_EL2: S3_4_C2_C0_1 */
+endfunc write_ttbr1_el2
+
+func read_vttbr_el2
+        _mrrs 0xC2100 /* VTTBR_EL2: S3_4_C2_C1_0 */
+endfunc read_vttbr_el2
+
+func write_vttbr_el2
+        _msrr 0xC2100 /* VTTBR_EL2: S3_4_C2_C1_0 */
+endfunc write_vttbr_el2
+
+func read_rcwmask_el1
+        _mrrs 0x8D0C0 /* RCWMASK_EL1: S3_0_C13_C0_6 */
+endfunc read_rcwmask_el1
+
+func write_rcwmask_el1
+        _msrr 0x8D0C0 /* RCWMASK_EL1: S3_0_C13_C0_6 */
+endfunc write_rcwmask_el1
+
+func read_rcwsmask_el1
+        _mrrs 0x8D060 /* RCWSMASK_EL1: S3_0_C13_C0_3 */
+endfunc read_rcwsmask_el1
+
+func write_rcwsmask_el1
+        _msrr 0x8D060 /* RCWSMASK_EL1: S3_0_C13_C0_3 */
+endfunc write_rcwsmask_el1
diff --git a/lib/xlat_tables/aarch64/xlat_tables.c b/lib/xlat_tables/aarch64/xlat_tables.c
index f4195f4..f207266 100644
--- a/lib/xlat_tables/aarch64/xlat_tables.c
+++ b/lib/xlat_tables/aarch64/xlat_tables.c
@@ -66,7 +66,7 @@
  */
 static const unsigned int pa_range_bits_arr[] = {
 	PARANGE_0000, PARANGE_0001, PARANGE_0010, PARANGE_0011, PARANGE_0100,
-	PARANGE_0101, PARANGE_0110
+	PARANGE_0101, PARANGE_0110, PARANGE_0111
 };
 
 static unsigned long long get_max_supported_pa(void)
diff --git a/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c b/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c
index 18e001b..7321fd7 100644
--- a/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c
+++ b/lib/xlat_tables_v2/aarch64/xlat_tables_arch.c
@@ -109,7 +109,7 @@
  */
 static const unsigned int pa_range_bits_arr[] = {
 	PARANGE_0000, PARANGE_0001, PARANGE_0010, PARANGE_0011, PARANGE_0100,
-	PARANGE_0101, PARANGE_0110
+	PARANGE_0101, PARANGE_0110, PARANGE_0111
 };
 
 unsigned long long xlat_arch_get_max_supported_pa(void)