Add support for Branch Target Identification

This patch adds the functionality needed for platforms to provide
the Branch Target Identification (BTI) extension, introduced to
AArch64 in Armv8.5-A by adding the BTI instruction, which is used to
mark valid targets for indirect branches. The patch sets the new GP
bit [50] in the stage 1 Translation Table Block and Page entries to
denote guarded EL3 code pages, which will cause the processor to trap
instructions in protected pages trying to perform an indirect branch
to any instruction other than BTI.
The BTI feature is selected by the BRANCH_PROTECTION option, which
supersedes the previous ENABLE_PAUTH option used for Armv8.3-A Pointer
Authentication and is disabled by default. Enabling BTI requires
compiler support and was tested with GCC versions 9.0.0, 9.0.1 and
10.0.0.
The assembly macros and helpers are modified to accommodate the BTI
instruction.
This is an experimental feature.
Note: The previous ENABLE_PAUTH build option to enable PAuth in EL3
is now an internal flag, and the BRANCH_PROTECTION flag should be
used instead to enable Pointer Authentication.
Note: The USE_LIBROM=1 option is currently not supported.

Change-Id: Ifaf4438609b16647dc79468b70cd1f47a623362e
Signed-off-by: Alexei Fedorov <Alexei.Fedorov@arm.com>
diff --git a/lib/aarch64/cache_helpers.S b/lib/aarch64/cache_helpers.S
index 9c40b9d..9ef8ca7 100644
--- a/lib/aarch64/cache_helpers.S
+++ b/lib/aarch64/cache_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2014, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2019, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -91,6 +91,9 @@
 	cbz	x3, exit
 	adr	x14, dcsw_loop_table	// compute inner loop address
 	add	x14, x14, x0, lsl #5	// inner loop is 8x32-bit instructions
+#if ENABLE_BTI
+	add	x14, x14, x0, lsl #2	// inner loop is + "bti j" instruction
+#endif
 	mov	x0, x9
 	mov	w8, #1
 loop1:
@@ -116,6 +119,9 @@
 	br	x14			// jump to DC operation specific loop
 
 	.macro	dcsw_loop _op
+#if ENABLE_BTI
+	bti	j
+#endif
 loop2_\_op:
 	lsl	w7, w6, w2		// w7 = aligned max set number
 
diff --git a/lib/cpus/aarch64/cpuamu_helpers.S b/lib/cpus/aarch64/cpuamu_helpers.S
index 79b7288..5a77fc7 100644
--- a/lib/cpus/aarch64/cpuamu_helpers.S
+++ b/lib/cpus/aarch64/cpuamu_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2018-2019, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -23,21 +23,17 @@
  */
 func cpuamu_cnt_read
 	adr	x1, 1f
-	lsl	x0, x0, #3
-	add	x1, x1, x0
+	add	x1, x1, x0, lsl #3	/* each mrs/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x1, x1, x0, lsl #2	/* + "bti j" instruction */
+#endif
 	br	x1
 
-1:
-	mrs	x0, CPUAMEVCNTR0_EL0
-	ret
-	mrs	x0, CPUAMEVCNTR1_EL0
-	ret
-	mrs	x0, CPUAMEVCNTR2_EL0
-	ret
-	mrs	x0, CPUAMEVCNTR3_EL0
-	ret
-	mrs	x0, CPUAMEVCNTR4_EL0
-	ret
+1:	read	CPUAMEVCNTR0_EL0
+	read	CPUAMEVCNTR1_EL0
+	read	CPUAMEVCNTR2_EL0
+	read	CPUAMEVCNTR3_EL0
+	read	CPUAMEVCNTR4_EL0
 endfunc cpuamu_cnt_read
 
 /*
@@ -47,21 +43,17 @@
  */
 func cpuamu_cnt_write
 	adr	x2, 1f
-	lsl	x0, x0, #3
-	add	x2, x2, x0
+	add	x2, x2, x0, lsl #3	/* each msr/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x2, x2, x0, lsl #2	/* + "bti j" instruction */
+#endif
 	br	x2
 
-1:
-	msr	CPUAMEVCNTR0_EL0, x0
-	ret
-	msr	CPUAMEVCNTR1_EL0, x0
-	ret
-	msr	CPUAMEVCNTR2_EL0, x0
-	ret
-	msr	CPUAMEVCNTR3_EL0, x0
-	ret
-	msr	CPUAMEVCNTR4_EL0, x0
-	ret
+1:	write	CPUAMEVCNTR0_EL0
+	write	CPUAMEVCNTR1_EL0
+	write	CPUAMEVCNTR2_EL0
+	write	CPUAMEVCNTR3_EL0
+	write	CPUAMEVCNTR4_EL0
 endfunc cpuamu_cnt_write
 
 /*
diff --git a/lib/extensions/amu/aarch64/amu_helpers.S b/lib/extensions/amu/aarch64/amu_helpers.S
index e0b1f56..89007a3 100644
--- a/lib/extensions/amu/aarch64/amu_helpers.S
+++ b/lib/extensions/amu/aarch64/amu_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2019, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -21,35 +21,29 @@
  * and return it in `x0`.
  */
 func amu_group0_cnt_read_internal
+	adr	x1, 1f
 #if ENABLE_ASSERTIONS
 	/*
 	 * It can be dangerous to call this function with an
 	 * out of bounds index.  Ensure `idx` is valid.
 	 */
-	mov	x1, x0
-	lsr	x1, x1, #2
-	cmp	x1, #0
+	tst	x0, #~3
 	ASM_ASSERT(eq)
 #endif
-
 	/*
 	 * Given `idx` calculate address of mrs/ret instruction pair
 	 * in the table below.
 	 */
-	adr	x1, 1f
-	lsl	x0, x0, #3		/* each mrs/ret sequence is 8 bytes */
-	add	x1, x1, x0
+	add	x1, x1, x0, lsl #3	/* each mrs/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x1, x1, x0, lsl #2	/* + "bti j" instruction */
+#endif
 	br	x1
 
-1:
-	mrs	x0, AMEVCNTR00_EL0	/* index 0 */
-	ret
-	mrs	x0, AMEVCNTR01_EL0	/* index 1 */
-	ret
-	mrs	x0, AMEVCNTR02_EL0	/* index 2 */
-	ret
-	mrs	x0, AMEVCNTR03_EL0	/* index 3 */
-	ret
+1:	read	AMEVCNTR00_EL0		/* index 0 */
+	read	AMEVCNTR01_EL0		/* index 1 */
+	read	AMEVCNTR02_EL0		/* index 2 */
+	read	AMEVCNTR03_EL0		/* index 3 */
 endfunc amu_group0_cnt_read_internal
 
 /*
@@ -58,35 +52,29 @@
  * Given `idx`, write `val` to the corresponding AMU counter.
  */
 func amu_group0_cnt_write_internal
+	adr	x2, 1f
 #if ENABLE_ASSERTIONS
 	/*
 	 * It can be dangerous to call this function with an
 	 * out of bounds index.  Ensure `idx` is valid.
 	 */
-	mov	x2, x0
-	lsr	x2, x2, #2
-	cmp	x2, #0
+	tst	x0, #~3
 	ASM_ASSERT(eq)
 #endif
-
 	/*
 	 * Given `idx` calculate address of mrs/ret instruction pair
 	 * in the table below.
 	 */
-	adr	x2, 1f
-	lsl	x0, x0, #3		/* each msr/ret sequence is 8 bytes */
-	add	x2, x2, x0
+	add	x2, x2, x0, lsl #3	/* each msr/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x2, x2, x0, lsl #2	/* + "bti j" instruction */
+#endif
 	br	x2
 
-1:
-	msr	AMEVCNTR00_EL0, x1	/* index 0 */
-	ret
-	msr	AMEVCNTR01_EL0, x1	/* index 1 */
-	ret
-	msr	AMEVCNTR02_EL0, x1	/* index 2 */
-	ret
-	msr	AMEVCNTR03_EL0, x1	/* index 3 */
-	ret
+1:	write	AMEVCNTR00_EL0		/* index 0 */
+	write	AMEVCNTR01_EL0		/* index 1 */
+	write	AMEVCNTR02_EL0		/* index 2 */
+	write	AMEVCNTR03_EL0		/* index 3 */
 endfunc amu_group0_cnt_write_internal
 
 /*
@@ -96,59 +84,41 @@
  * and return it in `x0`.
  */
 func amu_group1_cnt_read_internal
+	adr	x1, 1f
 #if ENABLE_ASSERTIONS
 	/*
 	 * It can be dangerous to call this function with an
 	 * out of bounds index.  Ensure `idx` is valid.
 	 */
-	mov	x1, x0
-	lsr	x1, x1, #4
-	cmp	x1, #0
+	tst	x0, #~0xF
 	ASM_ASSERT(eq)
 #endif
-
 	/*
 	 * Given `idx` calculate address of mrs/ret instruction pair
 	 * in the table below.
 	 */
-	adr	x1, 1f
-	lsl	x0, x0, #3		/* each mrs/ret sequence is 8 bytes */
-	add	x1, x1, x0
+	add	x1, x1, x0, lsl #3	/* each mrs/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x1, x1, x0, lsl #2	/* + "bti j" instruction */
+#endif
 	br	x1
 
-1:
-	mrs	x0, AMEVCNTR10_EL0	/* index 0 */
-	ret
-	mrs	x0, AMEVCNTR11_EL0	/* index 1 */
-	ret
-	mrs	x0, AMEVCNTR12_EL0	/* index 2 */
-	ret
-	mrs	x0, AMEVCNTR13_EL0	/* index 3 */
-	ret
-	mrs	x0, AMEVCNTR14_EL0	/* index 4 */
-	ret
-	mrs	x0, AMEVCNTR15_EL0	/* index 5 */
-	ret
-	mrs	x0, AMEVCNTR16_EL0	/* index 6 */
-	ret
-	mrs	x0, AMEVCNTR17_EL0	/* index 7 */
-	ret
-	mrs	x0, AMEVCNTR18_EL0	/* index 8 */
-	ret
-	mrs	x0, AMEVCNTR19_EL0	/* index 9 */
-	ret
-	mrs	x0, AMEVCNTR1A_EL0	/* index 10 */
-	ret
-	mrs	x0, AMEVCNTR1B_EL0	/* index 11 */
-	ret
-	mrs	x0, AMEVCNTR1C_EL0	/* index 12 */
-	ret
-	mrs	x0, AMEVCNTR1D_EL0	/* index 13 */
-	ret
-	mrs	x0, AMEVCNTR1E_EL0	/* index 14 */
-	ret
-	mrs	x0, AMEVCNTR1F_EL0	/* index 15 */
-	ret
+1:	read	AMEVCNTR10_EL0		/* index 0 */
+	read	AMEVCNTR11_EL0		/* index 1 */
+	read	AMEVCNTR12_EL0		/* index 2 */
+	read	AMEVCNTR13_EL0		/* index 3 */
+	read	AMEVCNTR14_EL0		/* index 4 */
+	read	AMEVCNTR15_EL0		/* index 5 */
+	read	AMEVCNTR16_EL0		/* index 6 */
+	read	AMEVCNTR17_EL0		/* index 7 */
+	read	AMEVCNTR18_EL0		/* index 8 */
+	read	AMEVCNTR19_EL0		/* index 9 */
+	read	AMEVCNTR1A_EL0		/* index 10 */
+	read	AMEVCNTR1B_EL0		/* index 11 */
+	read	AMEVCNTR1C_EL0		/* index 12 */
+	read	AMEVCNTR1D_EL0		/* index 13 */
+	read	AMEVCNTR1E_EL0		/* index 14 */
+	read	AMEVCNTR1F_EL0		/* index 15 */
 endfunc amu_group1_cnt_read_internal
 
 /*
@@ -157,59 +127,41 @@
  * Given `idx`, write `val` to the corresponding AMU counter.
  */
 func amu_group1_cnt_write_internal
+	adr	x2, 1f
 #if ENABLE_ASSERTIONS
 	/*
 	 * It can be dangerous to call this function with an
 	 * out of bounds index.  Ensure `idx` is valid.
 	 */
-	mov	x2, x0
-	lsr	x2, x2, #4
-	cmp	x2, #0
+	tst	x0, #~0xF
 	ASM_ASSERT(eq)
 #endif
-
 	/*
 	 * Given `idx` calculate address of mrs/ret instruction pair
 	 * in the table below.
 	 */
-	adr	x2, 1f
-	lsl	x0, x0, #3		/* each msr/ret sequence is 8 bytes */
-	add	x2, x2, x0
+	add	x2, x2, x0, lsl #3	/* each msr/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x2, x2, x0, lsl #2	/* + "bti j" instruction */
+#endif
 	br	x2
 
-1:
-	msr	AMEVCNTR10_EL0, x1	/* index 0 */
-	ret
-	msr	AMEVCNTR11_EL0, x1	/* index 1 */
-	ret
-	msr	AMEVCNTR12_EL0, x1	/* index 2 */
-	ret
-	msr	AMEVCNTR13_EL0, x1	/* index 3 */
-	ret
-	msr	AMEVCNTR14_EL0, x1	/* index 4 */
-	ret
-	msr	AMEVCNTR15_EL0, x1	/* index 5 */
-	ret
-	msr	AMEVCNTR16_EL0, x1	/* index 6 */
-	ret
-	msr	AMEVCNTR17_EL0, x1	/* index 7 */
-	ret
-	msr	AMEVCNTR18_EL0, x1	/* index 8 */
-	ret
-	msr	AMEVCNTR19_EL0, x1	/* index 9 */
-	ret
-	msr	AMEVCNTR1A_EL0, x1	/* index 10 */
-	ret
-	msr	AMEVCNTR1B_EL0, x1	/* index 11 */
-	ret
-	msr	AMEVCNTR1C_EL0, x1	/* index 12 */
-	ret
-	msr	AMEVCNTR1D_EL0, x1	/* index 13 */
-	ret
-	msr	AMEVCNTR1E_EL0, x1	/* index 14 */
-	ret
-	msr	AMEVCNTR1F_EL0, x1	/* index 15 */
-	ret
+1:	write	AMEVCNTR10_EL0		/* index 0 */
+	write	AMEVCNTR11_EL0		/* index 1 */
+	write	AMEVCNTR12_EL0		/* index 2 */
+	write	AMEVCNTR13_EL0		/* index 3 */
+	write	AMEVCNTR14_EL0		/* index 4 */
+	write	AMEVCNTR15_EL0		/* index 5 */
+	write	AMEVCNTR16_EL0		/* index 6 */
+	write	AMEVCNTR17_EL0		/* index 7 */
+	write	AMEVCNTR18_EL0		/* index 8 */
+	write	AMEVCNTR19_EL0		/* index 9 */
+	write	AMEVCNTR1A_EL0		/* index 10 */
+	write	AMEVCNTR1B_EL0		/* index 11 */
+	write	AMEVCNTR1C_EL0		/* index 12 */
+	write	AMEVCNTR1D_EL0		/* index 13 */
+	write	AMEVCNTR1E_EL0		/* index 14 */
+	write	AMEVCNTR1F_EL0		/* index 15 */
 endfunc amu_group1_cnt_write_internal
 
 /*
@@ -219,63 +171,43 @@
  * with the value `val`.
  */
 func amu_group1_set_evtype_internal
+	adr	x2, 1f
 #if ENABLE_ASSERTIONS
 	/*
 	 * It can be dangerous to call this function with an
 	 * out of bounds index.  Ensure `idx` is valid.
 	 */
-	mov	x2, x0
-	lsr	x2, x2, #4
-	cmp	x2, #0
+	tst	x0, #~0xF
 	ASM_ASSERT(eq)
 
 	/* val should be between [0, 65535] */
-	mov	x2, x1
-	lsr	x2, x2, #16
-	cmp	x2, #0
+	tst	x1, #~0xFFFF
 	ASM_ASSERT(eq)
 #endif
-
 	/*
 	 * Given `idx` calculate address of msr/ret instruction pair
 	 * in the table below.
 	 */
-	adr	x2, 1f
-	lsl	x0, x0, #3		/* each msr/ret sequence is 8 bytes */
-	add	x2, x2, x0
+	add	x2, x2, x0, lsl #3	/* each msr/ret sequence is 8 bytes */
+#if ENABLE_BTI
+	add	x2, x2, x0, lsl #2	/* + "bti j" instruction */
+#endif
 	br	x2
 
-1:
-	msr	AMEVTYPER10_EL0, x1	/* index 0 */
-	ret
-	msr	AMEVTYPER11_EL0, x1	/* index 1 */
-	ret
-	msr	AMEVTYPER12_EL0, x1	/* index 2 */
-	ret
-	msr	AMEVTYPER13_EL0, x1	/* index 3 */
-	ret
-	msr	AMEVTYPER14_EL0, x1	/* index 4 */
-	ret
-	msr	AMEVTYPER15_EL0, x1	/* index 5 */
-	ret
-	msr	AMEVTYPER16_EL0, x1	/* index 6 */
-	ret
-	msr	AMEVTYPER17_EL0, x1	/* index 7 */
-	ret
-	msr	AMEVTYPER18_EL0, x1	/* index 8 */
-	ret
-	msr	AMEVTYPER19_EL0, x1	/* index 9 */
-	ret
-	msr	AMEVTYPER1A_EL0, x1	/* index 10 */
-	ret
-	msr	AMEVTYPER1B_EL0, x1	/* index 11 */
-	ret
-	msr	AMEVTYPER1C_EL0, x1	/* index 12 */
-	ret
-	msr	AMEVTYPER1D_EL0, x1	/* index 13 */
-	ret
-	msr	AMEVTYPER1E_EL0, x1	/* index 14 */
-	ret
-	msr	AMEVTYPER1F_EL0, x1	/* index 15 */
-	ret
+1:	write	AMEVTYPER10_EL0		/* index 0 */
+	write	AMEVTYPER11_EL0		/* index 1 */
+	write	AMEVTYPER12_EL0		/* index 2 */
+	write	AMEVTYPER13_EL0		/* index 3 */
+	write	AMEVTYPER14_EL0		/* index 4 */
+	write	AMEVTYPER15_EL0		/* index 5 */
+	write	AMEVTYPER16_EL0		/* index 6 */
+	write	AMEVTYPER17_EL0		/* index 7 */
+	write	AMEVTYPER18_EL0		/* index 8 */
+	write	AMEVTYPER19_EL0		/* index 9 */
+	write	AMEVTYPER1A_EL0		/* index 10 */
+	write	AMEVTYPER1B_EL0		/* index 11 */
+	write	AMEVTYPER1C_EL0		/* index 12 */
+	write	AMEVTYPER1D_EL0		/* index 13 */
+	write	AMEVTYPER1E_EL0		/* index 14 */
+	write	AMEVTYPER1F_EL0		/* index 15 */
 endfunc amu_group1_set_evtype_internal
diff --git a/lib/xlat_tables_v2/xlat_tables_core.c b/lib/xlat_tables_v2/xlat_tables_core.c
index 0e6a6fa..4f62f46 100644
--- a/lib/xlat_tables_v2/xlat_tables_core.c
+++ b/lib/xlat_tables_v2/xlat_tables_core.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2019, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -12,6 +12,7 @@
 
 #include <platform_def.h>
 
+#include <arch_features.h>
 #include <arch_helpers.h>
 #include <common/debug.h>
 #include <lib/utils_def.h>
@@ -195,6 +196,18 @@
 
 		if (mem_type == MT_MEMORY) {
 			desc |= LOWER_ATTRS(ATTR_IWBWA_OWBWA_NTR_INDEX | ISH);
+
+			/* Check if Branch Target Identification is enabled */
+#if ENABLE_BTI
+			/* Set GP bit for block and page code entries
+			 * if BTI mechanism is implemented.
+			 */
+			if (is_armv8_5_bti_present() &&
+			   ((attr & (MT_TYPE_MASK | MT_RW |
+				MT_EXECUTE_NEVER)) == MT_CODE)) {
+				desc |= GP;
+			}
+#endif
 		} else {
 			assert(mem_type == MT_NON_CACHEABLE);
 			desc |= LOWER_ATTRS(ATTR_NON_CACHEABLE_INDEX | OSH);
diff --git a/lib/xlat_tables_v2/xlat_tables_utils.c b/lib/xlat_tables_v2/xlat_tables_utils.c
index f5848a2..761d00c 100644
--- a/lib/xlat_tables_v2/xlat_tables_utils.c
+++ b/lib/xlat_tables_v2/xlat_tables_utils.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2019, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -96,6 +96,13 @@
 	}
 
 	printf(((LOWER_ATTRS(NS) & desc) != 0ULL) ? "-NS" : "-S");
+
+#ifdef AARCH64
+	/* Check Guarded Page bit */
+	if ((desc & GP) != 0ULL) {
+		printf("-GP");
+	}
+#endif
 }
 
 static const char * const level_spacers[] = {