Merge pull request #1644 from soby-mathew/sm/pie_proto

Position Independent Executable (PIE) Support
diff --git a/Makefile b/Makefile
index be543fa..74d5180 100644
--- a/Makefile
+++ b/Makefile
@@ -205,11 +205,6 @@
 				-Os -ffunction-sections -fdata-sections
 
 GCC_V_OUTPUT		:=	$(shell $(CC) -v 2>&1)
-PIE_FOUND		:=	$(findstring --enable-default-pie,${GCC_V_OUTPUT})
-
-ifneq ($(PIE_FOUND),)
-TF_CFLAGS		+=	-fno-PIE
-endif
 
 # Force the compiler to include the frame pointer
 ifeq (${ENABLE_BACKTRACE},1)
@@ -335,6 +330,16 @@
 include make_helpers/armv7-a-cpus.mk
 endif
 
+ifeq ($(ENABLE_PIE),1)
+    TF_CFLAGS		+=	-fpie
+    TF_LDFLAGS		+=	-pie
+else
+    PIE_FOUND		:=	$(findstring --enable-default-pie,${GCC_V_OUTPUT})
+    ifneq ($(PIE_FOUND),)
+        TF_CFLAGS		+=	-fno-PIE
+    endif
+endif
+
 # Include the CPU specific operations makefile, which provides default
 # values for all CPU errata workarounds and CPU specific optimisations.
 # This can be overridden by the platform.
@@ -565,6 +570,7 @@
 $(eval $(call assert_boolean,ENABLE_ASSERTIONS))
 $(eval $(call assert_boolean,ENABLE_BACKTRACE))
 $(eval $(call assert_boolean,ENABLE_MPAM_FOR_LOWER_ELS))
+$(eval $(call assert_boolean,ENABLE_PIE))
 $(eval $(call assert_boolean,ENABLE_PMF))
 $(eval $(call assert_boolean,ENABLE_PSCI_STAT))
 $(eval $(call assert_boolean,ENABLE_RUNTIME_INSTRUMENTATION))
@@ -615,6 +621,7 @@
 $(eval $(call add_define,ENABLE_ASSERTIONS))
 $(eval $(call add_define,ENABLE_BACKTRACE))
 $(eval $(call add_define,ENABLE_MPAM_FOR_LOWER_ELS))
+$(eval $(call add_define,ENABLE_PIE))
 $(eval $(call add_define,ENABLE_PMF))
 $(eval $(call add_define,ENABLE_PSCI_STAT))
 $(eval $(call add_define,ENABLE_RUNTIME_INSTRUMENTATION))
diff --git a/bl2/aarch64/bl2_entrypoint.S b/bl2/aarch64/bl2_entrypoint.S
index bc8cbfd..30a5c59 100644
--- a/bl2/aarch64/bl2_entrypoint.S
+++ b/bl2/aarch64/bl2_entrypoint.S
@@ -70,13 +70,19 @@
 	 *   - the coherent memory section.
 	 * ---------------------------------------------
 	 */
-	ldr	x0, =__BSS_START__
-	ldr	x1, =__BSS_SIZE__
+	adrp	x0, __BSS_START__
+	add	x0, x0, :lo12:__BSS_START__
+	adrp	x1, __BSS_END__
+	add	x1, x1, :lo12:__BSS_END__
+	sub	x1, x1, x0
 	bl	zeromem
 
 #if USE_COHERENT_MEM
-	ldr	x0, =__COHERENT_RAM_START__
-	ldr	x1, =__COHERENT_RAM_UNALIGNED_SIZE__
+	adrp	x0, __COHERENT_RAM_START__
+	add	x0, x0, :lo12:__COHERENT_RAM_START__
+	adrp	x1, __COHERENT_RAM_END_UNALIGNED__
+	add	x1, x1, :lo12:__COHERENT_RAM_END_UNALIGNED__
+	sub	x1, x1, x0
 	bl	zeromem
 #endif
 
diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S
index 3a45e53..7c116a2 100644
--- a/bl31/aarch64/bl31_entrypoint.S
+++ b/bl31/aarch64/bl31_entrypoint.S
@@ -7,6 +7,7 @@
 #include <arch.h>
 #include <bl_common.h>
 #include <el3_common_macros.S>
+#include <platform_def.h>
 #include <pmf_asm_macros.S>
 #include <runtime_instr.h>
 #include <xlat_mmu_helpers.h>
@@ -73,6 +74,18 @@
 	mov	x22, 0
 	mov	x23, 0
 #endif /* RESET_TO_BL31 */
+
+	/* --------------------------------------------------------------------
+	 * If PIE is enabled, fixup the Global Offset Table (GOT) and dynamic
+	 * relocations
+	 * --------------------------------------------------------------------
+	 */
+#if ENABLE_PIE
+	mov_imm	x0, BL31_BASE
+	mov_imm	x1, BL31_LIMIT
+	bl	fixup_gdt_reloc
+#endif /* ENABLE_PIE */
+
 	/* ---------------------------------------------
 	 * Perform platform specific early arch. setup
 	 * ---------------------------------------------
diff --git a/bl31/bl31.ld.S b/bl31/bl31.ld.S
index 81e7ba3..43d0ed4 100644
--- a/bl31/bl31.ld.S
+++ b/bl31/bl31.ld.S
@@ -26,6 +26,8 @@
     ASSERT(. == ALIGN(PAGE_SIZE),
            "BL31_BASE address is not aligned on a page boundary.")
 
+    __BL31_START__ = .;
+
 #if SEPARATE_CODE_AND_RODATA
     .text . : {
         __TEXT_START__ = .;
@@ -63,6 +65,16 @@
         KEEP(*(cpu_ops))
         __CPU_OPS_END__ = .;
 
+        /*
+         * Keep the .got section in the RO section as it is patched
+         * prior to enabling the MMU and having the .got in RO is better for
+         * security.
+         */
+        . = ALIGN(16);
+        __GOT_START__ = .;
+        *(.got)
+        __GOT_END__ = .;
+
         /* Place pubsub sections for events */
         . = ALIGN(8);
 #include <pubsub_events.h>
@@ -153,6 +165,16 @@
         __DATA_END__ = .;
     } >RAM
 
+    . = ALIGN(16);
+    /*
+     * .rela.dyn needs to come after .data for the readelf utility to parse
+     * this section correctly.
+     */
+    __RELA_START__ = .;
+    .rela.dyn . : {
+    } >RAM
+    __RELA_END__ = .;
+
 #ifdef BL31_PROGBITS_LIMIT
     ASSERT(. <= BL31_PROGBITS_LIMIT, "BL31 progbits has exceeded its limit.")
 #endif
@@ -265,11 +287,5 @@
     __RW_END__ = .;
     __BL31_END__ = .;
 
-    __BSS_SIZE__ = SIZEOF(.bss);
-#if USE_COHERENT_MEM
-    __COHERENT_RAM_UNALIGNED_SIZE__ =
-        __COHERENT_RAM_END_UNALIGNED__ - __COHERENT_RAM_START__;
-#endif
-
     ASSERT(. <= BL31_LIMIT, "BL31 image has exceeded its limit.")
 }
diff --git a/docs/user-guide.rst b/docs/user-guide.rst
index f4ef85d..52cb45c 100644
--- a/docs/user-guide.rst
+++ b/docs/user-guide.rst
@@ -371,6 +371,10 @@
    partitioning in EL3, however. Platform initialisation code should configure
    and use partitions in EL3 as required. This option defaults to ``0``.
 
+-  ``ENABLE_PIE``: Boolean option to enable Position Independent Executable(PIE)
+   support within generic code in TF-A. This option is currently only supported
+   in BL31. Default is 0.
+
 -  ``ENABLE_PMF``: Boolean option to enable support for optional Performance
    Measurement Framework(PMF). Default is 0.
 
diff --git a/include/common/aarch64/asm_macros.S b/include/common/aarch64/asm_macros.S
index 9621a1c..91416e4 100644
--- a/include/common/aarch64/asm_macros.S
+++ b/include/common/aarch64/asm_macros.S
@@ -105,8 +105,9 @@
 	 * Clobber: X30, X1, X2
 	 */
 	.macro get_my_mp_stack _name, _size
-	bl  plat_my_core_pos
-	ldr x2, =(\_name + \_size)
+	bl	plat_my_core_pos
+	adrp	x2, (\_name + \_size)
+	add	x2, x2, :lo12:(\_name + \_size)
 	mov x1, #\_size
 	madd x0, x0, x1, x2
 	.endm
@@ -117,7 +118,8 @@
 	 * Out: X0 = physical address of stack base
 	 */
 	.macro get_up_stack _name, _size
-	ldr x0, =(\_name + \_size)
+	adrp	x0, (\_name + \_size)
+	add	x0, x0, :lo12:(\_name + \_size)
 	.endm
 
 	/*
diff --git a/include/common/aarch64/el3_common_macros.S b/include/common/aarch64/el3_common_macros.S
index 143c70c..4902583 100644
--- a/include/common/aarch64/el3_common_macros.S
+++ b/include/common/aarch64/el3_common_macros.S
@@ -283,26 +283,38 @@
 		 * an earlier boot loader stage.
 		 * -------------------------------------------------------------
 		 */
-		ldr	x0, =__RW_START__
-		ldr	x1, =__RW_END__
+		adrp	x0, __RW_START__
+		add	x0, x0, :lo12:__RW_START__
+		adrp	x1, __RW_END__
+		add	x1, x1, :lo12:__RW_END__
 		sub	x1, x1, x0
 		bl	inv_dcache_range
 #endif
+		adrp	x0, __BSS_START__
+		add	x0, x0, :lo12:__BSS_START__
 
-		ldr	x0, =__BSS_START__
-		ldr	x1, =__BSS_SIZE__
+		adrp	x1, __BSS_END__
+		add	x1, x1, :lo12:__BSS_END__
+		sub	x1, x1, x0
 		bl	zeromem
 
 #if USE_COHERENT_MEM
-		ldr	x0, =__COHERENT_RAM_START__
-		ldr	x1, =__COHERENT_RAM_UNALIGNED_SIZE__
+		adrp	x0, __COHERENT_RAM_START__
+		add	x0, x0, :lo12:__COHERENT_RAM_START__
+		adrp	x1, __COHERENT_RAM_END_UNALIGNED__
+		add	x1, x1, :lo12: __COHERENT_RAM_END_UNALIGNED__
+		sub	x1, x1, x0
 		bl	zeromem
 #endif
 
 #if defined(IMAGE_BL1) || (defined(IMAGE_BL2) && BL2_IN_XIP_MEM)
-		ldr	x0, =__DATA_RAM_START__
-		ldr	x1, =__DATA_ROM_START__
-		ldr	x2, =__DATA_SIZE__
+		adrp	x0, __DATA_RAM_START__
+		add	x0, x0, :lo12:__DATA_RAM_START__
+		adrp	x1, __DATA_ROM_START__
+		add	x1, x1, :lo12:__DATA_ROM_START__
+		adrp	x2, __DATA_RAM_END__
+		add	x2, x2, :lo12:__DATA_RAM_END__
+		sub	x2, x2, x0
 		bl	memcpy16
 #endif
 	.endif /* _init_c_runtime */
diff --git a/include/common/bl_common.h b/include/common/bl_common.h
index 2ecf281..6a79dc3 100644
--- a/include/common/bl_common.h
+++ b/include/common/bl_common.h
@@ -83,6 +83,7 @@
 #elif defined(IMAGE_BL2U)
 IMPORT_SYM(unsigned long, __BL2U_END__,		BL2U_END);
 #elif defined(IMAGE_BL31)
+IMPORT_SYM(unsigned long, __BL31_START__,	BL31_START);
 IMPORT_SYM(unsigned long, __BL31_END__,		BL31_END);
 #elif defined(IMAGE_BL32)
 IMPORT_SYM(unsigned long, __BL32_END__,		BL32_END);
diff --git a/include/lib/cpus/aarch32/cpu_macros.S b/include/lib/cpus/aarch32/cpu_macros.S
index 525e18c..aa728b2 100644
--- a/include/lib/cpus/aarch32/cpu_macros.S
+++ b/include/lib/cpus/aarch32/cpu_macros.S
@@ -161,10 +161,9 @@
 	.endif
 
 	/*
-	 * Weakly-bound, optional errata status printing function for CPUs of
+	 * Mandatory errata status printing function for CPUs of
 	 * this class.
 	 */
-	.weak \_name\()_errata_report
 	.word \_name\()_errata_report
 
 #ifdef IMAGE_BL32
diff --git a/include/lib/cpus/aarch64/cpu_macros.S b/include/lib/cpus/aarch64/cpu_macros.S
index 4672cbc..14616ac 100644
--- a/include/lib/cpus/aarch64/cpu_macros.S
+++ b/include/lib/cpus/aarch64/cpu_macros.S
@@ -183,10 +183,9 @@
 	.endif
 
 	/*
-	 * Weakly-bound, optional errata status printing function for CPUs of
+	 * Mandatory errata status printing function for CPUs of
 	 * this class.
 	 */
-	.weak \_name\()_errata_report
 	.quad \_name\()_errata_report
 
 #ifdef IMAGE_BL31
diff --git a/include/lib/pmf/pmf_asm_macros.S b/include/lib/pmf/pmf_asm_macros.S
index d58829e..5e19e62 100644
--- a/include/lib/pmf/pmf_asm_macros.S
+++ b/include/lib/pmf/pmf_asm_macros.S
@@ -18,10 +18,12 @@
 	mov	x9, x30
 	bl	plat_my_core_pos
 	mov	x30, x9
-	ldr	x1, =__PERCPU_TIMESTAMP_SIZE__
+	adr	x2, __PMF_PERCPU_TIMESTAMP_END__
+	adr	x1, __PMF_TIMESTAMP_START__
+	sub	x1, x2, x1
 	mov	x2, #(\_tid * PMF_TS_SIZE)
 	madd	x0, x0, x1, x2
-	ldr	x1, =pmf_ts_mem_\_name
+	adr	x1, pmf_ts_mem_\_name
 	add	x0, x0, x1
 	.endm
 
diff --git a/include/lib/utils.h b/include/lib/utils.h
index d46d846..f324a99 100644
--- a/include/lib/utils.h
+++ b/include/lib/utils.h
@@ -67,6 +67,29 @@
  *       zeroing.
  */
 void zeromem(void *mem, u_register_t length);
+
+/*
+ * Utility function to return the address of a symbol. By default, the
+ * compiler generates an adr/adrp instruction pair to return the reference
+ * to the symbol, and this utility is used to override the compiler
+ * generated code so that an `ldr` (literal load) instruction is used.
+ *
+ * This helps when Position Independent Executable needs to reference a symbol
+ * which is constant and does not depend on the execute address of the binary.
+ */
+#define DEFINE_LOAD_SYM_ADDR(_name)		\
+static inline u_register_t load_addr_## _name(void)		\
+{								\
+	u_register_t v;						\
+	/* Create a void reference to silence compiler */	\
+	(void) _name;						\
+	__asm__ volatile ("ldr %0, =" #_name : "=r" (v));	\
+	return v;						\
+}
+
+/* Typed pointer from load_addr_<name>(); parenthesised for safe embedding */
+#define LOAD_ADDR_OF(_name)	((typeof(_name) *) load_addr_## _name())
+
 #endif /* !(defined(__LINKER__) || defined(__ASSEMBLY__)) */
 
 #endif /* __UTILS_H__ */
diff --git a/lib/aarch64/misc_helpers.S b/lib/aarch64/misc_helpers.S
index 1a075aa..002942e 100644
--- a/lib/aarch64/misc_helpers.S
+++ b/lib/aarch64/misc_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2018, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -7,6 +7,7 @@
 #include <arch.h>
 #include <asm_macros.S>
 #include <assert_macros.S>
+#include <xlat_tables_defs.h>
 
 	.globl	get_afflvl_shift
 	.globl	mpidr_mask_lower_afflvls
@@ -23,6 +24,8 @@
 	.globl	disable_mmu_icache_el1
 	.globl	disable_mmu_icache_el3
 
+	.globl	fixup_gdt_reloc
+
 #if SUPPORT_VFP
 	.globl	enable_vfp
 #endif
@@ -497,3 +500,114 @@
 	ret
 endfunc enable_vfp
 #endif
+
+/* ---------------------------------------------------------------------------
+ * Helper to fixup Global Offset Table (GOT) and dynamic relocations
+ * (.rela.dyn) at runtime.
+ *
+ * This function is meant to be used when the firmware is compiled with -fpie
+ * and linked with -pie options. We rely on the linker script exporting
+ * the section markers __GOT_START__/__GOT_END__ for the GOT and
+ * __RELA_START__/__RELA_END__ for .rela.dyn.
+ *
+ * The function takes the limits of the memory to apply fixups to as
+ * arguments (which is usually the limits of the relocatable BL image).
+ *   x0 -  the start of the fixup region, page (4KB) aligned
+ *   x1 -  the limit of the fixup region, page (4KB) aligned
+ *
+ * Clobbers: x0 - x4, x6, x7 and the condition flags.
+ * ---------------------------------------------------------------------------
+ */
+func fixup_gdt_reloc
+	mov	x6, x0
+	mov	x7, x1
+
+	/* Test if the limits are 4K aligned */
+#if ENABLE_ASSERTIONS
+	orr	x0, x0, x1
+	tst	x0, #(PAGE_SIZE - 1)
+	ASM_ASSERT(eq)
+#endif
+	/*
+	 * Calculate the offset based on return address in x30.
+	 * Assume that this function is called within a page of the start of
+	 * the fixup region.
+	 */
+	and	x2, x30, #~(PAGE_SIZE - 1)
+	sub	x0, x2, x6	/* Diff(S) = Current Address - Compiled Address */
+
+	adrp	x1, __GOT_START__
+	add	x1, x1, :lo12:__GOT_START__
+	adrp	x2, __GOT_END__
+	add	x2, x2, :lo12:__GOT_END__
+
+	/*
+	 * GOT is an array of 64_bit addresses which must be fixed up as
+	 * new_addr = old_addr + Diff(S).
+	 * The new_addr is the address currently the binary is executing from
+	 * and old_addr is the address at compile time.
+	 */
+1:
+	ldr	x3, [x1]
+	/* Skip adding offset if address is < lower limit */
+	cmp	x3, x6
+	b.lo	2f
+	/* Skip adding offset if address is >= upper limit (unsigned) */
+	cmp	x3, x7
+	b.hs	2f
+	add	x3, x3, x0
+	str	x3, [x1]
+2:
+	add	x1, x1, #8
+	cmp	x1, x2
+	b.lo	1b
+
+	/* Starting dynamic relocations. Use adrp/add to get RELA_START and END */
+	adrp	x1, __RELA_START__
+	add	x1, x1, :lo12:__RELA_START__
+	adrp	x2, __RELA_END__
+	add	x2, x2, :lo12:__RELA_END__
+	/*
+	 * According to ELF-64 specification, the RELA data structure is as
+	 * follows:
+	 *	typedef struct
+	 * 	{
+	 *		Elf64_Addr r_offset;
+	 *		Elf64_Xword r_info;
+	 *		Elf64_Sxword r_addend;
+	 *	} Elf64_Rela;
+	 *
+	 * r_offset is address of reference
+	 * r_info is symbol index and type of relocation (in this case
+	 * 0x403 which corresponds to R_AARCH64_RELATIVE).
+	 * r_addend is constant part of expression.
+	 *
+	 * Size of Elf64_Rela structure is 24 bytes.
+	 */
+1:
+	/* Assert that the relocation type is R_AARCH64_RELATIVE */
+#if ENABLE_ASSERTIONS
+	ldr	x3, [x1, #8]
+	cmp	x3, #0x403
+	ASM_ASSERT(eq)
+#endif
+	ldr	x3, [x1]	/* r_offset */
+	add	x3, x0, x3
+	ldr	x4, [x1, #16]	/* r_addend */
+
+	/* Skip adding offset if r_addend is < lower limit */
+	cmp	x4, x6
+	b.lo	2f
+	/* Skip adding offset if r_addend entry is >= upper limit (unsigned) */
+	cmp	x4, x7
+	b.hs	2f
+
+	add	x4, x0, x4	/* Diff(S) + r_addend */
+	str	x4, [x3]
+
+2:	add	x1, x1, #24
+	cmp	x1, x2
+	b.lo	1b
+
+	ret
+endfunc fixup_gdt_reloc
diff --git a/lib/cpus/aarch32/aem_generic.S b/lib/cpus/aarch32/aem_generic.S
index 5f3d744..7bd586a 100644
--- a/lib/cpus/aarch32/aem_generic.S
+++ b/lib/cpus/aarch32/aem_generic.S
@@ -40,6 +40,15 @@
 	b	dcsw_op_all
 endfunc aem_generic_cluster_pwr_dwn
 
+#if REPORT_ERRATA
+/*
+ * Errata printing function for AEM. Must follow AAPCS.
+ */
+func aem_generic_errata_report
+	bx	lr
+endfunc aem_generic_errata_report
+#endif
+
 /* cpu_ops for Base AEM FVP */
 declare_cpu_ops aem_generic, BASE_AEM_MIDR, CPU_NO_RESET_FUNC, \
 	aem_generic_core_pwr_dwn, \
diff --git a/lib/cpus/aarch32/cortex_a12.S b/lib/cpus/aarch32/cortex_a12.S
index 73c9750..5300fe0 100644
--- a/lib/cpus/aarch32/cortex_a12.S
+++ b/lib/cpus/aarch32/cortex_a12.S
@@ -69,6 +69,15 @@
 	b	cortex_a12_disable_smp
 endfunc cortex_a12_cluster_pwr_dwn
 
+#if REPORT_ERRATA
+/*
+ * Errata printing function for Cortex-A12. Must follow AAPCS.
+ */
+func cortex_a12_errata_report
+	bx	lr
+endfunc cortex_a12_errata_report
+#endif
+
 declare_cpu_ops cortex_a12, CORTEX_A12_MIDR, \
 	cortex_a12_reset_func, \
 	cortex_a12_core_pwr_dwn, \
diff --git a/lib/cpus/aarch32/cortex_a32.S b/lib/cpus/aarch32/cortex_a32.S
index 2b6df27..c262276 100644
--- a/lib/cpus/aarch32/cortex_a32.S
+++ b/lib/cpus/aarch32/cortex_a32.S
@@ -117,6 +117,15 @@
 	b	cortex_a32_disable_smp
 endfunc cortex_a32_cluster_pwr_dwn
 
+#if REPORT_ERRATA
+/*
+ * Errata printing function for Cortex-A32. Must follow AAPCS.
+ */
+func cortex_a32_errata_report
+	bx	lr
+endfunc cortex_a32_errata_report
+#endif
+
 declare_cpu_ops cortex_a32, CORTEX_A32_MIDR, \
 	cortex_a32_reset_func, \
 	cortex_a32_core_pwr_dwn, \
diff --git a/lib/cpus/aarch32/cortex_a5.S b/lib/cpus/aarch32/cortex_a5.S
index c07c13e..8abb66f 100644
--- a/lib/cpus/aarch32/cortex_a5.S
+++ b/lib/cpus/aarch32/cortex_a5.S
@@ -69,6 +69,15 @@
 	b	cortex_a5_disable_smp
 endfunc cortex_a5_cluster_pwr_dwn
 
+#if REPORT_ERRATA
+/*
+ * Errata printing function for Cortex-A5. Must follow AAPCS.
+ */
+func cortex_a5_errata_report
+	bx	lr
+endfunc cortex_a5_errata_report
+#endif
+
 declare_cpu_ops cortex_a5, CORTEX_A5_MIDR, \
 	cortex_a5_reset_func, \
 	cortex_a5_core_pwr_dwn, \
diff --git a/lib/cpus/aarch32/cortex_a7.S b/lib/cpus/aarch32/cortex_a7.S
index 0278d1f..4d4bb77 100644
--- a/lib/cpus/aarch32/cortex_a7.S
+++ b/lib/cpus/aarch32/cortex_a7.S
@@ -69,6 +69,15 @@
 	b	cortex_a7_disable_smp
 endfunc cortex_a7_cluster_pwr_dwn
 
+#if REPORT_ERRATA
+/*
+ * Errata printing function for Cortex-A7. Must follow AAPCS.
+ */
+func cortex_a7_errata_report
+	bx	lr
+endfunc cortex_a7_errata_report
+#endif
+
 declare_cpu_ops cortex_a7, CORTEX_A7_MIDR, \
 	cortex_a7_reset_func, \
 	cortex_a7_core_pwr_dwn, \
diff --git a/lib/cpus/aarch64/aem_generic.S b/lib/cpus/aarch64/aem_generic.S
index 7592e3d..51b5ce9 100644
--- a/lib/cpus/aarch64/aem_generic.S
+++ b/lib/cpus/aarch64/aem_generic.S
@@ -46,6 +46,15 @@
 	b	dcsw_op_all
 endfunc aem_generic_cluster_pwr_dwn
 
+#if REPORT_ERRATA
+/*
+ * Errata printing function for AEM. Must follow AAPCS.
+ */
+func aem_generic_errata_report
+	ret
+endfunc aem_generic_errata_report
+#endif
+
 	/* ---------------------------------------------
 	 * This function provides cpu specific
 	 * register information for crash reporting.
diff --git a/lib/cpus/aarch64/cortex_a35.S b/lib/cpus/aarch64/cortex_a35.S
index b22189c..2e0d631 100644
--- a/lib/cpus/aarch64/cortex_a35.S
+++ b/lib/cpus/aarch64/cortex_a35.S
@@ -114,6 +114,16 @@
 	b	cortex_a35_disable_smp
 endfunc cortex_a35_cluster_pwr_dwn
 
+#if REPORT_ERRATA
+/*
+ * Errata printing function for Cortex A35. Must follow AAPCS.
+ */
+func cortex_a35_errata_report
+	ret
+endfunc cortex_a35_errata_report
+#endif
+
+
 	/* ---------------------------------------------
 	 * This function provides cortex_a35 specific
 	 * register information for crash reporting.
diff --git a/lib/cpus/aarch64/cortex_deimos.S b/lib/cpus/aarch64/cortex_deimos.S
index aec62a2..cad906f 100644
--- a/lib/cpus/aarch64/cortex_deimos.S
+++ b/lib/cpus/aarch64/cortex_deimos.S
@@ -27,6 +27,16 @@
 	ret
 endfunc cortex_deimos_core_pwr_dwn
 
+#if REPORT_ERRATA
+/*
+ * Errata printing function for Cortex Deimos. Must follow AAPCS.
+ */
+func cortex_deimos_errata_report
+	ret
+endfunc cortex_deimos_errata_report
+#endif
+
+
 	/* ---------------------------------------------
 	 * This function provides Cortex-Deimos specific
 	 * register information for crash reporting.
diff --git a/lib/cpus/aarch64/cortex_helios.S b/lib/cpus/aarch64/cortex_helios.S
index bcda741..4812ac4 100644
--- a/lib/cpus/aarch64/cortex_helios.S
+++ b/lib/cpus/aarch64/cortex_helios.S
@@ -19,6 +19,16 @@
 	ret
 endfunc cortex_helios_cpu_pwr_dwn
 
+#if REPORT_ERRATA
+/*
+ * Errata printing function for Cortex Helios. Must follow AAPCS.
+ */
+func cortex_helios_errata_report
+	ret
+endfunc cortex_helios_errata_report
+#endif
+
+
 .section .rodata.cortex_helios_regs, "aS"
 cortex_helios_regs:  /* The ascii list of register names to be reported */
 	.asciz	"cpuectlr_el1", ""
diff --git a/lib/pmf/pmf_main.c b/lib/pmf/pmf_main.c
index a020860..25513c1 100644
--- a/lib/pmf/pmf_main.c
+++ b/lib/pmf/pmf_main.c
@@ -25,9 +25,10 @@
 
 IMPORT_SYM(uintptr_t, __PMF_SVC_DESCS_START__,		PMF_SVC_DESCS_START);
 IMPORT_SYM(uintptr_t, __PMF_SVC_DESCS_END__,		PMF_SVC_DESCS_END);
-IMPORT_SYM(uintptr_t, __PERCPU_TIMESTAMP_SIZE__,	PMF_PERCPU_TIMESTAMP_SIZE);
+IMPORT_SYM(uintptr_t, __PMF_PERCPU_TIMESTAMP_END__,	PMF_PERCPU_TIMESTAMP_END);
 IMPORT_SYM(intptr_t,  __PMF_TIMESTAMP_START__,		PMF_TIMESTAMP_ARRAY_START);
-IMPORT_SYM(uintptr_t, __PMF_TIMESTAMP_END__,		PMF_TIMESTAMP_ARRAY_END);
+
+#define PMF_PERCPU_TIMESTAMP_SIZE	(PMF_PERCPU_TIMESTAMP_END - PMF_TIMESTAMP_ARRAY_START)
 
 #define PMF_SVC_DESCS_MAX		10
 
diff --git a/lib/romlib/init.s b/lib/romlib/init.s
index 5cf2aca..7d97e4d 100644
--- a/lib/romlib/init.s
+++ b/lib/romlib/init.s
@@ -5,7 +5,7 @@
  */
 
 	.globl	rom_lib_init
-	.extern	__DATA_RAM_START__, __DATA_ROM_START__, __DATA_SIZE__
+	.extern	__DATA_RAM_START__, __DATA_ROM_START__, __DATA_RAM_END__
 	.extern	memset, memcpy
 
 rom_lib_init:
@@ -16,13 +16,19 @@
 
 1:	stp	x29, x30, [sp, #-16]!
 	adrp	x0, __DATA_RAM_START__
-	ldr	x1,= __DATA_ROM_START__
-	ldr	x2, =__DATA_SIZE__
+	adrp	x1, __DATA_ROM_START__
+	add	x1, x1, :lo12:__DATA_ROM_START__
+	adrp	x2, __DATA_RAM_END__
+	add	x2, x2, :lo12:__DATA_RAM_END__
+	sub	x2, x2, x0
 	bl	memcpy
 
-	ldr	x0, =__BSS_START__
+	adrp	x0,__BSS_START__
+	add	x0, x0, :lo12:__BSS_START__
 	mov	x1, #0
-	ldr	x2, =__BSS_SIZE__
+	adrp	x2, __BSS_END__
+	add	x2, x2, :lo12:__BSS_END__
+	sub	x2, x2, x0
 	bl	memset
 	ldp	x29, x30, [sp], #16
 
diff --git a/lib/xlat_tables_v2/aarch64/enable_mmu.S b/lib/xlat_tables_v2/aarch64/enable_mmu.S
index 21717d2..504c03c 100644
--- a/lib/xlat_tables_v2/aarch64/enable_mmu.S
+++ b/lib/xlat_tables_v2/aarch64/enable_mmu.S
@@ -45,7 +45,8 @@
 		tlbi_invalidate_all \el
 
 		mov	x7, x0
-		ldr	x0, =mmu_cfg_params
+		adrp	x0, mmu_cfg_params
+		add	x0, x0, :lo12:mmu_cfg_params
 
 		/* MAIR */
 		ldr	x1, [x0, #(MMU_CFG_MAIR << 3)]
diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk
index 435de20..4a3f541 100644
--- a/make_helpers/defaults.mk
+++ b/make_helpers/defaults.mk
@@ -64,6 +64,9 @@
 # Build option to enable MPAM for lower ELs
 ENABLE_MPAM_FOR_LOWER_ELS	:= 0
 
+# Flag to enable Position Independent Executable (PIE) support
+ENABLE_PIE			:= 0
+
 # Flag to enable Performance Measurement Framework
 ENABLE_PMF			:= 0
 
diff --git a/plat/arm/board/fvp/aarch64/fvp_helpers.S b/plat/arm/board/fvp/aarch64/fvp_helpers.S
index 88fcdb1..abc3ceb 100644
--- a/plat/arm/board/fvp/aarch64/fvp_helpers.S
+++ b/plat/arm/board/fvp/aarch64/fvp_helpers.S
@@ -19,7 +19,7 @@
 	.globl	plat_arm_calc_core_pos
 
 	.macro	fvp_choose_gicmmap  param1, param2, x_tmp, w_tmp, res
-	ldr	\x_tmp, =V2M_SYSREGS_BASE + V2M_SYS_ID
+	mov_imm	\x_tmp, V2M_SYSREGS_BASE + V2M_SYS_ID
 	ldr	\w_tmp, [\x_tmp]
 	ubfx	\w_tmp, \w_tmp, #V2M_SYS_ID_BLD_SHIFT, #V2M_SYS_ID_BLD_LENGTH
 	cmp	\w_tmp, #BLD_GIC_VE_MMAP
@@ -48,7 +48,7 @@
 	 * ---------------------------------------------
 	 */
 	mrs	x0, mpidr_el1
-	ldr	x1, =PWRC_BASE
+	mov_imm	x1, PWRC_BASE
 	str	w0, [x1, #PPOFFR_OFF]
 
 	/* ---------------------------------------------
@@ -72,8 +72,8 @@
 	b	secondary_cold_boot_wait
 
 gicv2_bypass_disable:
-	ldr	x0, =VE_GICC_BASE
-	ldr	x1, =BASE_GICC_BASE
+	mov_imm	x0, VE_GICC_BASE
+	mov_imm	x1, BASE_GICC_BASE
 	fvp_choose_gicmmap	x0, x1, x2, w2, x1
 	mov	w0, #(IRQ_BYP_DIS_GRP1 | FIQ_BYP_DIS_GRP1)
 	orr	w0, w0, #(IRQ_BYP_DIS_GRP0 | FIQ_BYP_DIS_GRP0)
@@ -128,7 +128,7 @@
 	 * ---------------------------------------------------------------------
 	 */
 	mrs	x2, mpidr_el1
-	ldr	x1, =PWRC_BASE
+	mov_imm	x1, PWRC_BASE
 	str	w2, [x1, #PSYSR_OFF]
 	ldr	w2, [x1, #PSYSR_OFF]
 	ubfx	w2, w2, #PSYSR_WK_SHIFT, #PSYSR_WK_WIDTH
@@ -171,7 +171,7 @@
 	 */
 func plat_is_my_cpu_primary
 	mrs	x0, mpidr_el1
-	ldr	x1, =MPIDR_AFFINITY_MASK
+	mov_imm	x1, MPIDR_AFFINITY_MASK
 	and	x0, x0, x1
 	cmp	x0, #FVP_PRIMARY_CPU
 	cset	w0, eq
diff --git a/plat/arm/board/fvp/platform.mk b/plat/arm/board/fvp/platform.mk
index 332df4d..3d858c2 100644
--- a/plat/arm/board/fvp/platform.mk
+++ b/plat/arm/board/fvp/platform.mk
@@ -202,7 +202,9 @@
 DYNAMIC_WORKAROUND_CVE_2018_3639	:=	1
 
 # Enable reclaiming of BL31 initialisation code for secondary cores stacks for FVP
+ifneq (${RESET_TO_BL31},1)
 RECLAIM_INIT_CODE	:=	1
+endif
 
 ifeq (${ENABLE_AMU},1)
 BL31_SOURCES		+=	lib/cpus/aarch64/cortex_a75_pubsub.c	\
diff --git a/plat/arm/common/arm_bl31_setup.c b/plat/arm/common/arm_bl31_setup.c
index 24d5570..1b05f46 100644
--- a/plat/arm/common/arm_bl31_setup.c
+++ b/plat/arm/common/arm_bl31_setup.c
@@ -25,11 +25,13 @@
 static entry_point_info_t bl32_image_ep_info;
 static entry_point_info_t bl33_image_ep_info;
 
+#if !RESET_TO_BL31
 /*
  * Check that BL31_BASE is above ARM_TB_FW_CONFIG_LIMIT. The reserved page
  * is required for SOC_FW_CONFIG/TOS_FW_CONFIG passed from BL2.
  */
 CASSERT(BL31_BASE >= ARM_TB_FW_CONFIG_LIMIT, assert_bl31_base_overflows);
+#endif
 
 /* Weak definitions may be overridden in specific ARM standard platform */
 #pragma weak bl31_early_platform_setup2
@@ -38,8 +40,8 @@
 #pragma weak bl31_plat_get_next_image_ep_info
 
 #define MAP_BL31_TOTAL		MAP_REGION_FLAT(			\
-					BL31_BASE,			\
-					BL31_END - BL31_BASE,		\
+					BL31_START,			\
+					BL31_END - BL31_START,		\
 					MT_MEMORY | MT_RW | MT_SECURE)
 #if RECLAIM_INIT_CODE
 IMPORT_SYM(unsigned long, __INIT_CODE_START__, BL_INIT_CODE_BASE);
diff --git a/plat/arm/common/arm_common.mk b/plat/arm/common/arm_common.mk
index 3fb1eff..23777fb 100644
--- a/plat/arm/common/arm_common.mk
+++ b/plat/arm/common/arm_common.mk
@@ -130,6 +130,11 @@
 $(eval $(call assert_boolean,ARM_CRYPTOCELL_INTEG))
 $(eval $(call add_define,ARM_CRYPTOCELL_INTEG))
 
+# Enable PIE support for RESET_TO_BL31 case
+ifeq (${RESET_TO_BL31},1)
+    ENABLE_PIE			:=	1
+endif
+
 # CryptoCell integration relies on coherent buffers for passing data from
 # the AP CPU to the CryptoCell
 ifeq (${ARM_CRYPTOCELL_INTEG},1)
diff --git a/plat/arm/common/arm_gicv3.c b/plat/arm/common/arm_gicv3.c
index e9e8a74..a43bff3 100644
--- a/plat/arm/common/arm_gicv3.c
+++ b/plat/arm/common/arm_gicv3.c
@@ -10,6 +10,7 @@
 #include <plat_arm.h>
 #include <platform.h>
 #include <platform_def.h>
+#include <utils.h>
 
 /******************************************************************************
  * The following functions are defined as weak to allow a platform to override
@@ -33,10 +34,16 @@
 
 /*
  * We save and restore the GICv3 context on system suspend. Allocate the
- * data in the designated EL3 Secure carve-out memory
+ * data in the designated EL3 Secure carve-out memory. The `volatile`
+ * is used to prevent the compiler from removing the gicv3 contexts even
+ * though the DEFINE_LOAD_SYM_ADDR creates a dummy reference to it.
  */
-static gicv3_redist_ctx_t rdist_ctx __section("arm_el3_tzc_dram");
-static gicv3_dist_ctx_t dist_ctx __section("arm_el3_tzc_dram");
+static volatile gicv3_redist_ctx_t rdist_ctx __section("arm_el3_tzc_dram");
+static volatile gicv3_dist_ctx_t dist_ctx __section("arm_el3_tzc_dram");
+
+/* Define accessor function to get reference to the GICv3 context */
+DEFINE_LOAD_SYM_ADDR(rdist_ctx)
+DEFINE_LOAD_SYM_ADDR(dist_ctx)
 
 /*
  * MPIDR hashing function for translating MPIDRs read from GICR_TYPER register
@@ -134,6 +141,10 @@
  *****************************************************************************/
 void plat_arm_gic_save(void)
 {
+	gicv3_redist_ctx_t * const rdist_context =
+			(gicv3_redist_ctx_t *)LOAD_ADDR_OF(rdist_ctx);
+	gicv3_dist_ctx_t * const dist_context =
+			(gicv3_dist_ctx_t *)LOAD_ADDR_OF(dist_ctx);
 
 	/*
 	 * If an ITS is available, save its context before
@@ -149,10 +160,10 @@
 	 * we only need to save the context of the CPU that is issuing
 	 * the SYSTEM SUSPEND call, i.e. the current CPU.
 	 */
-	gicv3_rdistif_save(plat_my_core_pos(), &rdist_ctx);
+	gicv3_rdistif_save(plat_my_core_pos(), rdist_context);
 
 	/* Save the GIC Distributor context */
-	gicv3_distif_save(&dist_ctx);
+	gicv3_distif_save(dist_context);
 
 	/*
 	 * From here, all the components of the GIC can be safely powered down
@@ -163,8 +174,13 @@
 
 void plat_arm_gic_resume(void)
 {
+	const gicv3_redist_ctx_t *rdist_context =
+			(gicv3_redist_ctx_t *)LOAD_ADDR_OF(rdist_ctx);
+	const gicv3_dist_ctx_t *dist_context =
+			(gicv3_dist_ctx_t *)LOAD_ADDR_OF(dist_ctx);
+
 	/* Restore the GIC Distributor context */
-	gicv3_distif_init_restore(&dist_ctx);
+	gicv3_distif_init_restore(dist_context);
 
 	/*
 	 * Restore the GIC Redistributor and ITS contexts after the
@@ -172,7 +188,7 @@
 	 * we only need to restore the context of the CPU that issued
 	 * the SYSTEM SUSPEND call.
 	 */
-	gicv3_rdistif_init_restore(plat_my_core_pos(), &rdist_ctx);
+	gicv3_rdistif_init_restore(plat_my_core_pos(), rdist_context);
 
 	/*
 	 * If an ITS is available, restore its context after
diff --git a/plat/arm/css/common/aarch64/css_helpers.S b/plat/arm/css/common/aarch64/css_helpers.S
index 59d9206..5096d8d 100644
--- a/plat/arm/css/common/aarch64/css_helpers.S
+++ b/plat/arm/css/common/aarch64/css_helpers.S
@@ -108,7 +108,7 @@
 func plat_is_my_cpu_primary
 	mov	x9, x30
 	bl	plat_my_core_pos
-	ldr	x1, =SCP_BOOT_CFG_ADDR
+	mov_imm	x1, SCP_BOOT_CFG_ADDR
 	ldr	x1, [x1]
 	ubfx	x1, x1, #PLAT_CSS_PRIMARY_CPU_SHIFT, \
 			#PLAT_CSS_PRIMARY_CPU_BIT_WIDTH