Add PIE support for AARCH32

Only BL32 (SP_min) is supported at the moment; BL1 and BL2_AT_EL3 are just
stubbed with _pie_fixup_size=0.
The changes are an adaptation for AARCH32 of what has been done for
PIE support on AARCH64.
The RELA_SECTION is redefined for AARCH32, as the created section is
.rel.dyn and the input sections it collects are .rel*.

Change-Id: I92bafe70e6b77735f6f890f32f2b637b98cf01b9
Signed-off-by: Yann Gautier <yann.gautier@st.com>
diff --git a/Makefile b/Makefile
index d424508..219413e 100644
--- a/Makefile
+++ b/Makefile
@@ -579,11 +579,9 @@
 endif
 	BL31_CFLAGS	+=	-fpie
 	BL31_LDFLAGS	+=	$(PIE_LDFLAGS)
-ifeq ($(ARCH),aarch64)
 	BL32_CFLAGS	+=	-fpie
 	BL32_LDFLAGS	+=	$(PIE_LDFLAGS)
 endif
-endif
 
 ifeq (${ARCH},aarch64)
 BL1_CPPFLAGS += -DIMAGE_AT_EL3
diff --git a/bl1/aarch32/bl1_entrypoint.S b/bl1/aarch32/bl1_entrypoint.S
index 6a15566..94dfd37 100644
--- a/bl1/aarch32/bl1_entrypoint.S
+++ b/bl1/aarch32/bl1_entrypoint.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -49,7 +49,8 @@
 		_secondary_cold_boot=!COLD_BOOT_SINGLE_CPU	\
 		_init_memory=1					\
 		_init_c_runtime=1				\
-		_exception_vectors=bl1_vector_table
+		_exception_vectors=bl1_vector_table		\
+		_pie_fixup_size=0
 
 	/* -----------------------------------------------------
 	 * Perform BL1 setup
diff --git a/bl2/aarch32/bl2_el3_entrypoint.S b/bl2/aarch32/bl2_el3_entrypoint.S
index 2e851e6..7e85551 100644
--- a/bl2/aarch32/bl2_el3_entrypoint.S
+++ b/bl2/aarch32/bl2_el3_entrypoint.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2017-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -26,7 +26,8 @@
                 _secondary_cold_boot=!COLD_BOOT_SINGLE_CPU      \
                 _init_memory=1                                  \
                 _init_c_runtime=1                               \
-                _exception_vectors=bl2_vector_table
+                _exception_vectors=bl2_vector_table		\
+		_pie_fixup_size=0
 
 	/*
 	 * Restore parameters of boot rom
diff --git a/bl32/sp_min/aarch32/entrypoint.S b/bl32/sp_min/aarch32/entrypoint.S
index f3a1e44..39f1065 100644
--- a/bl32/sp_min/aarch32/entrypoint.S
+++ b/bl32/sp_min/aarch32/entrypoint.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -23,6 +23,8 @@
 	.globl	sp_min_handle_smc
 	.globl	sp_min_handle_fiq
 
+#define FIXUP_SIZE	((BL32_LIMIT) - (BL32_BASE))
+
 	.macro route_fiq_to_sp_min reg
 		/* -----------------------------------------------------
 		 * FIQs are secure interrupts trapped by Monitor and non
@@ -87,7 +89,8 @@
 		_secondary_cold_boot=0				\
 		_init_memory=0					\
 		_init_c_runtime=1				\
-		_exception_vectors=sp_min_vector_table
+		_exception_vectors=sp_min_vector_table		\
+		_pie_fixup_size=FIXUP_SIZE
 
 	/* ---------------------------------------------------------------------
 	 * Relay the previous bootloader's arguments to the platform layer
@@ -106,7 +109,8 @@
 		_secondary_cold_boot=!COLD_BOOT_SINGLE_CPU	\
 		_init_memory=1					\
 		_init_c_runtime=1				\
-		_exception_vectors=sp_min_vector_table
+		_exception_vectors=sp_min_vector_table		\
+		_pie_fixup_size=FIXUP_SIZE
 
 	/* ---------------------------------------------------------------------
 	 * For RESET_TO_SP_MIN systems, BL32 (SP_MIN) is the first bootloader
@@ -306,7 +310,8 @@
 		_secondary_cold_boot=0				\
 		_init_memory=0					\
 		_init_c_runtime=0				\
-		_exception_vectors=sp_min_vector_table
+		_exception_vectors=sp_min_vector_table		\
+		_pie_fixup_size=0
 
 	/*
 	 * We're about to enable MMU and participate in PSCI state coordination.
diff --git a/bl32/sp_min/sp_min.ld.S b/bl32/sp_min/sp_min.ld.S
index f202c7a..475affa 100644
--- a/bl32/sp_min/sp_min.ld.S
+++ b/bl32/sp_min/sp_min.ld.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -92,6 +92,7 @@
     __RW_START__ = . ;
 
     DATA_SECTION >RAM
+    RELA_SECTION >RAM
 
 #ifdef BL32_PROGBITS_LIMIT
     ASSERT(. <= BL32_PROGBITS_LIMIT, "BL32 progbits has exceeded its limit.")
@@ -141,5 +142,9 @@
 
     __BL32_END__ = .;
 
+    /DISCARD/ : {
+        *(.dynsym .dynstr .hash .gnu.hash)
+    }
+
     ASSERT(. <= BL32_LIMIT, "BL32 image has exceeded its limit.")
 }
diff --git a/docs/getting_started/build-options.rst b/docs/getting_started/build-options.rst
index 5935b4e..2c3cd6b 100644
--- a/docs/getting_started/build-options.rst
+++ b/docs/getting_started/build-options.rst
@@ -252,7 +252,8 @@
 
 -  ``ENABLE_PIE``: Boolean option to enable Position Independent Executable(PIE)
    support within generic code in TF-A. This option is currently only supported
-   in BL2_AT_EL3, BL31, and BL32 (TSP). Default is 0.
+   in BL2_AT_EL3, BL31, and BL32 (TSP) for AARCH64 binaries, and in BL32
+   (SP_min) for AARCH32. Default is 0.
 
 -  ``ENABLE_PMF``: Boolean option to enable support for optional Performance
    Measurement Framework(PMF). Default is 0.
@@ -847,4 +848,4 @@
 
 --------------
 
-*Copyright (c) 2019-2020, Arm Limited. All rights reserved.*
+*Copyright (c) 2019-2021, Arm Limited. All rights reserved.*
diff --git a/include/arch/aarch32/el3_common_macros.S b/include/arch/aarch32/el3_common_macros.S
index 6caebf8..7fff4c7 100644
--- a/include/arch/aarch32/el3_common_macros.S
+++ b/include/arch/aarch32/el3_common_macros.S
@@ -10,6 +10,9 @@
 #include <arch.h>
 #include <asm_macros.S>
 #include <assert_macros.S>
+#include <lib/xlat_tables/xlat_tables_defs.h>
+
+#define PAGE_START_MASK		~(PAGE_SIZE_MASK)
 
 	/*
 	 * Helper macro to initialise EL3 registers we care about.
@@ -199,11 +202,18 @@
  *
  * _exception_vectors:
  *	Address of the exception vectors to program in the VBAR_EL3 register.
+ *
+ * _pie_fixup_size:
+ *	Size of the memory region whose Global Offset Table (GOT, referred to
+ *	as GDT in fixup_gdt_reloc) and dynamic relocations are to be fixed up.
+ *	Pass a non-zero value when the firmware needs this fixup; 0 skips it.
+ *
  * -----------------------------------------------------------------------------
  */
 	.macro el3_entrypoint_common					\
 		_init_sctlr, _warm_boot_mailbox, _secondary_cold_boot,	\
-		_init_memory, _init_c_runtime, _exception_vectors
+		_init_memory, _init_c_runtime, _exception_vectors,	\
+		_pie_fixup_size
 
 	/* Make sure we are in Secure Mode */
 #if ENABLE_ASSERTIONS
@@ -259,6 +269,27 @@
 		bxne	r0
 	.endif /* _warm_boot_mailbox */
 
+	.if \_pie_fixup_size
+#if ENABLE_PIE
+		/*
+		 * ------------------------------------------------------------
+		 * If PIE is enabled, fix up the Global Offset Table (GOT) and
+		 * dynamic relocations once, in the primary core cold boot path.
+		 * r0 = compile-time base address: the "pie_fixup" label, which
+		 *      must be within the first page, rounded down to its page.
+		 * r1 = r0 + _pie_fixup_size, i.e. the limit of the fixup region.
+		 * ------------------------------------------------------------
+		 */
+	pie_fixup:
+		ldr	r0, =pie_fixup
+		ldr	r1, =PAGE_START_MASK
+		and	r0, r0, r1
+		mov_imm	r1, \_pie_fixup_size
+		add	r1, r1, r0
+		bl	fixup_gdt_reloc
+#endif /* ENABLE_PIE */
+	.endif /* _pie_fixup_size */
+
 	/* ---------------------------------------------------------------------
 	 * Set the exception vectors (VBAR/MVBAR).
 	 * ---------------------------------------------------------------------
diff --git a/include/common/bl_common.ld.h b/include/common/bl_common.ld.h
index ab3391a..5147e37 100644
--- a/include/common/bl_common.ld.h
+++ b/include/common/bl_common.ld.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2020-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -105,10 +105,18 @@
  * .rela.dyn needs to come after .data for the read-elf utility to parse
  * this section correctly.
  */
+#if __aarch64__
+#define RELA_DYN_NAME		.rela.dyn
+#define RELOC_SECTIONS_PATTERN	*(.rela*)
+#else
+#define RELA_DYN_NAME		.rel.dyn
+#define RELOC_SECTIONS_PATTERN	*(.rel*)
+#endif
+
 #define RELA_SECTION					\
-	.rela.dyn : ALIGN(STRUCT_ALIGN) {		\
+	RELA_DYN_NAME : ALIGN(STRUCT_ALIGN) {		\
 		__RELA_START__ = .;			\
-		*(.rela*)				\
+		RELOC_SECTIONS_PATTERN			\
 		__RELA_END__ = .;			\
 	}
 
diff --git a/lib/aarch32/misc_helpers.S b/lib/aarch32/misc_helpers.S
index e9734ac..8b16f93 100644
--- a/lib/aarch32/misc_helpers.S
+++ b/lib/aarch32/misc_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -7,6 +7,8 @@
 #include <arch.h>
 #include <asm_macros.S>
 #include <assert_macros.S>
+#include <common/bl_common.h>
+#include <lib/xlat_tables/xlat_tables_defs.h>
 
 	.globl	smc
 	.globl	zeromem
@@ -14,6 +16,9 @@
 	.globl	memcpy4
 	.globl	disable_mmu_icache_secure
 	.globl	disable_mmu_secure
+	.globl	fixup_gdt_reloc
+
+#define PAGE_START_MASK		~(PAGE_SIZE_MASK)
 
 func smc
 	/*
@@ -187,3 +192,124 @@
 	ldr	r1, =(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
 	b	do_disable_mmu
 endfunc disable_mmu_icache_secure
+
+/* ---------------------------------------------------------------------------
+ * Helper to fixup the Global Offset Table (GOT) and dynamic relocations
+ * (.rel.dyn) at runtime.
+ *
+ * This function is meant to be used when the firmware is compiled with -fpie
+ * and linked with -pie options. We rely on the linker script exporting
+ * markers for the start and end of each section: __GOT_START__/__GOT_END__
+ * for the GOT and __RELA_START__/__RELA_END__ for .rel.dyn.
+ *
+ * The function takes the limits of the memory to apply fixups to as
+ * arguments (which is usually the limits of the relocatable BL image).
+ *   r0 -  the start of the fixup region
+ *   r1 -  the limit of the fixup region
+ * These addresses have to be 4KB page aligned.
+ * Clobbers: r0-r4, r6, r7 and the flags. Nothing is done when Diff(S) is 0.
+ * ---------------------------------------------------------------------------
+ */
+
+/* Relocation codes */
+#define R_ARM_RELATIVE	23
+
+func fixup_gdt_reloc
+	mov	r6, r0		/* r6 = start of the fixup region */
+	mov	r7, r1		/* r7 = limit of the fixup region */
+
+#if ENABLE_ASSERTIONS
+	/* Test if the limits are 4K aligned */
+	orr	r0, r0, r1
+	mov	r1, #(PAGE_SIZE_MASK)
+	tst	r0, r1
+	ASM_ASSERT(eq)
+#endif
+	/*
+	 * Calculate the offset based on return address in lr.
+	 * Assume that this function is called within a page at the start of
+	 * fixup region.
+	 */
+	ldr	r1, =PAGE_START_MASK
+	and	r2, lr, r1
+	subs	r0, r2, r6	/* Diff(S) = Current Address - Compiled Address */
+	beq	3f		/* Diff(S) = 0. No relocation needed */
+
+	ldr	r1, =__GOT_START__
+	add	r1, r1, r0	/* r1 = run-time address of first GOT entry */
+	ldr	r2, =__GOT_END__
+	add	r2, r2, r0	/* r2 = run-time address of the GOT end */
+
+	/*
+	 * GOT is an array of 32_bit addresses which must be fixed up as
+	 * new_addr = old_addr + Diff(S).
+	 * The new_addr is the address currently the binary is executing from
+	 * and old_addr is the address at compile time.
+	 */
+1:	ldr	r3, [r1]
+
+	/* Skip adding offset if address is < lower limit */
+	cmp	r3, r6
+	blo	2f
+
+	/* Skip adding offset if address is > upper limit */
+	cmp	r3, r7
+	bhi	2f
+	add	r3, r3, r0
+	str	r3, [r1]
+
+2:	add	r1, r1, #4
+	cmp	r1, r2
+	blo	1b
+
+	/* Starting dynamic relocations. Use ldr to get RELA_START and END */
+	ldr	r1, =__RELA_START__
+	add	r1, r1, r0	/* r1 = run-time address of first entry */
+	ldr	r2, =__RELA_END__
+	add	r2, r2, r0	/* r2 = run-time address of the section end */
+
+	/*
+	 * According to ELF-32 specification, the REL data structure is as
+	 * follows:
+	 *	typedef struct {
+	 *		Elf32_Addr r_offset;
+	 *		Elf32_Word r_info;
+	 *	} Elf32_Rel;
+	 *
+	 * r_offset is address of reference
+	 * r_info is symbol index and type of relocation (in this case
+	 * code 23  which corresponds to R_ARM_RELATIVE).
+	 *
+	 * Size of Elf32_Rel structure is 8 bytes.
+	 */
+
+	/* Skip R_ARM_NONE entry with code 0 */
+1:	ldr	r3, [r1, #4]	/* r_info */
+	ands	r3, r3, #0xff	/* relocation type is the low byte of r_info */
+	beq	2f
+
+#if ENABLE_ASSERTIONS
+	/* Assert that the relocation type is R_ARM_RELATIVE */
+	cmp	r3, #R_ARM_RELATIVE
+	ASM_ASSERT(eq)
+#endif
+	ldr	r3, [r1]	/* r_offset */
+	add	r3, r0, r3	/* Diff(S) + r_offset */
+	ldr	r4, [r3]	/* value stored at the relocated location */
+
+	/* Skip adding offset if address is < lower limit */
+	cmp	r4, r6
+	blo	2f
+
+	/* Skip adding offset if address is >= upper limit */
+	cmp	r4, r7
+	bhs	2f
+
+	add	r4, r0, r4
+	str	r4, [r3]
+
+2:	add	r1, r1, #8
+	cmp	r1, r2
+	blo	1b
+3:	bx	lr
+endfunc fixup_gdt_reloc