Add PIE support for AARCH32

Only BL32 (SP_min) is supported at the moment, BL1 and BL2_AT_EL3 are just
stubbed with _pie_fixup_size=0.
The changes are an adaptation for AARCH32 on what has been done for
PIE support on AARCH64.
The RELA_SECTION is redefined for AARCH32, as the created section is
.rel.dyn and the symbols are .rel*.

Change-Id: I92bafe70e6b77735f6f890f32f2b637b98cf01b9
Signed-off-by: Yann Gautier <yann.gautier@st.com>
diff --git a/lib/aarch32/misc_helpers.S b/lib/aarch32/misc_helpers.S
index e9734ac..8b16f93 100644
--- a/lib/aarch32/misc_helpers.S
+++ b/lib/aarch32/misc_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2021, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -7,6 +7,8 @@
 #include <arch.h>
 #include <asm_macros.S>
 #include <assert_macros.S>
+#include <common/bl_common.h>
+#include <lib/xlat_tables/xlat_tables_defs.h>
 
 	.globl	smc
 	.globl	zeromem
@@ -14,6 +16,9 @@
 	.globl	memcpy4
 	.globl	disable_mmu_icache_secure
 	.globl	disable_mmu_secure
+	.globl	fixup_gdt_reloc
+
+#define PAGE_START_MASK		~(PAGE_SIZE_MASK)
 
 func smc
 	/*
@@ -187,3 +192,124 @@
 	ldr	r1, =(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
 	b	do_disable_mmu
 endfunc disable_mmu_icache_secure
+
+/* ---------------------------------------------------------------------------
+ * Helper to fixup Global Descriptor table (GDT) and dynamic relocations
+ * (.rel.dyn) at runtime.
+ *
+ * This function is meant to be used when the firmware is compiled with -fpie
+ * and linked with -pie options. We rely on the linker script exporting
+ * appropriate markers for start and end of the section. For GOT, we
+ * expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect
+ * __RELA_START__ and __RELA_END__.
+ *
+ * The function takes the limits of the memory to apply fixups to as
+ * arguments (which is usually the limits of the relocable BL image).
+ *   r0 -  the start of the fixup region
+ *   r1 -  the limit of the fixup region
+ * These addresses have to be 4KB page aligned.
+ * ---------------------------------------------------------------------------
+ */
+
+/* Relocation codes */
+#define R_ARM_RELATIVE 	23
+
+func fixup_gdt_reloc
+	mov	r6, r0
+	mov	r7, r1
+
+#if ENABLE_ASSERTIONS
+	/* Test if the limits are 4K aligned */
+	orr	r0, r0, r1
+	mov	r1, #(PAGE_SIZE_MASK)
+	tst	r0, r1
+	ASM_ASSERT(eq)
+#endif
+	/*
+	 * Calculate the offset based on return address in lr.
+	 * Assume that this function is called within a page at the start of
+	 * fixup region.
+	 */
+	ldr	r1, =PAGE_START_MASK
+	and	r2, lr, r1
+	subs	r0, r2, r6	/* Diff(S) = Current Address - Compiled Address */
+	beq	3f		/* Diff(S) = 0. No relocation needed */
+
+	ldr	r1, =__GOT_START__
+	add	r1, r1, r0
+	ldr	r2, =__GOT_END__
+	add	r2, r2, r0
+
+	/*
+	 * GOT is an array of 32_bit addresses which must be fixed up as
+	 * new_addr = old_addr + Diff(S).
+	 * The new_addr is the address currently the binary is executing from
+	 * and old_addr is the address at compile time.
+	 */
+1:	ldr	r3, [r1]
+
+	/* Skip adding offset if address is < lower limit */
+	cmp	r3, r6
+	blo	2f
+
+	/* Skip adding offset if address is > upper limit */
+	cmp	r3, r7
+	bhi	2f
+	add	r3, r3, r0
+	str	r3, [r1]
+
+2:	add	r1, r1, #4
+	cmp	r1, r2
+	blo	1b
+
+	/* Starting dynamic relocations. Use ldr to get RELA_START and END */
+3:	ldr	r1, =__RELA_START__
+	add	r1, r1, r0
+	ldr	r2, =__RELA_END__
+	add	r2, r2, r0
+
+	/*
+	 * According to ELF-32 specification, the RELA data structure is as
+	 * follows:
+	 *	typedef struct {
+	 *		Elf32_Addr r_offset;
+	 *		Elf32_Xword r_info;
+	 *	} Elf32_Rela;
+	 *
+	 * r_offset is address of reference
+	 * r_info is symbol index and type of relocation (in this case
+	 * code 23  which corresponds to R_ARM_RELATIVE).
+	 *
+	 * Size of Elf32_Rela structure is 8 bytes.
+	 */
+
+	/* Skip R_ARM_NONE entry with code 0 */
+1:	ldr	r3, [r1, #4]
+	ands	r3, r3, #0xff
+	beq	2f
+
+#if ENABLE_ASSERTIONS
+	/* Assert that the relocation type is R_ARM_RELATIVE */
+	cmp	r3, #R_ARM_RELATIVE
+	ASM_ASSERT(eq)
+#endif
+	ldr	r3, [r1]	/* r_offset */
+	add	r3, r0, r3	/* Diff(S) + r_offset */
+	ldr 	r4, [r3]
+
+	/* Skip adding offset if address is < lower limit */
+	cmp	r4, r6
+	blo	2f
+
+	/* Skip adding offset if address is >= upper limit */
+	cmp	r4, r7
+	bhs	2f
+
+	add 	r4, r0, r4
+	str	r4, [r3]
+
+2:	add	r1, r1, #8
+	cmp	r1, r2
+	blo	1b
+	bx	lr
+endfunc fixup_gdt_reloc