PIE: Use PC relative adrp/adr for symbol reference

This patch fixes up the AArch64 assembly code to use
adrp/adr instructions instead of the ldr instruction
to reference symbols. This allows these assembly
sequences to be position independent. Note that the
references to sizes have been replaced with a
calculation of the size at runtime. This is because a
size is a constant value that does not depend on the
execution address, and loading it with PC relative
instructions would make it relative to the execution
address. Also, we cannot use the `ldr` instruction to
load a size, as it generates a dynamic relocation entry
which must *not* be fixed up, and it is difficult for a
dynamic loader to differentiate which entries need to
be skipped.

Change-Id: I8bf4ed5c58a9703629e5498a27624500ef40a836
Signed-off-by: Soby Mathew <soby.mathew@arm.com>
diff --git a/bl2/aarch64/bl2_entrypoint.S b/bl2/aarch64/bl2_entrypoint.S
index bc8cbfd..30a5c59 100644
--- a/bl2/aarch64/bl2_entrypoint.S
+++ b/bl2/aarch64/bl2_entrypoint.S
@@ -70,13 +70,19 @@
 	 *   - the coherent memory section.
 	 * ---------------------------------------------
 	 */
-	ldr	x0, =__BSS_START__
-	ldr	x1, =__BSS_SIZE__
+	adrp	x0, __BSS_START__
+	add	x0, x0, :lo12:__BSS_START__
+	adrp	x1, __BSS_END__
+	add	x1, x1, :lo12:__BSS_END__
+	sub	x1, x1, x0
 	bl	zeromem
 
 #if USE_COHERENT_MEM
-	ldr	x0, =__COHERENT_RAM_START__
-	ldr	x1, =__COHERENT_RAM_UNALIGNED_SIZE__
+	adrp	x0, __COHERENT_RAM_START__
+	add	x0, x0, :lo12:__COHERENT_RAM_START__
+	adrp	x1, __COHERENT_RAM_END_UNALIGNED__
+	add	x1, x1, :lo12:__COHERENT_RAM_END_UNALIGNED__
+	sub	x1, x1, x0
 	bl	zeromem
 #endif
 
diff --git a/include/common/aarch64/asm_macros.S b/include/common/aarch64/asm_macros.S
index 9621a1c..91416e4 100644
--- a/include/common/aarch64/asm_macros.S
+++ b/include/common/aarch64/asm_macros.S
@@ -105,8 +105,9 @@
 	 * Clobber: X30, X1, X2
 	 */
 	.macro get_my_mp_stack _name, _size
-	bl  plat_my_core_pos
-	ldr x2, =(\_name + \_size)
+	bl	plat_my_core_pos
+	adrp	x2, (\_name + \_size)
+	add	x2, x2, :lo12:(\_name + \_size)
 	mov x1, #\_size
 	madd x0, x0, x1, x2
 	.endm
@@ -117,7 +118,8 @@
 	 * Out: X0 = physical address of stack base
 	 */
 	.macro get_up_stack _name, _size
-	ldr x0, =(\_name + \_size)
+	adrp	x0, (\_name + \_size)
+	add	x0, x0, :lo12:(\_name + \_size)
 	.endm
 
 	/*
diff --git a/include/common/aarch64/el3_common_macros.S b/include/common/aarch64/el3_common_macros.S
index 143c70c..4902583 100644
--- a/include/common/aarch64/el3_common_macros.S
+++ b/include/common/aarch64/el3_common_macros.S
@@ -283,26 +283,38 @@
 		 * an earlier boot loader stage.
 		 * -------------------------------------------------------------
 		 */
-		ldr	x0, =__RW_START__
-		ldr	x1, =__RW_END__
+		adrp	x0, __RW_START__
+		add	x0, x0, :lo12:__RW_START__
+		adrp	x1, __RW_END__
+		add	x1, x1, :lo12:__RW_END__
 		sub	x1, x1, x0
 		bl	inv_dcache_range
 #endif
+		adrp	x0, __BSS_START__
+		add	x0, x0, :lo12:__BSS_START__
 
-		ldr	x0, =__BSS_START__
-		ldr	x1, =__BSS_SIZE__
+		adrp	x1, __BSS_END__
+		add	x1, x1, :lo12:__BSS_END__
+		sub	x1, x1, x0
 		bl	zeromem
 
 #if USE_COHERENT_MEM
-		ldr	x0, =__COHERENT_RAM_START__
-		ldr	x1, =__COHERENT_RAM_UNALIGNED_SIZE__
+		adrp	x0, __COHERENT_RAM_START__
+		add	x0, x0, :lo12:__COHERENT_RAM_START__
+		adrp	x1, __COHERENT_RAM_END_UNALIGNED__
+		add	x1, x1, :lo12: __COHERENT_RAM_END_UNALIGNED__
+		sub	x1, x1, x0
 		bl	zeromem
 #endif
 
 #if defined(IMAGE_BL1) || (defined(IMAGE_BL2) && BL2_IN_XIP_MEM)
-		ldr	x0, =__DATA_RAM_START__
-		ldr	x1, =__DATA_ROM_START__
-		ldr	x2, =__DATA_SIZE__
+		adrp	x0, __DATA_RAM_START__
+		add	x0, x0, :lo12:__DATA_RAM_START__
+		adrp	x1, __DATA_ROM_START__
+		add	x1, x1, :lo12:__DATA_ROM_START__
+		adrp	x2, __DATA_RAM_END__
+		add	x2, x2, :lo12:__DATA_RAM_END__
+		sub	x2, x2, x0
 		bl	memcpy16
 #endif
 	.endif /* _init_c_runtime */
diff --git a/include/lib/pmf/pmf_asm_macros.S b/include/lib/pmf/pmf_asm_macros.S
index d58829e..5e19e62 100644
--- a/include/lib/pmf/pmf_asm_macros.S
+++ b/include/lib/pmf/pmf_asm_macros.S
@@ -18,10 +18,12 @@
 	mov	x9, x30
 	bl	plat_my_core_pos
 	mov	x30, x9
-	ldr	x1, =__PERCPU_TIMESTAMP_SIZE__
+	adr	x2, __PMF_PERCPU_TIMESTAMP_END__
+	adr	x1, __PMF_TIMESTAMP_START__
+	sub	x1, x2, x1
 	mov	x2, #(\_tid * PMF_TS_SIZE)
 	madd	x0, x0, x1, x2
-	ldr	x1, =pmf_ts_mem_\_name
+	adr	x1, pmf_ts_mem_\_name
 	add	x0, x0, x1
 	.endm
 
diff --git a/lib/romlib/init.s b/lib/romlib/init.s
index 5cf2aca..7d97e4d 100644
--- a/lib/romlib/init.s
+++ b/lib/romlib/init.s
@@ -5,7 +5,7 @@
  */
 
 	.globl	rom_lib_init
-	.extern	__DATA_RAM_START__, __DATA_ROM_START__, __DATA_SIZE__
+	.extern	__DATA_RAM_START__, __DATA_ROM_START__, __DATA_RAM_END__
 	.extern	memset, memcpy
 
 rom_lib_init:
@@ -16,13 +16,19 @@
 
 1:	stp	x29, x30, [sp, #-16]!
 	adrp	x0, __DATA_RAM_START__
-	ldr	x1,= __DATA_ROM_START__
-	ldr	x2, =__DATA_SIZE__
+	adrp	x1, __DATA_ROM_START__
+	add	x1, x1, :lo12:__DATA_ROM_START__
+	adrp	x2, __DATA_RAM_END__
+	add	x2, x2, :lo12:__DATA_RAM_END__
+	sub	x2, x2, x0
 	bl	memcpy
 
-	ldr	x0, =__BSS_START__
+	adrp	x0,__BSS_START__
+	add	x0, x0, :lo12:__BSS_START__
 	mov	x1, #0
-	ldr	x2, =__BSS_SIZE__
+	adrp	x2, __BSS_END__
+	add	x2, x2, :lo12:__BSS_END__
+	sub	x2, x2, x0
 	bl	memset
 	ldp	x29, x30, [sp], #16
 
diff --git a/lib/xlat_tables_v2/aarch64/enable_mmu.S b/lib/xlat_tables_v2/aarch64/enable_mmu.S
index 21717d2..504c03c 100644
--- a/lib/xlat_tables_v2/aarch64/enable_mmu.S
+++ b/lib/xlat_tables_v2/aarch64/enable_mmu.S
@@ -45,7 +45,8 @@
 		tlbi_invalidate_all \el
 
 		mov	x7, x0
-		ldr	x0, =mmu_cfg_params
+		adrp	x0, mmu_cfg_params
+		add	x0, x0, :lo12:mmu_cfg_params
 
 		/* MAIR */
 		ldr	x1, [x0, #(MMU_CFG_MAIR << 3)]