Enable Link Time Optimization in GCC

This patch enables LTO for TF-A when compiled with GCC.
LTO is disabled by default and is enabled by the
ENABLE_LTO=1 build option.

LTO is enabled only for aarch64 as there seems to be
a bug in the aarch32 compiler when LTO is enabled.

The changes in the makefiles include:
- Adding -flto and associated flags to enable LTO.
- Using gcc as a wrapper at link time instead of ld.
  This is recommended when using LTO as gcc internally
  takes care of invoking the necessary plugins for LTO.
- Adding switches to pass options to ld.
- Adding a flag to disable the fix for erratum cortex-a53-843419
  unless explicitly enabled. This is needed because GCC
  seems to automatically add the erratum fix when used
  as a wrapper for LD.

Additionally, this patch updates the TF-A user guide with
the new build option.

Signed-off-by: zelalem-aweke <zelalem.aweke@arm.com>
Change-Id: I1188c11974da98434b7dc9344e058cd1eacf5468
diff --git a/Makefile b/Makefile
index 721246d..eddd48d 100644
--- a/Makefile
+++ b/Makefile
@@ -204,6 +204,18 @@
 AS			=	$(CC) -c -x assembler-with-cpp $(TF_CFLAGS_$(ARCH))
 CPP			=	$(CC) -E
 PP			=	$(CC) -E
+else ifneq ($(findstring gcc,$(notdir $(CC))),)
+TF_CFLAGS_aarch32	=	$(march32-directive)
+TF_CFLAGS_aarch64	=	$(march64-directive)
+ifeq ($(ENABLE_LTO),1)
+	# Enable LTO only for aarch64
+	ifeq (${ARCH},aarch64)
+		LTO_CFLAGS	=	-flto
+		# Use gcc as a wrapper for the ld, recommended for LTO
+		LINKER		:=	${CROSS_COMPILE}gcc
+	endif
+endif
+LD			=	$(LINKER)
 else
 TF_CFLAGS_aarch32	=	$(march32-directive)
 TF_CFLAGS_aarch64	=	$(march64-directive)
@@ -300,11 +312,28 @@
 ifneq ($(findstring armlink,$(notdir $(LD))),)
 TF_LDFLAGS		+=	--diag_error=warning --lto_level=O1
 TF_LDFLAGS		+=	--remove --info=unused,unusedsymbols
+TF_LDFLAGS		+=	$(TF_LDFLAGS_$(ARCH))
+else ifneq ($(findstring gcc,$(notdir $(LD))),)
+# Pass ld options with Wl or Xlinker switches
+TF_LDFLAGS		+=	-Wl,--fatal-warnings -O1
+TF_LDFLAGS		+=	-Wl,--gc-sections
+ifeq ($(ENABLE_LTO),1)
+	ifeq (${ARCH},aarch64)
+		TF_LDFLAGS	+=	-flto -fuse-linker-plugin
+	endif
+endif
+# GCC automatically adds fix-cortex-a53-843419 flag when used to link
+# which breaks some builds, so disable if errata fix is not explicitly enabled
+ifneq (${ERRATA_A53_843419},1)
+	TF_LDFLAGS	+= 	-mno-fix-cortex-a53-843419
+endif
+TF_LDFLAGS		+= 	-nostdlib
+TF_LDFLAGS		+=	$(subst --,-Xlinker --,$(TF_LDFLAGS_$(ARCH)))
 else
 TF_LDFLAGS		+=	--fatal-warnings -O1
 TF_LDFLAGS		+=	--gc-sections
-endif
 TF_LDFLAGS		+=	$(TF_LDFLAGS_$(ARCH))
+endif
 
 DTC_FLAGS		+=	-I dts -O dtb
 DTC_CPPFLAGS		+=	-nostdinc -Iinclude -undef -x assembler-with-cpp
@@ -405,7 +434,11 @@
 
 ifeq ($(ENABLE_PIE),1)
     TF_CFLAGS		+=	-fpie
-    TF_LDFLAGS		+=	-pie --no-dynamic-linker
+	ifneq ($(findstring gcc,$(notdir $(LD))),)
+		TF_LDFLAGS	+=	-Wl,-pie -Wl,--no-dynamic-linker
+	else
+		TF_LDFLAGS	+=	-pie --no-dynamic-linker
+	endif
 else
     PIE_FOUND		:=	$(findstring --enable-default-pie,${GCC_V_OUTPUT})
     ifneq ($(PIE_FOUND),)
diff --git a/docs/getting_started/build-options.rst b/docs/getting_started/build-options.rst
index fded1e0..051586b 100644
--- a/docs/getting_started/build-options.rst
+++ b/docs/getting_started/build-options.rst
@@ -196,6 +196,10 @@
    builds, but this behaviour can be overridden in each platform's Makefile or
    in the build command line.
 
+-  ``ENABLE_LTO``: Boolean option to enable Link Time Optimization (LTO)
+   support in GCC for TF-A. This option is currently only supported for
+   AArch64. Default is 0.
+
 -  ``ENABLE_MPAM_FOR_LOWER_ELS``: Boolean option to enable lower ELs to use MPAM
    feature. MPAM is an optional Armv8.4 extension that enables various memory
    system components and resources to define partitions; software running at
diff --git a/make_helpers/build_macros.mk b/make_helpers/build_macros.mk
index b89d87e..47f3ebd 100644
--- a/make_helpers/build_macros.mk
+++ b/make_helpers/build_macros.mk
@@ -236,7 +236,7 @@
 
 $(OBJ): $(2) $(filter-out %.d,$(MAKEFILE_LIST)) | bl$(3)_dirs
 	$$(ECHO) "  CC      $$<"
-	$$(Q)$$(CC) $$(TF_CFLAGS) $$(CFLAGS) $(BL_CFLAGS) -D$(IMAGE) $(MAKE_DEP) -c $$< -o $$@
+	$$(Q)$$(CC) $$(LTO_CFLAGS) $$(TF_CFLAGS) $$(CFLAGS) $(BL_CFLAGS) -D$(IMAGE) $(MAKE_DEP) -c $$< -o $$@
 
 -include $(DEP)
 
@@ -433,6 +433,10 @@
 		--map --list="$(MAPFILE)" --scatter=${PLAT_DIR}/scat/bl${1}.scat \
 		$(LDPATHS) $(LIBWRAPPER) $(LDLIBS) $(BL_LIBS) \
 		$(BUILD_DIR)/build_message.o $(OBJS)
+else ifneq ($(findstring gcc,$(notdir $(LD))),)
+	$$(Q)$$(LD) -o $$@ $$(TF_LDFLAGS) $$(LDFLAGS) -Wl,-Map=$(MAPFILE) \
+		-Wl,-T$(LINKERFILE) $(BUILD_DIR)/build_message.o \
+		$(OBJS) $(LDPATHS) $(LIBWRAPPER) $(LDLIBS) $(BL_LIBS)
 else
 	$$(Q)$$(LD) -o $$@ $$(TF_LDFLAGS) $$(LDFLAGS) -Map=$(MAPFILE) \
 		--script $(LINKERFILE) $(BUILD_DIR)/build_message.o \
diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk
index b7fb173..348b3e5 100644
--- a/make_helpers/defaults.mk
+++ b/make_helpers/defaults.mk
@@ -239,3 +239,6 @@
 # implementation variant using the ARMv8.1-LSE compare-and-swap instruction.
 # Default: disabled
 USE_SPINLOCK_CAS := 0
+
+# Enable Link Time Optimization
+ENABLE_LTO			:= 0