bl31: Split into two separate memory regions

Some platforms are extremely memory constrained and must split BL31
between multiple non-contiguous areas in SRAM. Allow the NOBITS
sections (.bss, stacks, page tables, and coherent memory) to be placed
in a separate region of RAM from the loaded firmware image.

Because the NOBITS region may be at a lower address than the rest of
BL31, __RW_{START,END}__ and __BL31_{START,END}__ cannot include this
region, or el3_entrypoint_common would attempt to invalidate the dcache
for the entire address space. New symbols __NOBITS_{START,END}__ are
added when SEPARATE_NOBITS_REGION is enabled, and the dcache for the
NOBITS region is invalidated separately.

Signed-off-by: Samuel Holland <samuel@sholland.org>
Change-Id: Idedfec5e4dbee77e94f2fdd356e6ae6f4dc79d37
diff --git a/Makefile b/Makefile
index 073c2ed..42fdb73 100644
--- a/Makefile
+++ b/Makefile
@@ -736,6 +736,7 @@
 $(eval $(call assert_boolean,RESET_TO_BL31))
 $(eval $(call assert_boolean,SAVE_KEYS))
 $(eval $(call assert_boolean,SEPARATE_CODE_AND_RODATA))
+$(eval $(call assert_boolean,SEPARATE_NOBITS_REGION))
 $(eval $(call assert_boolean,SPIN_ON_BL1_EXIT))
 $(eval $(call assert_boolean,SPM_MM))
 $(eval $(call assert_boolean,TRUSTED_BOARD_BOOT))
@@ -800,6 +801,7 @@
 $(eval $(call add_define,RAS_EXTENSION))
 $(eval $(call add_define,RESET_TO_BL31))
 $(eval $(call add_define,SEPARATE_CODE_AND_RODATA))
+$(eval $(call add_define,SEPARATE_NOBITS_REGION))
 $(eval $(call add_define,RECLAIM_INIT_CODE))
 $(eval $(call add_define,SPD_${SPD}))
 $(eval $(call add_define,SPIN_ON_BL1_EXIT))
diff --git a/bl31/bl31.ld.S b/bl31/bl31.ld.S
index a598e59..42227f0 100644
--- a/bl31/bl31.ld.S
+++ b/bl31/bl31.ld.S
@@ -15,6 +15,11 @@
 
 MEMORY {
     RAM (rwx): ORIGIN = BL31_BASE, LENGTH = BL31_LIMIT - BL31_BASE
+#if SEPARATE_NOBITS_REGION
+    NOBITS (rw!a): ORIGIN = BL31_NOBITS_BASE, LENGTH = BL31_NOBITS_LIMIT - BL31_NOBITS_BASE
+#else
+#define NOBITS RAM
+#endif
 }
 
 #ifdef PLAT_EXTRA_LD_SCRIPT
@@ -198,11 +203,28 @@
     ASSERT(. <= BL31_PROGBITS_LIMIT, "BL31 progbits has exceeded its limit.")
 #endif
 
+#if SEPARATE_NOBITS_REGION
+    /*
+     * Define a linker symbol to mark end of the RW memory area for this
+     * image.
+     */
+    __RW_END__ = .;
+    __BL31_END__ = .;
+
+    ASSERT(. <= BL31_LIMIT, "BL31 image has exceeded its limit.")
+
+    . = BL31_NOBITS_BASE;
+    ASSERT(. == ALIGN(PAGE_SIZE),
+           "BL31 NOBITS base address is not aligned on a page boundary.")
+
+    __NOBITS_START__ = .;
+#endif
+
     stacks (NOLOAD) : {
         __STACKS_START__ = .;
         *(tzfw_normal_stacks)
         __STACKS_END__ = .;
-    } >RAM
+    } >NOBITS
 
     /*
      * The .bss section gets initialised to 0 at runtime.
@@ -262,7 +284,7 @@
         __PMF_TIMESTAMP_END__ = .;
 #endif /* ENABLE_PMF */
         __BSS_END__ = .;
-    } >RAM
+    } >NOBITS
 
     /*
      * The xlat_table section is for full, aligned page tables (4K).
@@ -272,7 +294,7 @@
      */
     xlat_table (NOLOAD) : {
         *(xlat_table)
-    } >RAM
+    } >NOBITS
 
 #if USE_COHERENT_MEM
     /*
@@ -298,9 +320,18 @@
          */
         . = ALIGN(PAGE_SIZE);
         __COHERENT_RAM_END__ = .;
-    } >RAM
+    } >NOBITS
 #endif
 
+#if SEPARATE_NOBITS_REGION
+    /*
+     * Define a linker symbol to mark end of the NOBITS memory area for this
+     * image.
+     */
+    __NOBITS_END__ = .;
+
+    ASSERT(. <= BL31_NOBITS_LIMIT, "BL31 NOBITS region has exceeded its limit.")
+#else
     /*
      * Define a linker symbol to mark end of the RW memory area for this
      * image.
@@ -309,4 +340,5 @@
     __BL31_END__ = .;
 
     ASSERT(. <= BL31_LIMIT, "BL31 image has exceeded its limit.")
+#endif
 }
diff --git a/docs/design/firmware-design.rst b/docs/design/firmware-design.rst
index cae94b5..5fc1335 100644
--- a/docs/design/firmware-design.rst
+++ b/docs/design/firmware-design.rst
@@ -1500,6 +1500,11 @@
 this NOBITS section, making the image unnecessarily bigger. Smaller images
 allow faster loading from the FIP to the main memory.
 
+For BL31, a platform can specify an alternate location for NOBITS sections
+(other than immediately following PROGBITS sections) by setting
+``SEPARATE_NOBITS_REGION`` to 1 and defining ``BL31_NOBITS_BASE`` and
+``BL31_NOBITS_LIMIT``.
+
 Linker scripts and symbols
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/docs/getting_started/build-options.rst b/docs/getting_started/build-options.rst
index 37c28a5..d7bb044 100644
--- a/docs/getting_started/build-options.rst
+++ b/docs/getting_started/build-options.rst
@@ -491,6 +491,13 @@
    pages" section in :ref:`Firmware Design`. This flag is disabled by default and
    affects all BL images.
 
+-  ``SEPARATE_NOBITS_REGION``: Setting this option to ``1`` allows the NOBITS
+   sections of BL31 (.bss, stacks, page tables, and coherent memory) to be
+   allocated in RAM discontiguous from the loaded firmware image. When set, the
+   platform is expected to provide definitions for ``BL31_NOBITS_BASE`` and
+   ``BL31_NOBITS_LIMIT``. When the option is ``0`` (the default), NOBITS
+   sections are placed in RAM immediately following the loaded firmware image.
+
 -  ``SPD``: Choose a Secure Payload Dispatcher component to be built into TF-A.
    This build option is only valid if ``ARCH=aarch64``. The value should be
    the path to the directory containing the SPD source, relative to
diff --git a/include/arch/aarch64/el3_common_macros.S b/include/arch/aarch64/el3_common_macros.S
index b14b7b6..156b18a 100644
--- a/include/arch/aarch64/el3_common_macros.S
+++ b/include/arch/aarch64/el3_common_macros.S
@@ -382,6 +382,14 @@
 		add	x1, x1, :lo12:__RW_END__
 		sub	x1, x1, x0
 		bl	inv_dcache_range
+#if defined(IMAGE_BL31) && SEPARATE_NOBITS_REGION
+		adrp	x0, __NOBITS_START__
+		add	x0, x0, :lo12:__NOBITS_START__
+		adrp	x1, __NOBITS_END__
+		add	x1, x1, :lo12:__NOBITS_END__
+		sub	x1, x1, x0
+		bl	inv_dcache_range
+#endif
 #endif
 		adrp	x0, __BSS_START__
 		add	x0, x0, :lo12:__BSS_START__
diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk
index a1f9db9..53832c5 100644
--- a/make_helpers/defaults.mk
+++ b/make_helpers/defaults.mk
@@ -171,6 +171,10 @@
 # platform Makefile is free to override this value.
 SEPARATE_CODE_AND_RODATA	:= 0
 
+# Put NOBITS sections (.bss, stacks, page tables, and coherent memory) in a
+# separate memory region, which may be discontiguous from the rest of BL31.
+SEPARATE_NOBITS_REGION		:= 0
+
 # If the BL31 image initialisation code is recalimed after use for the secondary
 # cores stack
 RECLAIM_INIT_CODE		:= 0