ARM: Tegra186: search for best RAM bank

In the future, the list of DRAM regions passed to U-Boot in the DTB may
be quite long and fragmented. Due to this, U-Boot must search through the
regions to find the best region to relocate into, rather than relying on
the current assumption that the top of bank 0 is a reasonable relocation
target. This change implements such searching.

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Reviewed-by: Simon Glass <sjg@chromium.org>
Signed-off-by: Tom Warren <twarren@nvidia.com>
diff --git a/arch/arm/mach-tegra/tegra186/nvtboot_mem.c b/arch/arm/mach-tegra/tegra186/nvtboot_mem.c
index 5a165a9..33e331f 100644
--- a/arch/arm/mach-tegra/tegra186/nvtboot_mem.c
+++ b/arch/arm/mach-tegra/tegra186/nvtboot_mem.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, NVIDIA CORPORATION.
+ * Copyright (c) 2016-2018, NVIDIA CORPORATION.
  *
  * SPDX-License-Identifier: GPL-2.0+
  */
@@ -9,27 +9,51 @@
 #include <fdtdec.h>
 #include <asm/arch/tegra.h>
 
+#define SZ_4G 0x100000000ULL
+
+/*
+ * Size of a region that's large enough to hold the relocated U-Boot and all
+ * other allocations made around it (stack, heap, page tables, etc.)
+ * In practice, running "bdinfo" at the shell prompt, the stack reaches about
+ * 5MB from the address selected for ram_top as of the time of writing,
+ * so a 16MB region should be plenty.
+ */
+#define MIN_USABLE_RAM_SIZE SZ_16M
+/*
+ * The amount of space we expect to require for stack usage. Used to validate
+ * that all reservations fit into the region selected for the relocation target
+ */
+#define MIN_USABLE_STACK_SIZE SZ_1M
+
 DECLARE_GLOBAL_DATA_PTR;
 
 extern unsigned long nvtboot_boot_x0;
 
 /*
- * A parsed version of /memory/reg from the DTB that is passed to U-Boot in x0.
- *
- * We assume bank 0 is RAM completely below 4G mostly ignore other banks;
- * assuming they contain RAM above 4G. This is all a fairly safe assumption,
- * since the L4T kernel makes the same assumption, so the bootloader is
- * unlikely to change.
- *
- * This is written to before relocation, and hence cannot be in .bss, since
- * .bss overlaps the DTB that's appended to the U-Boot binary. The initializer
- * forces this into .data and avoids this issue. This also has the nice side-
- * effect of the content being valid after relocation.
+ * These variables are written to before relocation, and hence cannot be
+ * in .bss, since .bss overlaps the DTB that's appended to the U-Boot binary.
+ * The section attribute forces them into .data and avoids this issue. This
+ * also has the nice side-effect of the content being valid after relocation.
  */
+
+/* A parsed version of /memory/reg from the DTB passed to U-Boot in x0 */
 static struct {
 	u64 start;
 	u64 size;
-} ram_banks[CONFIG_NR_DRAM_BANKS] = {{1}};
+} ram_banks[CONFIG_NR_DRAM_BANKS] __attribute__((section(".data")));
+
+/* The number of valid entries in ram_banks[] */
+static int ram_bank_count __attribute__((section(".data")));
+
+/*
+ * The usable top-of-RAM for U-Boot. This is both:
+ * a) Below 4GB to avoid issues with peripherals that use 32-bit addressing.
+ * b) At the end of a region that has enough space to hold the relocated U-Boot
+ *    and all other allocations made around it (stack, heap, page tables, etc.)
+ */
+static u64 ram_top __attribute__((section(".data")));
+/* The base address of the region of RAM that ends at ram_top */
+static u64 region_base __attribute__((section(".data")));
 
 int dram_init(void)
 {
@@ -59,32 +83,58 @@
 	len /= (na + ns);	/* Convert cells to number of banks */
 	if (len > ARRAY_SIZE(ram_banks))
 		len = ARRAY_SIZE(ram_banks);
+	ram_bank_count = len;
 
 	gd->ram_size = 0;
-	for (i = 0; i < len; i++) {
+	for (i = 0; i < ram_bank_count; i++) {
+		u64 bank_end, usable_bank_size;
+
 		ram_banks[i].start = fdt_read_number(prop, na);
 		prop += na;
 		ram_banks[i].size = fdt_read_number(prop, ns);
 		prop += ns;
 		gd->ram_size += ram_banks[i].size;
+		debug("Bank %d: start: %llx size: %llx\n", i,
+		      ram_banks[i].start, ram_banks[i].size);
+
+		bank_end = ram_banks[i].start + ram_banks[i].size;
+		debug("  end  %llx\n", bank_end);
+		if (bank_end > SZ_4G)
+			bank_end = SZ_4G;
+		debug("  end  %llx (usable)\n", bank_end);
+		usable_bank_size = bank_end - ram_banks[i].start;
+		debug("  size %llx (usable)\n", usable_bank_size);
+		if ((usable_bank_size >= MIN_USABLE_RAM_SIZE) &&
+		    (bank_end > ram_top)) {
+			ram_top = bank_end;
+			region_base = ram_banks[i].start;
+			debug("ram top now %llx\n", ram_top);
+		}
+	}
+	if (!ram_top) {
+		pr_err("Can't find a usable RAM top");
+		hang();
 	}
 
 	return 0;
 }
 
-extern unsigned long nvtboot_boot_x0;
-
 int dram_init_banksize(void)
 {
 	int i;
 
+	if ((gd->start_addr_sp - region_base) < MIN_USABLE_STACK_SIZE) {
+		pr_err("Reservations exceed chosen region size");
+		hang();
+	}
+
-	for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
+	for (i = 0; i < ram_bank_count; i++) {
 		gd->bd->bi_dram[i].start = ram_banks[i].start;
 		gd->bd->bi_dram[i].size = ram_banks[i].size;
 	}
 
 #ifdef CONFIG_PCI
-	gd->pci_ram_top = gd->bd->bi_dram[0].start + gd->bd->bi_dram[0].size;
+	gd->pci_ram_top = ram_top;
 #endif
 
 	return 0;
@@ -92,5 +142,5 @@
 
 ulong board_get_usable_ram_top(ulong total_size)
 {
-	return ram_banks[0].start + ram_banks[0].size;
+	return ram_top;
 }