mach-snapdragon: parse memory ourselves

The generic memory parsing code in U-Boot behaves in two ways that cause
problems on Qualcomm platforms:

1. It sets gd->ram_size and gd->ram_base to represent a single memory
   block.
2. setup_dest_addr() later relocates U-Boot to ram_base + ram_size, the
   end of that first memory block.

This results in all memory beyond U-Boot being unusable in Linux when
booting with EFI.

Since the ranges in the memory node may be out of order, the only way
for us to correctly determine the relocation address for U-Boot is to
parse all memory regions and find the highest valid address.

We can't use fdtdec_setup_memory_banksize() since it stores the result
in gd->bd which is not yet allocated.

Hence, this commit implements an optimised parser that reads the
memory blocks and stores them in the .data section, where they will
survive relocation.

We set ram_base and ram_size to describe the entire address space of
memory, with the assumption that the last memory region is big enough
for U-Boot, its DTB, and heap. On all boards tested so far this seems
to be a reasonable assumption.

As a nice side effect, our fdt parsing also winds up being faster since
we avoid the overhead of checking address/size-cells or populating
struct resource. We can safely make these optimisations since we only
support ARM64, and trust the reg property to be populated correctly.

After relocation, we then populate gd->bd->bi_dram with the data we
parsed earlier.

Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Signed-off-by: Caleb Connolly <caleb.connolly@linaro.org>
diff --git a/arch/arm/mach-snapdragon/board.c b/arch/arm/mach-snapdragon/board.c
index 92492c3..93de516 100644
--- a/arch/arm/mach-snapdragon/board.c
+++ b/arch/arm/mach-snapdragon/board.c
@@ -38,9 +38,18 @@
 
 struct mm_region *mem_map = rbx_mem_map;
 
+static struct { /* banks parsed pre-relocation; placed in .data to survive relocation */
+	phys_addr_t start; /* bank base address, as read from the /memory "reg" property */
+	phys_size_t size; /* bank length, as read from the /memory "reg" property */
+} prevbl_ddr_banks[CONFIG_NR_DRAM_BANKS] __section(".data") = { 0 };
+
 int dram_init(void)
 {
-	return fdtdec_setup_mem_size_base();
+	/*
+	 * Nothing to do here: gd->ram_base and gd->ram_size have
+	 * already been set up by qcom_parse_memory().
+	 */
+	return 0;
 }
 
 static int ddr_bank_cmp(const void *v1, const void *v2)
@@ -58,21 +67,69 @@
 	return (res1->start >> 24) - (res2->start >> 24);
 }
 
+/* Copy the early-parsed banks into gd->bd; done post-relocation since gd->bd isn't preserved */
+static void qcom_configure_bi_dram(void)
+{
+	int i;
+
+	for (i = 0; i < CONFIG_NR_DRAM_BANKS; i++) {
+		gd->bd->bi_dram[i].start = prevbl_ddr_banks[i].start;
+		gd->bd->bi_dram[i].size = prevbl_ddr_banks[i].size;
+	}
+}
+
 int dram_init_banksize(void)
 {
-	int ret;
+	qcom_configure_bi_dram(); /* fill gd->bd->bi_dram from prevbl_ddr_banks[] */
 
-	ret = fdtdec_setup_memory_banksize();
-	if (ret < 0)
-		return ret;
+	return 0;
+}
 
-	if (CONFIG_NR_DRAM_BANKS < 2)
-		return 0;
+/* Parse /memory "reg" into prevbl_ddr_banks[] and derive gd->ram_base / gd->ram_size */
+static void qcom_parse_memory(void)
+{
+	ofnode node;
+	const fdt64_t *memory;
+	int memsize;
+	phys_addr_t ram_end = 0;
+	int i, j, banks;
+
+	node = ofnode_path("/memory");
+	if (!ofnode_valid(node)) {
+		log_err("No memory node found in device tree!\n");
+		return;
+	}
+	memory = ofnode_read_prop(node, "reg", &memsize);
+	if (!memory) {
+		log_err("No memory configuration was provided by the previous bootloader!\n");
+		return;
+	}
+
+	/* "reg" is read as <u64 start, u64 size> pairs; banks beyond the max are dropped */
+	banks = min(memsize / (2 * sizeof(u64)), (ulong)CONFIG_NR_DRAM_BANKS);
+	if (memsize / sizeof(u64) > CONFIG_NR_DRAM_BANKS * 2)
+		log_err("Provided more than the max of %d memory banks\n", CONFIG_NR_DRAM_BANKS);
+
+	/* Pack the non-empty banks at the front of the array; j counts the valid entries */
+	for (i = 0, j = 0; i < banks; i++) {
+		phys_size_t size = get_unaligned_be64(&memory[2 * i + 1]);
+
+		/* SM8650 boards sometimes have empty regions! */
+		if (!size)
+			continue;
+		prevbl_ddr_banks[j].start = get_unaligned_be64(&memory[2 * i]);
+		prevbl_ddr_banks[j].size = size;
+		ram_end = max(ram_end, prevbl_ddr_banks[j].start + size);
+		j++;
+	}
+
 	/* Sort our RAM banks -_- */
-	qsort(gd->bd->bi_dram, CONFIG_NR_DRAM_BANKS, sizeof(gd->bd->bi_dram[0]), ddr_bank_cmp);
+	qsort(prevbl_ddr_banks, j, sizeof(prevbl_ddr_banks[0]), ddr_bank_cmp);
 
-	return 0;
+	gd->ram_base = prevbl_ddr_banks[0].start;
+	gd->ram_size = ram_end - gd->ram_base;
+	debug("ram_base = %#011lx, ram_size = %#011llx, ram_end = %#011llx\n",
+	      gd->ram_base, gd->ram_size, ram_end);
 }
 
 static void show_psci_version(void)
@@ -110,11 +167,19 @@
 
 	if (internal_valid) {
 		debug("Using built in FDT\n");
-		return (void *)gd->fdt_blob;
 	} else {
 		debug("Using external FDT\n");
-		return (void *)fdt;
+		/* So we can use it before returning */
+		gd->fdt_blob = fdt;
 	}
+
+	/*
+	 * Parse the /memory node while we're here,
+	 * this makes it easy to do other things early.
+	 */
+	qcom_parse_memory();
+
+	return (void *)gd->fdt_blob;
 }
 
 void reset_cpu(void)