spl: Add support for a relocating jump to the next phase
When one xPL phase wants to jump to the next, the next phase must be
loaded into its required address. This means that the TEXT_BASE for the
two phases must be different and there cannot be any memory overlap
between the code used by the two phases. It also can mean that phases
need to be moved around to accommodate any size growth.
Having two xPL phases in SRAM at the same time can be tricky if SRAM
is limited, which it often is. It would be better if the second phase
could be loaded somewhere else, then decompressed into place over the
top of the first phase.
Introduce a relocating jump for xPL to support this. This selects a
suitable place to load the (typically compressed) next phase, copies
some decompression code out of the first phase, then jumps to this code
to decompress and start the next phase.
This feature makes it much easier to support Verified Boot for Embedded
(VBE) on RK3399 boards, which have 192KB of SRAM.
Signed-off-by: Simon Glass <sjg@chromium.org>
diff --git a/common/spl/Kconfig b/common/spl/Kconfig
index 4e56d99..94e118f 100644
--- a/common/spl/Kconfig
+++ b/common/spl/Kconfig
@@ -983,6 +983,14 @@
help
SPL uses the chip ID list to identify the NAND flash.
+config SPL_RELOC_LOADER
+ bool "Allow relocating the next phase"
+ help
+ In some cases multiple U-Boot phases need to run in SRAM, typically
+ at the same address. Enable this to support loading the next phase
+ to temporary memory, then copying it into place afterwards, then
+ jumping to it.
+
config SPL_UBI
bool "Support UBI"
help
diff --git a/common/spl/Kconfig.tpl b/common/spl/Kconfig.tpl
index 92d4d43..22ca701 100644
--- a/common/spl/Kconfig.tpl
+++ b/common/spl/Kconfig.tpl
@@ -268,6 +268,14 @@
be already in memory when TPL takes over, e.g. loaded by the boot
ROM.
+config TPL_RELOC_LOADER
+ bool "Allow relocating the next phase"
+ help
+ In some cases multiple U-Boot phases need to run in SRAM, typically
+ at the same address. Enable this to support loading the next phase
+ to temporary memory, then copying it into place afterwards, then
+ jumping to it.
+
config TPL_RTC
bool "Support RTC drivers"
help
diff --git a/common/spl/Kconfig.vpl b/common/spl/Kconfig.vpl
index eb57dfa..97dfc63 100644
--- a/common/spl/Kconfig.vpl
+++ b/common/spl/Kconfig.vpl
@@ -181,6 +181,14 @@
necessary driver support. This enables the drivers in drivers/pci
as part of a VPL build.
+config VPL_RELOC_LOADER
+ bool "Allow relocating the next phase"
+ help
+ In some cases multiple U-Boot phases need to run in SRAM, typically
+ at the same address. Enable this to support loading the next phase
+ to temporary memory, then copying it into place afterwards, then
+ jumping to it.
+
config VPL_RTC
bool "Support RTC drivers"
help
diff --git a/common/spl/Makefile b/common/spl/Makefile
index 75123eb..4c9482b 100644
--- a/common/spl/Makefile
+++ b/common/spl/Makefile
@@ -12,6 +12,7 @@
obj-$(CONFIG_$(PHASE_)LOAD_FIT) += spl_fit.o
obj-$(CONFIG_$(PHASE_)BLK_FS) += spl_blk_fs.o
obj-$(CONFIG_$(PHASE_)LEGACY_IMAGE_FORMAT) += spl_legacy.o
+obj-$(CONFIG_$(PHASE_)RELOC_LOADER) += spl_reloc.o
obj-$(CONFIG_$(PHASE_)NOR_SUPPORT) += spl_nor.o
obj-$(CONFIG_$(PHASE_)XIP_SUPPORT) += spl_xip.o
obj-$(CONFIG_$(PHASE_)YMODEM_SUPPORT) += spl_ymodem.o
diff --git a/common/spl/spl_reloc.c b/common/spl/spl_reloc.c
new file mode 100644
index 0000000..be8349b
--- /dev/null
+++ b/common/spl/spl_reloc.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2024 Google LLC
+ * Written by Simon Glass <sjg@chromium.org>
+ */
+
+#include <gzip.h>
+#include <image.h>
+#include <log.h>
+#include <mapmem.h>
+#include <spl.h>
+#include <asm/global_data.h>
+#include <asm/io.h>
+#include <asm/sections.h>
+#include <asm/unaligned.h>
+#include <linux/types.h>
+#include <lzma/LzmaTypes.h>
+#include <lzma/LzmaDec.h>
+#include <lzma/LzmaTools.h>
+#include <u-boot/crc.h>
+#include <u-boot/lz4.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+/* provide a way to jump straight into the relocation code, for debugging */
+#define DEBUG_JUMP 0
+
+enum {
+ /* margin to allow for stack growth */
+ RELOC_STACK_MARGIN = 0x800,
+
+ /* align base address for DMA controllers which require it */
+ BASE_ALIGN = 0x200,
+
+ STACK_PROT_VALUE = 0x51ce4697,
+};
+
+typedef int (*rcode_func)(struct spl_image_info *image);
+
+static int setup_layout(struct spl_image_info *image, ulong *addrp)
+{
+ ulong base, fdt_size;
+ ulong limit, rcode_base;
+ uint rcode_size;
+ int buf_size, margin;
+ char *rcode_buf;
+
+ limit = ALIGN(map_to_sysmem(&limit) - RELOC_STACK_MARGIN, 8);
+ image->stack_prot = map_sysmem(limit, sizeof(uint));
+ *image->stack_prot = STACK_PROT_VALUE;
+
+ fdt_size = fdt_totalsize(gd->fdt_blob);
+ base = ALIGN(map_to_sysmem(gd->fdt_blob) + fdt_size + BASE_ALIGN - 1,
+ BASE_ALIGN);
+
+ rcode_size = _rcode_end - _rcode_start;
+ rcode_base = limit - rcode_size;
+ buf_size = rcode_base - base;
+ uint need_size = image->size + image->fdt_size;
+ margin = buf_size - need_size;
+ log_debug("spl_reloc %s->%s: margin%s%lx limit %lx fdt_size %lx base %lx avail %x image %x fdt %lx need %x\n",
+ spl_phase_name(spl_phase()), spl_phase_name(spl_phase() + 1),
+ margin >= 0 ? " " : " -", abs(margin), limit, fdt_size, base,
+ buf_size, image->size, image->fdt_size, need_size);
+ if (margin < 0) {
+ log_err("Image size %x but buffer is only %x\n", need_size,
+ buf_size);
+ return -ENOSPC;
+ }
+
+ rcode_buf = map_sysmem(rcode_base, rcode_size);
+ log_debug("_rcode_start %p: %x -- func %p %x\n", _rcode_start,
+ *(uint *)_rcode_start, setup_layout, *(uint *)setup_layout);
+
+ image->reloc_offset = rcode_buf - _rcode_start;
+ log_debug("_rcode start %lx base %lx size %x offset %lx\n",
+ (ulong)map_to_sysmem(_rcode_start), rcode_base, rcode_size,
+ image->reloc_offset);
+
+ memcpy(rcode_buf, _rcode_start, rcode_size);
+
+ image->buf = map_sysmem(base, need_size);
+ image->fdt_buf = image->buf + image->size;
+ image->rcode_buf = rcode_buf;
+ *addrp = base;
+
+ return 0;
+}
+
+int spl_reloc_prepare(struct spl_image_info *image, ulong *addrp)
+{
+ int ret;
+
+ ret = setup_layout(image, addrp);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+typedef void __noreturn (*image_entry_noargs_t)(uint crc, uint unc_len);
+
+/* this is the relocation + jump code that is copied to the top of memory */
+__rcode int rcode_reloc_and_jump(struct spl_image_info *image)
+{
+ image_entry_noargs_t entry = (image_entry_noargs_t)image->entry_point;
+ u32 *dst;
+ ulong image_len;
+ size_t unc_len;
+ int ret, crc;
+ uint magic;
+
+ dst = map_sysmem(image->load_addr, image->size);
+ unc_len = (void *)image->rcode_buf - (void *)dst;
+ image_len = image->size;
+ if (*image->stack_prot != STACK_PROT_VALUE)
+ return -EFAULT;
+ magic = get_unaligned_le32(image->buf);
+ if (CONFIG_IS_ENABLED(LZMA)) {
+ SizeT lzma_len = unc_len;
+
+ ret = lzmaBuffToBuffDecompress((u8 *)dst, &lzma_len,
+ image->buf, image_len);
+ unc_len = lzma_len;
+ } else if (CONFIG_IS_ENABLED(GZIP)) {
+ ret = gunzip(dst, unc_len, image->buf, &image_len);
+ } else if (CONFIG_IS_ENABLED(LZ4) && magic == LZ4F_MAGIC) {
+ ret = ulz4fn(image->buf, image_len, dst, &unc_len);
+ if (ret)
+ return ret;
+ } else {
+ u32 *src, *end, *ptr;
+
+ unc_len = image->size;
+ for (src = image->buf, end = (void *)src + image->size,
+ ptr = dst; src < end;)
+ *ptr++ = *src++;
+ }
+ if (*image->stack_prot != STACK_PROT_VALUE)
+ return -EFAULT;
+
+ /* copy in the FDT if needed */
+ if (image->fdt_size)
+ memcpy(image->fdt_start, image->fdt_buf, image->fdt_size);
+
+ crc = crc8(0, (u8 *)dst, unc_len);
+
+ /* jump to the entry point */
+ entry(crc, unc_len);
+}
+
+int spl_reloc_jump(struct spl_image_info *image, spl_jump_to_image_t jump)
+{
+ rcode_func loader;
+ int ret;
+
+ log_debug("malloc usage %lx bytes (%ld KB of %d KB)\n", gd->malloc_ptr,
+ gd->malloc_ptr / 1024, CONFIG_VAL(SYS_MALLOC_F_LEN) / 1024);
+
+ if (*image->stack_prot != STACK_PROT_VALUE) {
+ log_err("stack busted, cannot continue\n");
+ return -EFAULT;
+ }
+ loader = (rcode_func)(void *)rcode_reloc_and_jump + image->reloc_offset;
+ log_debug("Jumping via %p to %lx - image %p size %x load %lx\n", loader,
+ image->entry_point, image, image->size, image->load_addr);
+
+ log_debug("unc_len %lx\n",
+ image->rcode_buf - map_sysmem(image->load_addr, image->size));
+ if (DEBUG_JUMP) {
+ rcode_reloc_and_jump(image);
+ } else {
+ /*
+ * Must disable LOG_DEBUG since the decompressor cannot call
+ * log functions, printf(), etc.
+ */
+ _Static_assert(DEBUG_JUMP || !_DEBUG,
+ "Cannot have debug output from decompressor");
+ ret = loader(image);
+ }
+
+ return -EFAULT;
+}