Merge changes from topic "fpga_generic" into integration

* changes:
  arm_fpga: Add platform documentation
  arm_fpga: Add post-build linker script
  arm_fpga: Add ROM trampoline
  arm_fpga: Add devicetree file
  arm_fpga: Remove SPE PMU DT node if SPE is not available
  arm_fpga: Adjust GICR size in DT to match number of cores
  fdt: Add function to adjust GICv3 redistributor size
  drivers: arm: gicv3: Allow detecting number of cores
diff --git a/common/fdt_fixup.c b/common/fdt_fixup.c
index 980e60d..a1604e7 100644
--- a/common/fdt_fixup.c
+++ b/common/fdt_fixup.c
@@ -377,3 +377,64 @@
 
 	return offs;
 }
+
+/**
+ * fdt_adjust_gic_redist() - Adjust GICv3 redistributor size
+ * @dtb: Pointer to the DT blob in memory
+ * @nr_cores: Number of CPU cores on this system.
+ * @gicr_frame_size: Size of the GICR frame per core: 262144 (four 64K pages)
+ *		     for a GICv4 compatible redistributor, 131072 (two 64K
+ *		     pages) without direct injection of virtual interrupts.
+ *
+ * The redistributor region size depends on the number of cores, so it cannot
+ * be hardcoded into the DTB for a dynamic topology. Find the "arm,gic-v3"
+ * node and patch the size of its second "reg" entry to cover @nr_cores.
+ *
+ * Return: 0 on success, negative error value otherwise (-EINVAL if the
+ * parent's #size-cells is not 1 or 2, or the size does not fit one cell).
+ */
+int fdt_adjust_gic_redist(void *dtb, unsigned int nr_cores,
+			  unsigned int gicr_frame_size)
+{
+	int offset = fdt_node_offset_by_compatible(dtb, 0, "arm,gic-v3");
+	/* Multiply in 64 bits: a 32-bit product could silently wrap around. */
+	uint64_t redist_size = (uint64_t)nr_cores * gicr_frame_size;
+	uint64_t redist_size_64;
+	uint32_t redist_size_32;
+	void *val;
+	int parent, ac, sc;
+
+	if (offset < 0) {
+		return offset;
+	}
+
+	parent = fdt_parent_offset(dtb, offset);
+	if (parent < 0) {
+		return parent;
+	}
+	ac = fdt_address_cells(dtb, parent);
+	sc = fdt_size_cells(dtb, parent);
+	/* The value buffers below only hold one or two size cells. */
+	if (ac < 0 || sc < 1 || sc > 2) {
+		return -EINVAL;
+	}
+
+	if (sc == 1) {
+		if ((redist_size >> 32) != 0U) {
+			return -EINVAL;	/* too large for a single cell */
+		}
+		redist_size_32 = cpu_to_fdt32((uint32_t)redist_size);
+		val = &redist_size_32;
+	} else {
+		redist_size_64 = cpu_to_fdt64(redist_size);
+		val = &redist_size_64;
+	}
+
+	/*
+	 * The redistributor is the second "reg" entry: skip the first entry
+	 * (address and size) and the second address to reach its size.
+	 */
+	return fdt_setprop_inplace_namelen_partial(dtb, offset, "reg", 3,
+						   (ac + sc + ac) * 4,
+						   val, sc * 4);
+}
diff --git a/docs/plat/arm/arm_fpga/index.rst b/docs/plat/arm/arm_fpga/index.rst
new file mode 100644
index 0000000..5427c1d
--- /dev/null
+++ b/docs/plat/arm/arm_fpga/index.rst
@@ -0,0 +1,97 @@
+Arm FPGA Platform
+=================
+
+This platform supports FPGA images used internally in Arm Ltd., for
+testing and bringup of new cores. With that focus, peripheral support is
+minimal: there is no mass storage or display output, for instance. Also
+this port ignores any power management features of the platform.
+Some interconnect setup is done internally by the platform, so the TF-A code
+just needs to set up the UART and the GIC.
+
+The FPGA platform requires a DTB to be passed on to the non-secure payload
+(mostly Linux), so we let TF-A use information from that DTB for dynamic
+configuration: the UART and GIC base addresses are read from there.
+
+As a result this port is a fairly generic BL31-only port, which can serve
+as a template for a minimal new (and possibly DT-based) platform port.
+
+The aim of this port is to support as many FPGA images as possible with
+a single build. Image specific data must be described in the DTB or should
+be auto-detected at runtime.
+
+As the number and topology layout of the CPU cores differs significantly
+across the various images, this is detected at runtime by BL31.
+The /cpus node in the DT will be added and filled accordingly, as long as
+it does not exist already.
+
+Platform-specific build options
+-------------------------------
+
+-  ``SUPPORT_UNKNOWN_MPID`` : Boolean option to allow unknown MPIDR registers.
+   Normally TF-A panics if it encounters a MPID value not matched to its
+   internal list, but for new or experimental cores this creates a lot of
+   churn. With this option, the code will fall back to some basic CPU support
+   code (only architectural system registers, and no errata).
+   Default value of this flag is 1.
+
+-  ``PRELOADED_BL33_BASE`` : Physical address of the BL33 non-secure payload.
+   It must have been loaded into DRAM already, typically this is done by
+   the script that also loads BL31 and the DTB.
+   It defaults to 0x80080000, which is the traditional load address for an
+   arm64 Linux kernel.
+
+-  ``FPGA_PRELOADED_DTB_BASE`` : Physical address of the flattened device
+   tree blob (DTB). This DT will be used by TF-A for dynamic configuration,
+   so it must describe at least the UART and a GICv3 interrupt controller.
+   The DT gets amended by the code, to potentially add a command line and
+   fill the CPU topology nodes. It will also be passed on to BL33, by
+   putting its address into the x0 register before jumping to the entry
+   point (following the Linux kernel boot protocol).
+   It defaults to 0x80070000, which is 64KB before the BL33 load address.
+
+-  ``FPGA_PRELOADED_CMD_LINE`` : Physical address of the command line to
+   put into the devicetree blob. Due to the lack of a proper bootloader,
+   a command line can be put somewhere into memory, so that BL31 will
+   detect it and copy it into the DTB passed on to BL33.
+   To avoid random garbage, there needs to be a "CMD:" signature before the
+   actual command line.
+   Defaults to 0x1000, which is normally in the "ROM" space of the typical
+   FPGA image (which can be written by the FPGA payload uploader, but is
+   read-only to the CPU). The FPGA payload tool should be given a text file
+   containing the desired command line, prefixed by the "CMD:" signature.
+
+Building the TF-A image
+-----------------------
+
+   .. code:: shell
+
+       make PLAT=arm_fpga DEBUG=1
+
+   This will use the default load addresses as described above. When those
+   addresses need to differ for a certain setup, they can be passed on the
+   make command line:
+
+   .. code:: shell
+
+       make PLAT=arm_fpga DEBUG=1 PRELOADED_BL33_BASE=0x80200000 FPGA_PRELOADED_DTB_BASE=0x80180000 bl31
+
+Running the TF-A image
+----------------------
+
+After building TF-A, the actual TF-A code will be located in ``bl31.bin`` in
+the build directory.
+Additionally there is a ``bl31.axf`` ELF file, which contains BL31, as well
+as some simple ROM trampoline code (required by the Arm FPGA boot flow) and
+a generic DTB to support most of the FPGA images. This can be simply handed
+over to the FPGA payload uploader, which will take care of loading the
+components at their respective load addresses. In addition to this file
+you need at least a BL33 payload (typically a Linux kernel image), optionally
+a Linux initrd image file and possibly a command line:
+
+   .. code:: shell
+
+       fpga-run ... -m bl31.axf -l auto -m Image -l 0x80080000 -m initrd.gz -l 0x84000000 -m cmdline.txt -l 0x1000
+
+--------------
+
+*Copyright (c) 2020, Arm Limited. All rights reserved.*
diff --git a/docs/plat/arm/index.rst b/docs/plat/arm/index.rst
index 1afe475..9c2fcb1 100644
--- a/docs/plat/arm/index.rst
+++ b/docs/plat/arm/index.rst
@@ -9,6 +9,7 @@
    fvp/index
    fvp-ve/index
    tc0/index
+   arm_fpga/index
    arm-build-options
 
 This chapter holds documentation related to Arm's development platforms,
diff --git a/drivers/arm/gic/v3/gicv3_helpers.c b/drivers/arm/gic/v3/gicv3_helpers.c
index 09fa678..ff346f9 100644
--- a/drivers/arm/gic/v3/gicv3_helpers.c
+++ b/drivers/arm/gic/v3/gicv3_helpers.c
@@ -326,3 +326,33 @@
 
 	return ctlr_enable;
 }
+
+/**
+ * gicv3_rdistif_get_number_frames() - count the frames in a GICR region
+ * @gicr_frame: base address of the first frame of the GICR region
+ *
+ * Walk the GICR frames of a region, reading GICR_TYPER of each one, and stop
+ * at the frame that has the LAST bit set, or once PLATFORM_CORE_COUNT frames
+ * have been counted. On most systems the result equals the number of
+ * cores served by the redistributor, though some of them may be disabled.
+ * Every frame up to the one marked LAST is assumed to be accessible.
+ * A platform with several GICR regions has to call this function once per
+ * region, passing the respective GICR base address each time.
+ *
+ * Return: number of valid GICR frames (at least 1, up to PLATFORM_CORE_COUNT)
+ */
+unsigned int gicv3_rdistif_get_number_frames(const uintptr_t gicr_frame)
+{
+	uintptr_t frame = gicr_frame;
+	unsigned int nr_frames = 1U;
+
+	while (nr_frames < PLATFORM_CORE_COUNT) {
+		if ((gicr_read_typer(frame) & TYPER_LAST_BIT) != 0U) {
+			return nr_frames;
+		}
+		frame += (1U << GICR_PCPUBASE_SHIFT);
+		nr_frames++;
+	}
+
+	return nr_frames;
+}
diff --git a/fdts/arm_fpga.dts b/fdts/arm_fpga.dts
new file mode 100644
index 0000000..6a966fd
--- /dev/null
+++ b/fdts/arm_fpga.dts
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: (GPL-2.0 or BSD-3-Clause)
+/*
+ * Copyright (c) 2020, Arm Limited. All rights reserved.
+ *
+ * Devicetree for the Arm Ltd. FPGA platform
+ * Number and kind of CPU cores differs from image to image, so the
+ * topology is auto-detected by BL31, and the /cpus node is created and
+ * populated accordingly at runtime.
+ */
+
+#include <dt-bindings/interrupt-controller/arm-gic.h>
+
+/dts-v1/;
+
+/ {
+	model = "ARM FPGA";
+	compatible = "arm,fpga", "arm,vexpress";
+	interrupt-parent = <&gic>;
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	aliases {
+		serial0 = &dbg_uart;
+	};
+
+	chosen {
+		stdout-path = "serial0:38400n8";
+		bootargs = "console=ttyAMA0,38400n8 earlycon";
+		/* Allow uploading a generous 100MB (0x6400000) initrd payload. */
+		linux,initrd-start = <0x0 0x84000000>;
+		linux,initrd-end = <0x0 0x8a400000>;
+	};
+
+	/* /cpus node will be added by BL31 at runtime. */
+
+	psci {
+		compatible = "arm,psci-0.2";
+		method = "smc";
+	};
+
+	timer {
+		compatible = "arm,armv8-timer";
+		clock-frequency = <10000000>;
+		interrupts = <GIC_PPI 13 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_PPI 14 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_PPI 11 IRQ_TYPE_LEVEL_LOW>,
+			     <GIC_PPI 10 IRQ_TYPE_LEVEL_LOW>;
+	};
+
+	pmu {
+		compatible = "arm,armv8-pmuv3";
+		interrupts = <GIC_PPI 7 IRQ_TYPE_LEVEL_HIGH>;
+	};
+
+	/* This node will be removed at runtime on cores without SPE. */
+	spe-pmu {
+		compatible = "arm,statistical-profiling-extension-v1";
+		interrupts = <GIC_PPI 5 IRQ_TYPE_LEVEL_HIGH>;
+	};
+
+	memory@80000000 {
+		device_type = "memory";
+		reg = <0x0 0x80000000 0x0 0x80000000>,
+		      <0x8 0x80000000 0x1 0x80000000>;
+	};
+
+
+	bus_refclk: refclk {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <100000000>;
+		clock-output-names = "apb_pclk";
+	};
+
+	uartclk: baudclock {
+		compatible = "fixed-clock";
+		#clock-cells = <0>;
+		clock-frequency = <10000000>;
+		clock-output-names = "uartclk";
+	};
+
+	dbg_uart: serial@7ff80000 {
+		compatible = "arm,pl011", "arm,primecell";
+		reg = <0x0 0x7ff80000 0x0 0x00001000>;
+		interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&uartclk>, <&bus_refclk>;
+		clock-names = "uartclk", "apb_pclk";
+	};
+
+	gic: interrupt-controller@30000000 {
+		compatible = "arm,gic-v3";
+		#address-cells = <2>;
+		#interrupt-cells = <3>;
+		#size-cells = <2>;
+		ranges;
+		interrupt-controller;
+		reg = <0x0 0x30000000 0x0 0x00010000>,	/* GICD */
+		      /* GICR size is adjusted at runtime to match the cores. */
+		      <0x0 0x30040000 0x0 0x00020000>;	/* GICR for one core */
+		interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
+	};
+};
diff --git a/include/common/fdt_fixup.h b/include/common/fdt_fixup.h
index 29d8b3a..2e9d49d 100644
--- a/include/common/fdt_fixup.h
+++ b/include/common/fdt_fixup.h
@@ -13,5 +13,7 @@
 			    uintptr_t base, size_t size);
 int fdt_add_cpus_node(void *dtb, unsigned int afflv0,
 		      unsigned int afflv1, unsigned int afflv2);
+int fdt_adjust_gic_redist(void *dtb, unsigned int nr_cores,
+			  unsigned int gicr_frame_size);
 
 #endif /* FDT_FIXUP_H */
diff --git a/include/drivers/arm/gicv3.h b/include/drivers/arm/gicv3.h
index 18d5b73..d8ac4cb 100644
--- a/include/drivers/arm/gicv3.h
+++ b/include/drivers/arm/gicv3.h
@@ -488,6 +488,7 @@
 void gicv3_rdistif_init(unsigned int proc_num);
 void gicv3_rdistif_on(unsigned int proc_num);
 void gicv3_rdistif_off(unsigned int proc_num);
+unsigned int gicv3_rdistif_get_number_frames(const uintptr_t gicr_frame);
 void gicv3_cpuif_enable(unsigned int proc_num);
 void gicv3_cpuif_disable(unsigned int proc_num);
 unsigned int gicv3_get_pending_interrupt_type(void);
diff --git a/plat/arm/board/arm_fpga/build_axf.ld.S b/plat/arm/board/arm_fpga/build_axf.ld.S
new file mode 100644
index 0000000..d7cd008
--- /dev/null
+++ b/plat/arm/board/arm_fpga/build_axf.ld.S
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020, ARM Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Linker script for the Arm Ltd. FPGA boards to generate an ELF file that
+ * contains the ROM trampoline, BL31 and the DTB.
+ *
+ * This allows to pass just one file to the uploader tool, and automatically
+ * provides the correct load addresses.
+ */
+
+#include <platform_def.h>
+
+OUTPUT_FORMAT("elf64-littleaarch64")
+OUTPUT_ARCH(aarch64)
+
+INPUT(./bl31/bl31.elf)
+INPUT(./rom_trampoline.o)
+
+TARGET(binary)
+INPUT(./fdts/arm_fpga.dtb)
+
+ENTRY(_start)
+
+SECTIONS	/* Output layout: ROM trampoline, then BL31, then the DTB. */
+{
+	.rom (0x0): {			/* trampoline code, placed at 0x0 */
+		*rom_trampoline.o(.text*)
+		KEEP(*(.rom))
+	}
+
+	.bl31 (BL31_BASE): {		/* BL31 sections, placed at BL31_BASE */
+		ASSERT(. == ALIGN(PAGE_SIZE), "BL31_BASE is not page aligned");
+		*bl31.elf(.text* .data* .rodata* ro* .bss*)
+		*bl31.elf(.stack)
+	}
+
+	.dtb (FPGA_PRELOADED_DTB_BASE): {	/* binary DTB input file */
+		ASSERT(. == ALIGN(8), "DTB address is not 8-byte aligned");
+		*arm_fpga.dtb
+	}
+
+	/DISCARD/ : { *(.debug_*) }
+	/DISCARD/ : { *(.note*) }
+	/DISCARD/ : { *(.comment*) }
+}
diff --git a/plat/arm/board/arm_fpga/fpga_bl31_setup.c b/plat/arm/board/arm_fpga/fpga_bl31_setup.c
index de6d9d5..a5f5ea0 100644
--- a/plat/arm/board/arm_fpga/fpga_bl31_setup.c
+++ b/plat/arm/board/arm_fpga/fpga_bl31_setup.c
@@ -9,8 +9,10 @@
 
 #include <common/fdt_fixup.h>
 #include <common/fdt_wrappers.h>
+#include <drivers/arm/gicv3.h>
 #include <drivers/delay_timer.h>
 #include <drivers/generic_delay_timer.h>
+#include <lib/extensions/spe.h>
 #include <libfdt.h>
 
 #include "fpga_private.h"
@@ -210,6 +212,26 @@
 		if (err < 0) {
 			ERROR("Error %d creating the /cpus DT node\n", err);
 			panic();
+		} else {
+			unsigned int nr_cores = fpga_get_nr_gic_cores();
+
+			INFO("Adjusting GICR DT region to cover %u cores\n",
+			      nr_cores);
+			err = fdt_adjust_gic_redist(fdt, nr_cores,
+						    1U << GICR_PCPUBASE_SHIFT);
+			if (err < 0) {
+				ERROR("Error %d fixing up GIC DT node\n", err);
+			}
+		}
+	}
+
+	/* Check whether we support the SPE PMU. Remove the DT node if not. */
+	if (!spe_supported()) {
+		int node = fdt_node_offset_by_compatible(fdt, 0,
+				     "arm,statistical-profiling-extension-v1");
+
+		if (node >= 0) {
+			fdt_del_node(fdt, node);
 		}
 	}
 
diff --git a/plat/arm/board/arm_fpga/fpga_gicv3.c b/plat/arm/board/arm_fpga/fpga_gicv3.c
index 9fb5fa9..bfc116b 100644
--- a/plat/arm/board/arm_fpga/fpga_gicv3.c
+++ b/plat/arm/board/arm_fpga/fpga_gicv3.c
@@ -77,3 +77,8 @@
 	gicv3_cpuif_disable(plat_my_core_pos());
 	gicv3_rdistif_off(plat_my_core_pos());
 }
+
+unsigned int fpga_get_nr_gic_cores(void)	/* frames in the GICR region */
+{
+	return gicv3_rdistif_get_number_frames(fpga_gicv3_driver_data.gicr_base);
+}
diff --git a/plat/arm/board/arm_fpga/fpga_private.h b/plat/arm/board/arm_fpga/fpga_private.h
index 47059d6..1ca241f 100644
--- a/plat/arm/board/arm_fpga/fpga_private.h
+++ b/plat/arm/board/arm_fpga/fpga_private.h
@@ -24,6 +24,7 @@
 void fpga_pwr_gic_on_finish(void);
 void fpga_pwr_gic_off(void);
 unsigned int plat_fpga_calc_core_pos(uint32_t mpid);
+unsigned int fpga_get_nr_gic_cores(void);
 
 #endif /* __ASSEMBLER__ */
 
diff --git a/plat/arm/board/arm_fpga/platform.mk b/plat/arm/board/arm_fpga/platform.mk
index 8f0ff0b..ab576b6 100644
--- a/plat/arm/board/arm_fpga/platform.mk
+++ b/plat/arm/board/arm_fpga/platform.mk
@@ -89,6 +89,8 @@
 				plat/common/plat_gicv3.c		\
 				plat/arm/board/arm_fpga/fpga_gicv3.c
 
+FDT_SOURCES		:=	fdts/arm_fpga.dts
+
 PLAT_INCLUDES		:=	-Iplat/arm/board/arm_fpga/include
 
 PLAT_BL_COMMON_SOURCES	:=	plat/arm/board/arm_fpga/${ARCH}/fpga_helpers.S
@@ -106,4 +108,11 @@
 				${FPGA_CPU_LIBS}				\
 				${FPGA_GIC_SOURCES}
 
+$(eval $(call MAKE_S,$(BUILD_PLAT),plat/arm/board/arm_fpga/rom_trampoline.S,31))
+$(eval $(call MAKE_LD,$(BUILD_PLAT)/build_axf.ld,plat/arm/board/arm_fpga/build_axf.ld.S,31))
+
+bl31.axf: bl31 dtbs ${BUILD_PLAT}/rom_trampoline.o ${BUILD_PLAT}/build_axf.ld
+	$(ECHO) "  LD      $@"
+	$(Q)$(LD) -T ${BUILD_PLAT}/build_axf.ld -L ${BUILD_PLAT} --strip-debug -o ${BUILD_PLAT}/bl31.axf
+
-all: bl31
+all: bl31.axf
diff --git a/plat/arm/board/arm_fpga/rom_trampoline.S b/plat/arm/board/arm_fpga/rom_trampoline.S
new file mode 100644
index 0000000..cd66c79
--- /dev/null
+++ b/plat/arm/board/arm_fpga/rom_trampoline.S
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2020, ARM Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * The Arm Ltd. FPGA images start execution at address 0x0, which is
+ * mapped at an (emulated) ROM image. The payload uploader can write to
+ * this memory, but write access by the CPU cores is prohibited.
+ *
+ * Provide a simple trampoline to start BL31 execution at the actual
+ * load address. We put the DTB address in x0, so any code in DRAM could
+ * make use of that information (not yet used in BL31 right now).
+ */
+
+#include <asm_macros.S>
+#include <common/bl_common.ld.h>
+
+.text
+.global _start
+
+_start:
+	mov_imm	x1, BL31_BASE			/* beginning of DRAM */
+	mov_imm	x0, FPGA_PRELOADED_DTB_BASE	/* DTB address, passed in x0 */
+	br	x1				/* continue at BL31_BASE */