feat(plat/st): improve FIP image loading from MMC

Instead of using a scratch buffer of 512 bytes, we can directly use the
image address and max size. The mmc_block_dev_spec struct info is then
overwritten for each image with this info, except FW_CONFIG and GPT
table which will still use the scratch buffer.
This allows using multiple blocks read on MMC, and so improves the boot
time.
A cache invalidate is required for the remaining data not used from the
first and last blocks read. It is not required for FW_CONFIG_ID,
as it is in scratch buffer in SYSRAM, and also because bl_mem_params
struct is overwritten in this case. This should also not be done if
the image is not found (OP-TEE extra binaries when using SP_min).

Change-Id: If3ecfdfe35bb9db66284036ca49c4bd1be4fd121
Signed-off-by: Yann Gautier <yann.gautier@foss.st.com>
diff --git a/plat/st/stm32mp1/bl2_plat_setup.c b/plat/st/stm32mp1/bl2_plat_setup.c
index 83e5cd1..53177f6 100644
--- a/plat/st/stm32mp1/bl2_plat_setup.c
+++ b/plat/st/stm32mp1/bl2_plat_setup.c
@@ -15,6 +15,7 @@
 #include <common/desc_image_load.h>
 #include <drivers/delay_timer.h>
 #include <drivers/generic_delay_timer.h>
+#include <drivers/mmc.h>
 #include <drivers/st/bsec.h>
 #include <drivers/st/stm32_console.h>
 #include <drivers/st/stm32_iwdg.h>
@@ -395,6 +396,19 @@
 		break;
 	}
 
+#if STM32MP_SDMMC || STM32MP_EMMC
+	/*
+	 * Invalidate remaining data read from MMC but not flushed by load_image_flush().
+	 * We take the worst case which is 2 MMC blocks.
+	 */
+	if ((image_id != FW_CONFIG_ID) &&
+	    ((bl_mem_params->image_info.h.attr & IMAGE_ATTRIB_SKIP_LOADING) == 0U)) {
+		inv_dcache_range(bl_mem_params->image_info.image_base +
+				 bl_mem_params->image_info.image_size,
+				 2U * MMC_BLOCK_SIZE);
+	}
+#endif /* STM32MP_SDMMC || STM32MP_EMMC */
+
 	return err;
 }