feat(imx8ulp): add APD power down mode(PD) support in system suspend

The APD can be put into PD mode when linux suspend(mem). This patch
add the support for it. As the whole AP domain's context will be lost,
so we must save the necessary HW module states before entering PD mode,
and we need to restore those contexts when system wake up. Fot details
about which HW module's state will be lost, please refer to the RM.

When APD enter PD mode, only the wakeup event connected to the WUU
can wakeup APD successfully. The upower wakeup source is used to
wakeup APD by RTD due to the factor that the MU between A core & M
core is not connected into WUU to generate wakeup event.

as the SRAM0 will be power down when APD enters PD mode, so we
need to re-init the scmi channels(resides in the SRAM0). otherwise
the SCMI can NOT work anymore.

Signed-off-by: Jacky Bai <ping.bai@nxp.com>
Reviewed-by: Ye Li <ye.li@nxp.com>
Change-Id: I44b0cdc8397e5d6a82081ea6746542e9fa4b9fc1
diff --git a/plat/imx/imx8ulp/apd_context.c b/plat/imx/imx8ulp/apd_context.c
new file mode 100644
index 0000000..dad3d77
--- /dev/null
+++ b/plat/imx/imx8ulp/apd_context.c
@@ -0,0 +1,636 @@
+/*
+ * Copyright 2021-2024 NXP
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+
+#include <drivers/delay_timer.h>
+#include <lib/mmio.h>
+
+#include <plat_imx8.h>
+#include <xrdc.h>
+
+#define PCC_PR	BIT(31)
+#define PFD_VALID_MASK	U(0x40404040)
+
+#define S400_MU_BASE	U(0x27020000)
+#define S400_MU_RSR	(S400_MU_BASE + 0x12c)
+#define S400_MU_TRx(i)	(S400_MU_BASE + 0x200 + (i) * 4)
+#define S400_MU_RRx(i)	(S400_MU_BASE + 0x280 + (i) * 4)
+
+/*
+ * need to re-init the PLL, CGC1, PCC, CMC, XRDC, SIM, GPIO etc.
+ * init the PLL &PFD first, then switch the CA35 clock to PLL for
+ * performance consideration, restore other bus fabric clock.
+ */
+
+extern void imx8ulp_caam_init(void);
+extern void upower_wait_resp(void);
+extern void dram_enter_retention(void);
+extern void dram_exit_retention(void);
+
+struct plat_gic_ctx imx_gicv3_ctx;
+static uint32_t cmc1_pmprot;
+static uint32_t cmc1_srie;
+
+/* TPM5: global timer */
+static uint32_t tpm5[3];
+
+static uint32_t wdog3[2];
+
+/* CGC1 PLL2 */
+uint32_t pll2[][2] = {
+	{0x292c0510, 0x0}, {0x292c0518, 0x0}, {0x292c051c, 0x0},
+	{0x292c0520, 0x0}, {0x292c0500, 0x0},
+};
+
+/* CGC1 PLL3 */
+uint32_t pll3[][2] = {
+	{0x292c0604, 0x0}, {0x292c0608, 0x0}, {0x292c060c, 0x0},
+	{0x292c0610, 0x0}, {0x292c0618, 0x0}, {0x292c061c, 0x0},
+	{0x292c0620, 0x0}, {0x292c0624, 0x0}, {0x292c0600, 0x0},
+	{0x292c0614, 0x0},
+};
+
+/* CGC1 others */
+uint32_t cgc1[][2] = {
+	{0x292c0014, 0x0}, {0x292c0034, 0x0}, {0x292c0038, 0x0},
+	{0x292c0108, 0x0}, {0x292c0208, 0x0}, {0x292c0700, 0x0},
+	{0x292c0810, 0x0}, {0x292c0900, 0x0}, {0x292c0904, 0x0},
+	{0x292c0908, 0x0}, {0x292c090c, 0x0}, {0x292c0a00, 0x0},
+};
+
+static uint32_t pcc3[61];
+static uint32_t pcc4[32];
+
+static uint32_t pcc5_0[33];
+static uint32_t pcc5_1[][2] = {
+	{0x2da70084, 0x0}, {0x2da70088, 0x0}, {0x2da7008c, 0x0},
+	{0x2da700a0, 0x0}, {0x2da700a4, 0x0}, {0x2da700a8, 0x0},
+	{0x2da700ac, 0x0}, {0x2da700b0, 0x0}, {0x2da700b4, 0x0},
+	{0x2da700bc, 0x0}, {0x2da700c0, 0x0}, {0x2da700c8, 0x0},
+	{0x2da700cc, 0x0}, {0x2da700d0, 0x0}, {0x2da700f0, 0x0},
+	{0x2da700f4, 0x0}, {0x2da700f8, 0x0}, {0x2da70108, 0x0},
+	{0x2da7010c, 0x0}, {0x2da70110, 0x0}, {0x2da70114, 0x0},
+};
+
+static uint32_t cgc2[][2] = {
+	{0x2da60014, 0x0}, {0x2da60020, 0x0}, {0x2da6003c, 0x0},
+	{0x2da60040, 0x0}, {0x2da60108, 0x0}, {0x2da60208, 0x0},
+	{0x2da60900, 0x0}, {0x2da60904, 0x0}, {0x2da60908, 0x0},
+	{0x2da60910, 0x0}, {0x2da60a00, 0x0},
+};
+
+static uint32_t pll4[][2] = {
+	{0x2da60604, 0x0}, {0x2da60608, 0x0}, {0x2da6060c, 0x0},
+	{0x2da60610, 0x0}, {0x2da60618, 0x0}, {0x2da6061c, 0x0},
+	{0x2da60620, 0x0}, {0x2da60624, 0x0}, {0x2da60600, 0x0},
+	{0x2da60614, 0x0},
+};
+
+static uint32_t lpav_sim[][2] = {
+	{0x2da50000, 0x0}, {0x2da50004, 0x0}, {0x2da50008, 0x0},
+	{0x2da5001c, 0x0}, {0x2da50020, 0x0}, {0x2da50024, 0x0},
+	{0x2da50034, 0x0},
+};
+
+#define APD_GPIO_CTRL_NUM		2
+#define LPAV_GPIO_CTRL_NUM		1
+#define GPIO_CTRL_REG_NUM		8
+#define GPIO_PIN_MAX_NUM	32
+#define GPIO_CTX(addr, num)	\
+	{.base = (addr), .pin_num = (num), }
+
+struct gpio_ctx {
+	/* gpio base */
+	uintptr_t base;
+	/* port control */
+	uint32_t port_ctrl[GPIO_CTRL_REG_NUM];
+	/* GPIO ICR, Max 32 */
+	uint32_t pin_num;
+	uint32_t gpio_icr[GPIO_PIN_MAX_NUM];
+};
+
+static uint32_t gpio_ctrl_offset[GPIO_CTRL_REG_NUM] = {
+	 0xc, 0x10, 0x14, 0x18, 0x1c, 0x40, 0x54, 0x58
+};
+static struct gpio_ctx apd_gpio_ctx[APD_GPIO_CTRL_NUM] = {
+	GPIO_CTX(IMX_GPIOE_BASE, 24),
+	GPIO_CTX(IMX_GPIOF_BASE, 32),
+};
+
+static struct gpio_ctx lpav_gpio_ctx = GPIO_CTX(IMX_GPIOD_BASE, 24);
+/* iomuxc setting */
+#define IOMUXC_SECTION_NUM	8
+struct iomuxc_section {
+	uint32_t offset;
+	uint32_t reg_num;
+};
+
+struct iomuxc_section iomuxc_sections[IOMUXC_SECTION_NUM] = {
+	{.offset = IOMUXC_PTD_PCR_BASE, .reg_num = 24},
+	{.offset = IOMUXC_PTE_PCR_BASE, .reg_num = 24},
+	{.offset = IOMUXC_PTF_PCR_BASE, .reg_num = 32},
+	{.offset = IOMUXC_PSMI_BASE0, .reg_num = 10},
+	{.offset = IOMUXC_PSMI_BASE1, .reg_num = 61},
+	{.offset = IOMUXC_PSMI_BASE2, .reg_num = 12},
+	{.offset = IOMUXC_PSMI_BASE3, .reg_num = 20},
+	{.offset = IOMUXC_PSMI_BASE4, .reg_num = 75},
+};
+static uint32_t iomuxc_ctx[258];
+
+void iomuxc_save(void)
+{
+	unsigned int i, j;
+	unsigned int index = 0U;
+
+	for (i = 0U; i < IOMUXC_SECTION_NUM; i++) {
+		for (j = 0U; j < iomuxc_sections[i].reg_num; j++) {
+			iomuxc_ctx[index++] = mmio_read_32(iomuxc_sections[i].offset + j * 4);
+		}
+	}
+}
+
+void iomuxc_restore(void)
+{
+	unsigned int i, j;
+	unsigned int index = 0U;
+
+	for (i = 0U; i < IOMUXC_SECTION_NUM; i++) {
+		for (j = 0U; j < iomuxc_sections[i].reg_num; j++) {
+			mmio_write_32(iomuxc_sections[i].offset + j * 4, iomuxc_ctx[index++]);
+		}
+	}
+}
+
+void gpio_save(struct gpio_ctx *ctx, int port_num)
+{
+	unsigned int i, j;
+
+	for (i = 0U; i < port_num; i++) {
+		/* save the port control setting */
+		for (j = 0U; j < GPIO_CTRL_REG_NUM; j++) {
+			if (j < 4U) {
+				ctx->port_ctrl[j] = mmio_read_32(ctx->base + gpio_ctrl_offset[j]);
+				/*
+				 * clear the permission setting to read the GPIO
+				 * non-secure world setting.
+				 */
+				mmio_write_32(ctx->base + gpio_ctrl_offset[j], 0x0);
+			} else {
+				ctx->port_ctrl[j] = mmio_read_32(ctx->base + gpio_ctrl_offset[j]);
+			}
+		}
+		/* save the gpio icr setting */
+		for (j = 0U; j < ctx->pin_num; j++) {
+			ctx->gpio_icr[j] = mmio_read_32(ctx->base + 0x80 + j * 4);
+		}
+
+		ctx++;
+	}
+}
+
+void gpio_restore(struct gpio_ctx *ctx, int port_num)
+{
+	unsigned int i, j;
+
+	for (i = 0U; i < port_num; i++) {
+		for (j = 0U; j < ctx->pin_num; j++)
+			mmio_write_32(ctx->base + 0x80 + j * 4, ctx->gpio_icr[j]);
+
+		for (j = 4U; j < GPIO_CTRL_REG_NUM; j++) {
+			mmio_write_32(ctx->base + gpio_ctrl_offset[j], ctx->port_ctrl[j]);
+		}
+
+		/* permission config retore last */
+		for (j = 0U; j < 4; j++) {
+			mmio_write_32(ctx->base + gpio_ctrl_offset[j], ctx->port_ctrl[j]);
+		}
+
+		ctx++;
+	}
+}
+
+void cgc1_save(void)
+{
+	unsigned int i;
+
+	/* PLL2 */
+	for (i = 0U; i < ARRAY_SIZE(pll2); i++) {
+		pll2[i][1] = mmio_read_32(pll2[i][0]);
+	}
+
+	/* PLL3 */
+	for (i = 0U; i < ARRAY_SIZE(pll3); i++) {
+		pll3[i][1] = mmio_read_32(pll3[i][0]);
+	}
+
+	/* CGC1 others */
+	for (i = 0U; i < ARRAY_SIZE(cgc1); i++) {
+		cgc1[i][1] = mmio_read_32(cgc1[i][0]);
+	}
+}
+
+void cgc1_restore(void)
+{
+	unsigned int i;
+
+	/* PLL2 */
+	for (i = 0U; i < ARRAY_SIZE(pll2); i++) {
+		mmio_write_32(pll2[i][0], pll2[i][1]);
+	}
+	/* wait for PLL2 lock */
+	while (!(mmio_read_32(pll2[4][0]) & BIT(24))) {
+		;
+	}
+
+	/* PLL3 */
+	for (i = 0U; i < 9U; i++) {
+		mmio_write_32(pll3[i][0], pll3[i][1]);
+	}
+
+	/* wait for PLL3 lock */
+	while (!(mmio_read_32(pll3[4][0]) & BIT(24))) {
+		;
+	}
+
+	/* restore the PFDs */
+	mmio_write_32(pll3[9][0], pll3[9][1] & ~(BIT(31) | BIT(23) | BIT(15) | BIT(7)));
+	mmio_write_32(pll3[9][0], pll3[9][1]);
+
+	/* wait for the PFD is stable, only need to check the enabled PFDs */
+	while (!(mmio_read_32(pll3[9][0]) & PFD_VALID_MASK)) {
+		;
+	}
+
+	/* CGC1 others */
+	for (i = 0U; i < ARRAY_SIZE(cgc1); i++) {
+		mmio_write_32(cgc1[i][0], cgc1[i][1]);
+	}
+}
+
+void tpm5_save(void)
+{
+	tpm5[0] = mmio_read_32(IMX_TPM5_BASE + 0x10);
+	tpm5[1] = mmio_read_32(IMX_TPM5_BASE + 0x18);
+	tpm5[2] = mmio_read_32(IMX_TPM5_BASE + 0x20);
+}
+
+void tpm5_restore(void)
+{
+	mmio_write_32(IMX_TPM5_BASE + 0x10, tpm5[0]);
+	mmio_write_32(IMX_TPM5_BASE + 0x18, tpm5[1]);
+	mmio_write_32(IMX_TPM5_BASE + 0x20, tpm5[2]);
+}
+
+void wdog3_save(void)
+{
+	/* enable wdog3 clock */
+	mmio_write_32(IMX_PCC3_BASE + 0xa8, 0xd2800000);
+
+	/* save the CS & TOVAL regiter */
+	wdog3[0] = mmio_read_32(IMX_WDOG3_BASE);
+	wdog3[1] = mmio_read_32(IMX_WDOG3_BASE + 0x8);
+}
+
+void wdog3_restore(void)
+{
+	/* enable wdog3 clock */
+	mmio_write_32(IMX_PCC3_BASE + 0xa8, 0xd2800000);
+
+	/* reconfig the CS */
+	mmio_write_32(IMX_WDOG3_BASE, wdog3[0]);
+	/* set the tiemout value */
+	mmio_write_32(IMX_WDOG3_BASE + 0x8, wdog3[1]);
+
+	/* wait for the lock status */
+	while ((mmio_read_32(IMX_WDOG3_BASE) & BIT(11))) {
+		;
+	}
+
+	/* wait for the config done */
+	while (!(mmio_read_32(IMX_WDOG3_BASE) & BIT(10))) {
+		;
+	}
+}
+
+static uint32_t lpuart_regs[4];
+#define LPUART_BAUD     0x10
+#define LPUART_CTRL     0x18
+#define LPUART_FIFO     0x28
+#define LPUART_WATER    0x2c
+
+void lpuart_save(void)
+{
+	lpuart_regs[0] = mmio_read_32(IMX_LPUART5_BASE + LPUART_BAUD);
+	lpuart_regs[1] = mmio_read_32(IMX_LPUART5_BASE + LPUART_FIFO);
+	lpuart_regs[2] = mmio_read_32(IMX_LPUART5_BASE + LPUART_WATER);
+	lpuart_regs[3] = mmio_read_32(IMX_LPUART5_BASE + LPUART_CTRL);
+}
+
+void lpuart_restore(void)
+{
+	mmio_write_32(IMX_LPUART5_BASE + LPUART_BAUD, lpuart_regs[0]);
+	mmio_write_32(IMX_LPUART5_BASE + LPUART_FIFO, lpuart_regs[1]);
+	mmio_write_32(IMX_LPUART5_BASE + LPUART_WATER, lpuart_regs[2]);
+	mmio_write_32(IMX_LPUART5_BASE + LPUART_CTRL, lpuart_regs[3]);
+}
+
+bool is_lpav_owned_by_apd(void)
+{
+	return (mmio_read_32(0x2802b044) & BIT(7)) ? true : false;
+}
+
+void lpav_ctx_save(void)
+{
+	unsigned int i;
+	uint32_t val;
+
+	/* CGC2 save */
+	for (i = 0U; i < ARRAY_SIZE(cgc2); i++) {
+		cgc2[i][1] = mmio_read_32(cgc2[i][0]);
+	}
+
+	/* PLL4 */
+	for (i = 0U; i < ARRAY_SIZE(pll4); i++) {
+		pll4[i][1] = mmio_read_32(pll4[i][0]);
+	}
+
+	/* PCC5 save */
+	for (i = 0U; i < ARRAY_SIZE(pcc5_0); i++) {
+		val = mmio_read_32(IMX_PCC5_BASE + i * 4);
+		if (val & PCC_PR) {
+			pcc5_0[i] = val;
+		}
+	}
+
+	for (i = 0U; i < ARRAY_SIZE(pcc5_1); i++) {
+		val = mmio_read_32(pcc5_1[i][0]);
+		if (val & PCC_PR) {
+			pcc5_1[i][1] = val;
+		}
+	}
+
+	/* LPAV SIM save */
+	for (i = 0U; i < ARRAY_SIZE(lpav_sim); i++) {
+		lpav_sim[i][1] = mmio_read_32(lpav_sim[i][0]);
+	}
+
+	/* Save GPIO port D */
+	gpio_save(&lpav_gpio_ctx, LPAV_GPIO_CTRL_NUM);
+
+	/* put DDR into retention */
+	dram_enter_retention();
+}
+
+void lpav_ctx_restore(void)
+{
+	unsigned int i;
+
+	/* PLL4 */
+	for (i = 0U; i < 9U; i++) {
+		mmio_write_32(pll4[i][0], pll4[i][1]);
+	}
+
+	/* wait for PLL4 lock */
+	while (!(mmio_read_32(pll4[8][0]) & BIT(24))) {
+		;
+	}
+
+	/* restore the PLL4 PFDs */
+	mmio_write_32(pll4[9][0], pll4[9][1] & ~(BIT(31) | BIT(23) | BIT(15) | BIT(7)));
+	mmio_write_32(pll4[9][0], pll4[9][1]);
+
+	/* wait for the PFD is stable */
+	while (!(mmio_read_32(pll4[9][0]) & PFD_VALID_MASK)) {
+		;
+	}
+
+	/* CGC2 restore */
+	for (i = 0U; i < ARRAY_SIZE(cgc2); i++) {
+		mmio_write_32(cgc2[i][0], cgc2[i][1]);
+	}
+
+	/* PCC5 restore */
+	for (i = 0U; i < ARRAY_SIZE(pcc5_0); i++) {
+		if (pcc5_0[i] & PCC_PR) {
+			mmio_write_32(IMX_PCC5_BASE + i * 4, pcc5_0[i]);
+		}
+	}
+
+	for (i = 0U; i < ARRAY_SIZE(pcc5_1); i++) {
+		if (pcc5_1[i][1] & PCC_PR) {
+			mmio_write_32(pcc5_1[i][0], pcc5_1[i][1]);
+		}
+	}
+
+	/* LPAV_SIM */
+	for (i = 0U; i < ARRAY_SIZE(lpav_sim); i++) {
+		mmio_write_32(lpav_sim[i][0], lpav_sim[i][1]);
+	}
+
+	gpio_restore(&lpav_gpio_ctx, LPAV_GPIO_CTRL_NUM);
+	/* DDR retention exit */
+	dram_exit_retention();
+}
+
+void imx_apd_ctx_save(unsigned int proc_num)
+{
+	unsigned int i;
+	uint32_t val;
+
+	/* enable LPUART5's clock by default */
+	mmio_setbits_32(IMX_PCC3_BASE + 0xe8, BIT(30));
+
+	/* save the gic config */
+	plat_gic_save(proc_num, &imx_gicv3_ctx);
+
+	cmc1_pmprot = mmio_read_32(IMX_CMC1_BASE + 0x18);
+	cmc1_srie = mmio_read_32(IMX_CMC1_BASE + 0x8c);
+
+	/* save the PCC3 */
+	for (i = 0U; i < ARRAY_SIZE(pcc3); i++) {
+		/* save the pcc if it is exist */
+		val = mmio_read_32(IMX_PCC3_BASE + i * 4);
+		if (val & PCC_PR) {
+			pcc3[i] = val;
+		}
+	}
+
+	/* save the PCC4 */
+	for (i = 0U; i < ARRAY_SIZE(pcc4); i++) {
+		/* save the pcc if it is exist */
+		val = mmio_read_32(IMX_PCC4_BASE + i * 4);
+		if (val & PCC_PR) {
+			pcc4[i] = val;
+		}
+	}
+
+	/* save the CGC1 */
+	cgc1_save();
+
+	wdog3_save();
+
+	gpio_save(apd_gpio_ctx, APD_GPIO_CTRL_NUM);
+
+	iomuxc_save();
+
+	tpm5_save();
+
+	lpuart_save();
+
+	/*
+	 * save the lpav ctx & put the ddr into retention
+	 * if lpav master is assigned to APD domain.
+	 */
+	if (is_lpav_owned_by_apd()) {
+		lpav_ctx_save();
+	}
+}
+
+void xrdc_reinit(void)
+{
+	xrdc_apply_apd_config();
+	xrdc_apply_lpav_config();
+
+	xrdc_enable();
+}
+
+void s400_release_caam(void)
+{
+	uint32_t msg, resp;
+
+	mmio_write_32(S400_MU_TRx(0), 0x17d70206);
+	mmio_write_32(S400_MU_TRx(1), 0x7);
+
+	do {
+		resp = mmio_read_32(S400_MU_RSR);
+	} while ((resp & 0x3) != 0x3);
+
+	msg = mmio_read_32(S400_MU_RRx(0));
+	resp = mmio_read_32(S400_MU_RRx(1));
+
+	VERBOSE("resp %x; %x", msg, resp);
+}
+
+void imx_apd_ctx_restore(unsigned int proc_num)
+{
+	unsigned int i;
+
+	/* restore the CCG1 */
+	cgc1_restore();
+
+	for (i = 0U; i < ARRAY_SIZE(pcc3); i++) {
+		/* save the pcc if it is exist */
+		if (pcc3[i] & PCC_PR) {
+			mmio_write_32(IMX_PCC3_BASE + i * 4, pcc3[i]);
+		}
+	}
+
+	for (i = 0U; i < ARRAY_SIZE(pcc4); i++) {
+		if (pcc4[i] & PCC_PR) {
+			mmio_write_32(IMX_PCC4_BASE + i * 4, pcc4[i]);
+		}
+	}
+
+	wdog3_restore();
+
+	iomuxc_restore();
+
+	gpio_restore(apd_gpio_ctx, APD_GPIO_CTRL_NUM);
+
+	tpm5_restore();
+
+	xrdc_reinit();
+
+	/* restore the gic config */
+	plat_gic_restore(proc_num, &imx_gicv3_ctx);
+
+	mmio_write_32(IMX_CMC1_BASE + 0x18, cmc1_pmprot);
+	mmio_write_32(IMX_CMC1_BASE + 0x8c, cmc1_srie);
+
+	/* enable LPUART5's clock by default */
+	mmio_setbits_32(IMX_PCC3_BASE + 0xe8, BIT(30));
+
+	/* restore the console lpuart */
+	lpuart_restore();
+
+	/* FIXME: make uart work for ATF */
+	mmio_write_32(IMX_LPUART_BASE + 0x18, 0xc0000);
+
+	/*
+	 * Ask S400 to release caam to APD as it is owned by s400
+	 */
+	s400_release_caam();
+
+	/* re-init the caam */
+	imx8ulp_caam_init();
+
+	/*
+	 * ack the upower, seems a necessary steps, otherwise the upower can
+	 * not response to the new API service call. put this just before the
+	 * ddr retention exit because that the dram retention exit flow need to
+	 * communicate with upower.
+	 */
+	upower_wait_resp();
+
+	/*
+	 * restore the lpav ctx & make ddr out of retention
+	 * if lpav master is assigned to APD domain.
+	 */
+	if (is_lpav_owned_by_apd()) {
+		lpav_ctx_restore();
+	}
+}
+
+#define DGO_CTRL1	U(0xc)
+#define USB_WAKEUP	U(0x44)
+#define USB1_PHY_DPD_WAKEUP_EN	BIT_32(5)
+#define USB0_PHY_DPD_WAKEUP_EN	BIT_32(4)
+#define USB1_PHY_WAKEUP_ISO_DISABLE	BIT_32(1)
+#define USB0_PHY_WAKEUP_ISO_DISABLE	BIT_32(0)
+
+void usb_wakeup_enable(bool enable)
+{
+	if (enable) {
+		mmio_setbits_32(IMX_SIM1_BASE + USB_WAKEUP,
+				USB1_PHY_WAKEUP_ISO_DISABLE | USB0_PHY_WAKEUP_ISO_DISABLE);
+		mmio_setbits_32(IMX_SIM1_BASE + DGO_CTRL1, BIT(0));
+		while (!(mmio_read_32(IMX_SIM1_BASE + DGO_CTRL1) & BIT(1))) {
+			;
+		}
+
+		mmio_clrbits_32(IMX_SIM1_BASE + DGO_CTRL1, BIT(0));
+		mmio_write_32(IMX_SIM1_BASE + DGO_CTRL1, BIT(1));
+
+		/* Need to delay for a while to make sure the wakeup logic can work */
+		udelay(500);
+
+		mmio_setbits_32(IMX_SIM1_BASE + USB_WAKEUP,
+				USB1_PHY_DPD_WAKEUP_EN | USB0_PHY_DPD_WAKEUP_EN);
+		mmio_setbits_32(IMX_SIM1_BASE + DGO_CTRL1, BIT(0));
+		while (!(mmio_read_32(IMX_SIM1_BASE + DGO_CTRL1) & BIT(1))) {
+			;
+		}
+
+		mmio_clrbits_32(IMX_SIM1_BASE + DGO_CTRL1, BIT(0));
+		mmio_write_32(IMX_SIM1_BASE + DGO_CTRL1, BIT(1));
+	} else {
+		/*
+		 * USBx_PHY_DPD_WAKEUP_EN should be cleared before USB0_PHY_WAKEUP_ISO_DISABLE
+		 * to provide the correct the wake-up functionality.
+		 */
+		mmio_write_32(IMX_SIM1_BASE + USB_WAKEUP, USB1_PHY_WAKEUP_ISO_DISABLE |
+			USB0_PHY_WAKEUP_ISO_DISABLE);
+		mmio_write_32(IMX_SIM1_BASE + DGO_CTRL1, BIT(0));
+		while (!(mmio_read_32(IMX_SIM1_BASE + DGO_CTRL1) & BIT(1))) {
+			;
+		}
+
+		mmio_clrbits_32(IMX_SIM1_BASE + DGO_CTRL1, BIT(0));
+		mmio_write_32(IMX_SIM1_BASE + DGO_CTRL1, BIT(1));
+	}
+}
diff --git a/plat/imx/imx8ulp/dram.c b/plat/imx/imx8ulp/dram.c
new file mode 100644
index 0000000..c204947
--- /dev/null
+++ b/plat/imx/imx8ulp/dram.c
@@ -0,0 +1,427 @@
+/*
+ * Copyright 2021-2024 NXP
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+
+#include <lib/mmio.h>
+#include <platform_def.h>
+
+#include <upower_api.h>
+
+#define PHY_FREQ_SEL_INDEX(x)		((x) << 16)
+#define PHY_FREQ_MULTICAST_EN(x)	((x) << 8)
+#define DENALI_PHY_1537			U(0x5804)
+
+#define IMX_DDRC_BASE			U(0x2E060000)
+#define SAVED_DRAM_DATA_BASE		U(0x20055000)
+#define DENALI_CTL_144			0x240
+#define LPI_WAKEUP_EN_SHIFT		U(8)
+#define IMX_LPAV_SIM_BASE		0x2DA50000
+#define LPDDR_CTRL			0x14
+#define LPDDR_AUTO_LP_MODE_DISABLE	BIT(24)
+#define SOC_LP_CMD_SHIFT		U(15)
+#define LPDDR_CTRL2			0x18
+
+#define DENALI_CTL_00			U(0x0)
+#define DENALI_CTL_23			U(0x5c)
+#define DFIBUS_FREQ_INIT_SHIFT		U(24)
+#define TSREF2PHYMSTR_SHIFT		U(8)
+#define TSREF2PHYMSTR_MASK		GENMASK(13, 8)
+
+#define DENALI_CTL_24			U(0x60)
+#define DENALI_CTL_25			U(0x64)
+
+#define DENALI_CTL_93			U(0x174)
+#define PWRUP_SREFRESH_EXIT		BIT(0)
+
+#define DENALI_CTL_127				U(0x1fc)
+#define PHYMSTR_TRAIN_AFTER_INIT_COMPLETE	BIT(16)
+
+#define DENALI_CTL_147			U(0x24c)
+#define DENALI_CTL_153			U(0x264)
+#define PCPCS_PD_EN			BIT(8)
+
+#define DENALI_CTL_249			U(0x3E4)
+#define DENALI_CTL_266			U(0x428)
+
+#define DENALI_PHY_1547			U(0x582c)
+#define PHY_LP4_BOOT_DISABLE		BIT(8)
+
+#define DENALI_PHY_1559			U(0x585c)
+#define DENALI_PHY_1590			U(0x58D8)
+
+#define DENALI_PI_00			U(0x2000)
+#define DENALI_PI_04			U(0x2010)
+#define DENALI_PI_52			U(0x20D0)
+#define DENALI_PI_26			U(0x2068)
+#define DENALI_PI_33			U(0x2084)
+#define DENALI_PI_65			U(0x2104)
+#define DENALI_PI_77			U(0x2134)
+#define DENALI_PI_134			U(0x2218)
+#define DENALI_PI_131			U(0x220C)
+#define DENALI_PI_132			U(0x2210)
+#define DENALI_PI_134			U(0x2218)
+#define DENALI_PI_137			U(0x2224)
+#define DENALI_PI_174			U(0x22B8)
+#define DENALI_PI_175			U(0x22BC)
+#define DENALI_PI_181			U(0x22D4)
+#define DENALI_PI_182			U(0x22D8)
+#define DENALI_PI_191			U(0x22FC)
+#define DENALI_PI_192			U(0x2300)
+#define DENALI_PI_212			U(0x2350)
+#define DENALI_PI_214			U(0x2358)
+#define DENALI_PI_217			U(0x2364)
+
+#define LPDDR3_TYPE	U(0x7)
+#define LPDDR4_TYPE	U(0xB)
+
+extern void upower_wait_resp(void);
+
+struct dram_cfg_param {
+	uint32_t reg;
+	uint32_t val;
+};
+
+struct dram_timing_info {
+	/* ddr controller config */
+	struct dram_cfg_param *ctl_cfg;
+	unsigned int ctl_cfg_num;
+	/* pi config */
+	struct dram_cfg_param *pi_cfg;
+	unsigned int pi_cfg_num;
+	/* phy freq1 config */
+	struct dram_cfg_param *phy_f1_cfg;
+	unsigned int phy_f1_cfg_num;
+	/* phy freq2 config */
+	struct dram_cfg_param *phy_f2_cfg;
+	unsigned int phy_f2_cfg_num;
+	/* initialized drate table */
+	unsigned int fsp_table[3];
+};
+
+#define CTL_NUM		U(680)
+#define PI_NUM		U(298)
+#define PHY_NUM		U(1654)
+#define PHY_DIFF_NUM	U(49)
+struct dram_cfg {
+	uint32_t ctl_cfg[CTL_NUM];
+	uint32_t pi_cfg[PI_NUM];
+	uint32_t phy_full[PHY_NUM];
+	uint32_t phy_diff[PHY_DIFF_NUM];
+};
+
+struct dram_timing_info *info;
+struct dram_cfg *dram_timing_cfg;
+
+/* mark if dram cfg is already saved */
+static bool dram_cfg_saved;
+static uint32_t dram_class;
+
+/* PHY register index for frequency diff */
+uint32_t freq_specific_reg_array[PHY_DIFF_NUM] = {
+90, 92, 93, 96, 97, 100, 101, 102, 103, 104, 114,
+346, 348, 349, 352, 353, 356, 357, 358, 359, 360,
+370, 602, 604, 605, 608, 609, 612, 613, 614, 615,
+616, 626, 858, 860, 861, 864, 865, 868, 869, 870,
+871, 872, 882, 1063, 1319, 1566, 1624, 1625
+};
+
+static void ddr_init(void)
+{
+	unsigned int i;
+
+	/* restore the ddr ctl config */
+	for (i = 0U; i < CTL_NUM; i++) {
+		mmio_write_32(IMX_DDRC_BASE + i * 4, dram_timing_cfg->ctl_cfg[i]);
+	}
+
+	/* load the PI registers */
+	for (i = 0U; i < PI_NUM; i++) {
+		mmio_write_32(IMX_DDRC_BASE + 0x2000 + i * 4, dram_timing_cfg->pi_cfg[i]);
+	}
+
+
+	 /* restore all PHY registers for all the fsp. */
+	mmio_write_32(IMX_DDRC_BASE + DENALI_PHY_1537, 0x100);
+	/* restore all the phy configs */
+	for (i = 0U; i < PHY_NUM; i++) {
+		/* skip the reserved registers space */
+		if (i >= 121U && i <= 255U) {
+			continue;
+		}
+		if (i >= 377U && i <= 511U) {
+			continue;
+		}
+		if (i >= 633U && i <= 767U) {
+			continue;
+		}
+		if (i >= 889U && i <= 1023U) {
+			continue;
+		}
+		if (i >= 1065U && i <= 1279U) {
+			continue;
+		}
+		if (i >= 1321U && i <= 1535U) {
+			continue;
+		}
+		mmio_write_32(IMX_DDRC_BASE + 0x4000 + i * 4, dram_timing_cfg->phy_full[i]);
+	}
+
+	if (dram_class == LPDDR4_TYPE) {
+		/* restore only the diff. */
+		mmio_write_32(IMX_DDRC_BASE + DENALI_PHY_1537, 0x0);
+		for (i = 0U; i < PHY_DIFF_NUM; i++) {
+			mmio_write_32(IMX_DDRC_BASE + 0x4000 + freq_specific_reg_array[i] * 4,
+				      dram_timing_cfg->phy_diff[i]);
+		}
+	}
+
+	/* Re-enable MULTICAST mode */
+	mmio_write_32(IMX_DDRC_BASE + DENALI_PHY_1537, PHY_FREQ_MULTICAST_EN(1));
+}
+
+void dram_enter_retention(void)
+{
+	unsigned int i;
+
+	/* 1. config the PCC_LPDDR4[SSADO] to 2b'11 for ACK domain 0/1's STOP */
+	mmio_setbits_32(IMX_PCC5_BASE + 0x108, 0x2 << 22);
+
+	/*
+	 * 2. Make sure the DENALI_CTL_144[LPI_WAKEUP_EN[5:0]] has the bit
+	 * LPI_WAKEUP_EN[3] = 1b'1. This enables the option 'self-refresh
+	 * long with mem and ctlr clk gating or self-refresh  power-down
+	 * long with mem and ctlr clk gating'
+	 */
+	mmio_setbits_32(IMX_DDRC_BASE + DENALI_CTL_144, BIT(3) << LPI_WAKEUP_EN_SHIFT);
+
+	/*
+	 * 3a. Config SIM_LPAV LPDDR_CTRL[LPDDR_AUTO_LP_MODE_DISABLE] to 1b'0(enable
+	 * the logic to automatic handles low power entry/exit. This is the recommended
+	 * option over handling through software.
+	 * 3b. Config the SIM_LPAV LPDDR_CTRL[SOC_LP_CMD] to 6b'101001(encoding for
+	 * self_refresh with both DDR controller and DRAM clock gate. THis is mandatory
+	 * since LPPDR logic will be power gated).
+	 */
+	mmio_clrbits_32(IMX_LPAV_SIM_BASE + LPDDR_CTRL, LPDDR_AUTO_LP_MODE_DISABLE);
+	mmio_clrsetbits_32(IMX_LPAV_SIM_BASE + LPDDR_CTRL,
+			   0x3f << SOC_LP_CMD_SHIFT, 0x29 << SOC_LP_CMD_SHIFT);
+
+	/* Save DDR Controller & PHY config.
+	 * Set PHY_FREQ_SEL_MULTICAST_EN=0 & PHY_FREQ_SEL_INDEX=1. Read and store all
+	 * the PHY registers for F2 into phy_f1_cfg, then read/store the diff between
+	 * F1 & F2 into phy_f2_cfg.
+	 */
+	if (!dram_cfg_saved) {
+		info = (struct dram_timing_info *)SAVED_DRAM_DATA_BASE;
+		dram_timing_cfg = (struct dram_cfg *)(SAVED_DRAM_DATA_BASE +
+					sizeof(struct dram_timing_info));
+
+		/* get the dram type */
+		dram_class = mmio_read_32(IMX_DDRC_BASE + DENALI_CTL_00);
+		dram_class = (dram_class >> 8) & 0xf;
+
+		/* save the ctl registers */
+		for (i = 0U; i < CTL_NUM; i++) {
+			dram_timing_cfg->ctl_cfg[i] = mmio_read_32(IMX_DDRC_BASE + i * 4);
+		}
+		dram_timing_cfg->ctl_cfg[0] = dram_timing_cfg->ctl_cfg[0] & 0xFFFFFFFE;
+
+		/* save the PI registers */
+		for (i = 0U; i < PI_NUM; i++) {
+			dram_timing_cfg->pi_cfg[i] = mmio_read_32(IMX_DDRC_BASE + 0x2000 + i * 4);
+		}
+		dram_timing_cfg->pi_cfg[0] = dram_timing_cfg->pi_cfg[0] & 0xFFFFFFFE;
+
+		/*
+		 * Read and store all PHY registers. full array is a full
+		 * copy for all the setpoint
+		 */
+		if (dram_class == LPDDR4_TYPE) {
+			mmio_write_32(IMX_DDRC_BASE + DENALI_PHY_1537, 0x10000);
+			for (i = 0U; i < PHY_NUM; i++) {
+				/* Make sure MULTICASE is enabled */
+				if (i == 1537U) {
+					dram_timing_cfg->phy_full[i] = 0x100;
+				} else {
+					dram_timing_cfg->phy_full[i] = mmio_read_32(IMX_DDRC_BASE + 0x4000 + i * 4);
+				}
+			}
+
+			/*
+			 * set PHY_FREQ_SEL_MULTICAST_EN=0 & PHY_FREQ_SEL_INDEX=0.
+			 * Read and store only the diff.
+			 */
+			mmio_write_32(IMX_DDRC_BASE + DENALI_PHY_1537, 0x0);
+			/* save only the frequency based diff config to save memory */
+			for (i = 0U; i < PHY_DIFF_NUM; i++) {
+				dram_timing_cfg->phy_diff[i] = mmio_read_32(IMX_DDRC_BASE + 0x4000 +
+									    freq_specific_reg_array[i] * 4);
+			}
+		} else {
+			/* LPDDR3, only f1 need to save */
+			for (i = 0U; i < info->phy_f1_cfg_num; i++) {
+				info->phy_f1_cfg[i].val = mmio_read_32(info->phy_f1_cfg[i].reg);
+			}
+		}
+
+		dram_cfg_saved = true;
+	}
+}
+
+void dram_exit_retention(void)
+{
+	uint32_t val;
+
+	/* 1. Config the LPAV PLL4 and DDR clock for the desired LPDDR operating frequency. */
+	mmio_setbits_32(IMX_PCC5_BASE + 0x108, BIT(30));
+
+	/* 2. Write PCC5.PCC_LPDDR4[SWRST] to 1b'1 to release LPDDR from reset. */
+	mmio_setbits_32(IMX_PCC5_BASE + 0x108, BIT(28));
+
+	/* 3. Reload the LPDDR CTL/PI/PHY register */
+	ddr_init();
+
+	if (dram_class == LPDDR4_TYPE) {
+		/* 4a. FIXME Set PHY_SET_DFI_INPUT_N parameters to 4'h1. LPDDR4 only */
+		mmio_write_32(IMX_DDRC_BASE + DENALI_PHY_1559, 0x01010101);
+
+		/*
+		 * 4b. CTL PWRUP_SREFRESH_EXIT=1'b0 for disabling self refresh exit
+		 * from controller.
+		 */
+		/*
+		 * 4c. PI_PWRUP_SELF_REF_EXIT=1, PI_MC_PWRUP_SELF_REF_EXIT=0 for enabling
+		 * self refresh exit from PI
+		 */
+		/* 4c. PI_INT_LVL_EN=0 to skip Initialization trainings. */
+		/*
+		 * 4d. PI_WRLVL_EN_F0/1/2= PI_CALVL_EN_F0/1/2= PI_RDLVL_EN_F0/1/2=
+		 * PI_RDLVL_GATE_EN_F0/1/2= PI_WDQLVL_EN_F0/1/2=0x2.
+		 * Enable non initialization trainings.
+		 */
+		/* 4e. PI_PWRUP_SREFRESH_EXIT_CS=0xF */
+		/* 4f. PI_DLL_RESET=0x1 */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_137, 0x1);
+		/* PI_PWRUP_SELF_REF_EXIT = 1 */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_132, 0x01000000);
+		/* PI_MC_PWRUP_SELF_REF_EXIT = 0 */
+		mmio_clrbits_32(IMX_DDRC_BASE + DENALI_PI_132, BIT(16));
+		/* PI_INT_LVL_EN = 0 */
+		mmio_clrbits_32(IMX_DDRC_BASE + DENALI_PI_04, BIT(0));
+		/* PI_WRLVL_EN_F0 = 3, PI_WRLVL_EN_F1 = 3 */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_174, 0x03030000);
+		/* PI_WRLVL_EN_F2 = 3 */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_175, 0x03);
+		/* PI_CALVL_EN_F0 = 3, PI_CALVL_EN_F1 = 3 */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_191, 0x03030000);
+		/* PI_CALVL_EN_F2 = 3 */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_192, 0x03);
+		/* PI_WDQLVL_EN_F0 = 3 */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_212, 0x300);
+		/* PI_WDQLVL_EN_F1 = 3 */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_214, 0x03000000);
+		/* PI_WDQLVL_EN_F2 = 3 */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_217, 0x300);
+		/* PI_EDLVL_EN_F0 = 3, PI_EDLVL_GATE_EN_F0 = 3 */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_181, 0x03030000);
+		/*
+		 * PI_RDLVL_EN_F1 = 3, PI_RDLVL_GATE_EN_F1 = 3,
+		 * PI_RDLVL_EN_F2 = 3, PI_RDLVL_GATE_EN_F2 = 3
+		 */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_182, 0x03030303);
+		/* PI_PWRUP_SREFRESH_EXIT_CS = 0xF */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_134, 0x000F0000);
+	} else {
+		/* PI_DLL_RESET=1 */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_137, 0x1);
+		/* PI_PWRUP_SELF_REF_EXIT=1 */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_132, 0x01000000);
+		/* PI_MC_PWRUP_SELF_REF_EXIT=0 */
+		mmio_clrbits_32(IMX_DDRC_BASE + DENALI_PI_132, BIT(16));
+		/* PI_INT_LVL_EN=0 */
+		mmio_clrbits_32(IMX_DDRC_BASE + DENALI_PI_04, BIT(0));
+		/* PI_WRLVL_EN_F0=3 */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_174, 0x00030000);
+		/* PI_CALVL_EN_F0=3 */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_191, 0x00030000);
+		/* PI_RDLVL_EN_F0=3,PI_RDLVL_GATE_EN_F0=3 */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_181, 0x03030000);
+		/* PI_PWRUP_SREFRESH_EXIT_CS=0xF */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_134, 0x000F0000);
+	}
+
+	mmio_write_32(IMX_DDRC_BASE + DENALI_CTL_144, 0x00002D00);
+
+	/* Force in-order AXI read data */
+	mmio_write_32(IMX_DDRC_BASE + DENALI_CTL_144, 0x1);
+
+	/*
+	 * Disable special R/W group switches so that R/W group placement
+	 * is always at END of R/W group.
+	 */
+	mmio_write_32(IMX_DDRC_BASE + DENALI_CTL_249, 0x0);
+
+	/* Reduce time for IO pad calibration */
+	mmio_write_32(IMX_DDRC_BASE + DENALI_PHY_1590, 0x01000000);
+
+	mmio_write_32(IMX_DDRC_BASE + DENALI_CTL_25, 0x00020100);
+
+	/* PD disable */
+	mmio_write_32(IMX_DDRC_BASE + DENALI_CTL_153, 0x04040000);
+	/*
+	 * 5. Disable automatic LP entry and PCPCS modes LP_AUTO_ENTRY_EN
+	 * to 1b'0, PCPCS_PD_EN to 1b'0
+	 */
+
+	upwr_xcp_set_ddr_retention(APD_DOMAIN, 0, NULL);
+	upower_wait_resp();
+
+	if (dram_class == LPDDR4_TYPE) {
+		/* 7. Write PI START parameter to 1'b1 */
+		mmio_write_32(IMX_DDRC_BASE + DENALI_PI_00, 0x00000b01);
+
+		/* 8. Write CTL START parameter to 1'b1 */
+		mmio_write_32(IMX_DDRC_BASE + DENALI_CTL_00, 0x00000b01);
+	} else {
+		/* 7. Write PI START parameter to 1'b1 */
+		mmio_write_32(IMX_DDRC_BASE + DENALI_PI_00, 0x00000701);
+
+		/* 8. Write CTL START parameter to 1'b1 */
+		mmio_write_32(IMX_DDRC_BASE + DENALI_CTL_00, 0x00000701);
+	}
+
+	/* 9. DENALI_CTL_266:  Wait for INT_STATUS_INIT=0x2 */
+	do {
+		val = (mmio_read_32(IMX_DDRC_BASE + DENALI_CTL_266) >> 8) & 0xFF;
+	} while (val != 0x2);
+
+	/*
+	 * 10. Run SW trainings by setting PI_CALVL_REQ,PI_WRLVL_REQ,PI_RDLVL_GATE_REQ,
+	 * PI_RDLVL_REQ,PI_WDQLVL_REQ(NA for LPDDR3) in same order.
+	 */
+	if (dram_class == LPDDR4_TYPE) {
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_52, 0x10000); /* CALVL */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_26, 0x100); /* WRLVL */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_33, 0x10000); /* RDGATE */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_33, 0x100); /* RDQLVL */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_65, 0x10000); /* WDQLVL */
+
+		/* 11. Wait for trainings to get complete by polling PI_INT_STATUS */
+		while ((mmio_read_32(IMX_DDRC_BASE + DENALI_PI_77) & 0x07E00000) != 0x07E00000) {
+			;
+		}
+	} else {
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_52, 0x10000); /* CALVL */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_26, 0x100); /* WRLVL */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_33, 0x10000); /* RDGATE */
+		mmio_setbits_32(IMX_DDRC_BASE + DENALI_PI_33, 0x100); /* RDQLVL */
+		while ((mmio_read_32(IMX_DDRC_BASE + DENALI_PI_77) & 0x05E00000) != 0x05E00000) {
+			;
+		}
+	}
+}
diff --git a/plat/imx/imx8ulp/imx8ulp_psci.c b/plat/imx/imx8ulp/imx8ulp_psci.c
index 3522ee0..32ecd49 100644
--- a/plat/imx/imx8ulp/imx8ulp_psci.c
+++ b/plat/imx/imx8ulp/imx8ulp_psci.c
@@ -16,6 +16,12 @@
 #include <plat_imx8.h>
 #include <upower_api.h>
 
+extern void cgc1_save(void);
+extern void cgc1_restore(void);
+extern void imx_apd_ctx_save(unsigned int cpu);
+extern void imx_apd_ctx_restore(unsigned int cpu);
+extern void usb_wakeup_enable(bool enable);
+
 static uintptr_t secure_entrypoint;
 
 #define CORE_PWR_STATE(state) ((state)->pwr_domain_state[MPIDR_AFFLVL0])
@@ -63,6 +69,8 @@
 		.mask = (m),	\
 	}
 
+extern void upower_wait_resp(void);
+
 static int imx_pwr_set_cpu_entry(unsigned int cpu, unsigned int entry)
 {
 	mmio_write_32(RVBARADDRx(cpu), entry);
@@ -120,8 +128,18 @@
 	/* set core power mode to PD */
 	mmio_write_32(AD_COREx_LPMODE(cpu), 0x3);
 }
+
 /* APD power mode config */
 ps_apd_pwr_mode_cfgs_t apd_pwr_mode_cfgs = {
+	/* PD */
+	[PD_PWR_MODE] = {
+		.swt_board_offs = 0x170,
+		.swt_mem_offs = 0x178,
+		.pmic_cfg = PMIC_CFG(0x23, 0x2, 0x2),
+		.pad_cfg = PAD_CFG(0x0, 0x0, 0x01e80a00),
+		.bias_cfg = BIAS_CFG(0x0, 0x2, 0x2, 0x0),
+	},
+
 	[ADMA_PWR_MODE] = {
 		.swt_board_offs = 0x120,
 		.swt_mem_offs = 0x128,
@@ -141,16 +159,22 @@
 
 /* APD power switch config */
 ps_apd_swt_cfgs_t apd_swt_cfgs = {
+	[PD_PWR_MODE] = {
+		.swt_board[0] = SWT_BOARD(0x00060003, 0x00001e74),
+		.swt_mem[0] = SWT_MEM(0x00010c00, 0x0, 0x1ffff),
+		.swt_mem[1] = SWT_MEM(0x003fffff, 0x003f0000, 0x0),
+	},
+
 	[ADMA_PWR_MODE] = {
-		.swt_board[0] = SWT_BOARD(0x74, 0x7c),
-		.swt_mem[0] = SWT_MEM(0x0001fffd, 0x0001fffd, 0x1fffd),
-		.swt_mem[1] = SWT_MEM(0x0, 0x0, 0x0),
+		.swt_board[0] = SWT_BOARD(0x0006ff77, 0x0006ff7c),
+		.swt_mem[0] = SWT_MEM(0x0001fffd, 0x0001fffd, 0x1ffff),
+		.swt_mem[1] = SWT_MEM(0x003fffff, 0x003fffff, 0x0),
 	},
 
 	[ACT_PWR_MODE] = {
-		.swt_board[0] = SWT_BOARD(0x74, 0x7c),
-		.swt_mem[0] = SWT_MEM(0x0001fffd, 0x0001fffd, 0x1fffd),
-		.swt_mem[1] = SWT_MEM(0x0, 0x0, 0x0),
+		.swt_board[0] = SWT_BOARD(0x0006ff77, 0x0000ff7c),
+		.swt_mem[0] = SWT_MEM(0x0001fffd, 0x0001fffd, 0x1ffff),
+		.swt_mem[1] = SWT_MEM(0x003fffff, 0x003fffff, 0x0),
 	},
 };
 
@@ -190,18 +214,23 @@
 		isb();
 	}
 
-	if (!is_local_state_run(CLUSTER_PWR_STATE(target_state))) {
-		/* TODO imx_set_wakeup() based on GIC config*/
-
+	if (is_local_state_retn(CLUSTER_PWR_STATE(target_state))) {
 		/*
 		 * just for sleep mode for now, need to update to
-		 * support more mode, same for suspend finish call back.
+		 * support more modes, same for suspend finish call back.
 		 */
 		mmio_write_32(IMX_CMC1_BASE + 0x10, 0x1);
 		mmio_write_32(IMX_CMC1_BASE + 0x20, 0x1);
+
+	} else if (is_local_state_off(CLUSTER_PWR_STATE(target_state))) {
+		/*
+		 * for cluster off state, put cluster into power down mode,
+		 * config the cluster clock to be off.
+		 */
+		mmio_write_32(IMX_CMC1_BASE + 0x10, 0x7);
+		mmio_write_32(IMX_CMC1_BASE + 0x20, 0xf);
 	}
 
-	/* TODO, may need to add more system level config here */
 	if (is_local_state_off(SYSTEM_PWR_STATE(target_state))) {
 		/*
 		 * low power mode config info used by upower
@@ -209,22 +238,66 @@
 		 */
 		imx_set_pwr_mode_cfg(ADMA_PWR_MODE);
 		imx_set_pwr_mode_cfg(ACT_PWR_MODE);
+		imx_set_pwr_mode_cfg(PD_PWR_MODE);
+
+		/* clear the upower wakeup */
+		upwr_xcp_set_rtd_apd_llwu(APD_DOMAIN, 0, NULL);
+		upower_wait_resp();
+
+		/* enable the USB wakeup */
+		usb_wakeup_enable(true);
+
+		/* config the WUU to enabled the wakeup source */
+		mmio_write_32(IMX_PCC3_BASE + 0x98, 0xc0800000);
+
+		/* !!! clear all the pad wakeup pending event */
+		mmio_write_32(IMX_WUU1_BASE + 0x20, 0xffffffff);
+
+		/* enable upower usb phy wakeup by default */
+		mmio_setbits_32(IMX_WUU1_BASE + 0x18, BIT(4) | BIT(1) | BIT(0));
+
+		/* enabled all pad wakeup by default */
+		mmio_write_32(IMX_WUU1_BASE + 0x8, 0xffffffff);
+
+		/* save the AD domain context before entering PD mode */
+		imx_apd_ctx_save(cpu);
 	}
 }
 
+extern void imx8ulp_init_scmi_server(void);
 void imx_domain_suspend_finish(const psci_power_state_t *target_state)
 {
 	unsigned int cpu = MPIDR_AFFLVL0_VAL(read_mpidr_el1());
 
 	if (is_local_state_off(SYSTEM_PWR_STATE(target_state))) {
-		/* TODO reverse setting for system level */
-	}
+		/* restore the ap domain context */
+		imx_apd_ctx_restore(cpu);
 
-	if (!is_local_state_run(CLUSTER_PWR_STATE(target_state))) {
-		mmio_write_32(IMX_CMC1_BASE + 0x20, 0x0);
-		mmio_write_32(IMX_CMC1_BASE + 0x10, 0x0);
+		/* clear the upower wakeup */
+		upwr_xcp_set_rtd_apd_llwu(APD_DOMAIN, 0, NULL);
+		upower_wait_resp();
+
+		/* disable all pad wakeup */
+		mmio_write_32(IMX_WUU1_BASE + 0x8, 0x0);
+
+		/* clear all the pad wakeup pending event */
+		mmio_write_32(IMX_WUU1_BASE + 0x20, 0xffffffff);
+
+		/*
+		 * disable the usb wakeup after resume to make sure the pending
+		 * usb wakeup in WUU can be cleared successfully, otherwise,
+		 * APD will resume failed in next PD mode.
+		 */
+		usb_wakeup_enable(false);
+
+		/* re-init the SCMI channel */
+		imx8ulp_init_scmi_server();
 	}
 
+	/* clear cluster's LPM setting. */
+	mmio_write_32(IMX_CMC1_BASE + 0x20, 0x0);
+	mmio_write_32(IMX_CMC1_BASE + 0x10, 0x0);
+
 	/* clear core's LPM setting */
 	mmio_write_32(IMX_CMC1_BASE + 0x50 + 0x4 * cpu, 0x0);
 	mmio_write_32(IMX_SIM1_BASE + 0x3c + 0x4 * cpu, 0x0);
diff --git a/plat/imx/imx8ulp/include/platform_def.h b/plat/imx/imx8ulp/include/platform_def.h
index 18f72fd..d5f4033 100644
--- a/plat/imx/imx8ulp/include/platform_def.h
+++ b/plat/imx/imx8ulp/include/platform_def.h
@@ -66,14 +66,25 @@
 #define IMX_CGC2_BASE			U(0x2da60000)
 #define IMX_PCC5_BASE			U(0x2da70000)
 #define IMX_CMC1_BASE			U(0x29240000)
+#define IMX_WUU1_BASE			U(0x29260000)
 #define IMX_SIM1_BASE			U(0x29290000)
+#define IMX_GPIOD_BASE			U(0x2e200000)
+#define IMX_GPIOE_BASE			U(0x2d000000)
+#define IMX_GPIOF_BASE			U(0x2d010000)
 #define IMX_WDOG3_BASE			U(0x292a0000)
-#define IMX_GPIOE_BASE			U(0x2D000000)
-#define IMX_GPIOD_BASE			U(0x2E200000)
-#define IMX_GPIOF_BASE			U(0x2D010000)
+#define IMX_TPM5_BASE			U(0x29340000)
 
 #define SRAM0_BASE			U(0x2201F000)
 
+#define IOMUXC_PTD_PCR_BASE		U(0x298c0000)
+#define IOMUXC_PTE_PCR_BASE		U(0x298c0080)
+#define IOMUXC_PTF_PCR_BASE		U(0x298c0100)
+#define IOMUXC_PSMI_BASE0		U(0x298c0800)
+#define IOMUXC_PSMI_BASE1		U(0x298c0838)
+#define IOMUXC_PSMI_BASE2		U(0x298c0954)
+#define IOMUXC_PSMI_BASE3		U(0x298c0994)
+#define IOMUXC_PSMI_BASE4		U(0x298c0a58)
+
 #define IMX_ROM_ENTRY			U(0x1000)
 #define COUNTER_FREQUENCY		1000000
 
diff --git a/plat/imx/imx8ulp/platform.mk b/plat/imx/imx8ulp/platform.mk
index 510785a..b267ffc 100644
--- a/plat/imx/imx8ulp/platform.mk
+++ b/plat/imx/imx8ulp/platform.mk
@@ -23,6 +23,7 @@
 				plat/imx/common/imx8_helpers.S		\
 				plat/imx/imx8ulp/imx8ulp_bl31_setup.c	\
 				plat/imx/imx8ulp/imx8ulp_psci.c		\
+				plat/imx/imx8ulp/apd_context.c		\
 				plat/imx/common/imx8_topology.c		\
 				plat/imx/common/imx_sip_svc.c		\
 				plat/imx/common/imx_sip_handler.c	\
@@ -33,6 +34,7 @@
 				drivers/delay_timer/generic_delay_timer.c \
 				plat/imx/imx8ulp/xrdc/xrdc_core.c		\
 				plat/imx/imx8ulp/imx8ulp_caam.c         \
+				plat/imx/imx8ulp/dram.c 	        \
 				drivers/scmi-msg/base.c			\
 				drivers/scmi-msg/entry.c		\
 				drivers/scmi-msg/smt.c			\