Tegra210: bpmp: power management interface

This patch adds the driver to communicate with the BPMP processor
for power management use cases. BPMP controls the entry into cluster
and system power states. The Tegra210 platform port queries the BPMP
to calculate the target state for the cluster. In case BPMP does not
allow CCx entry, the core enters a power down state.

Change-Id: I9c40aef561607a0b02c49b7f8118570eb9105cc9
Signed-off-by: Varun Wadekar <vwadekar@nvidia.com>
diff --git a/plat/nvidia/tegra/common/drivers/bpmp/bpmp.c b/plat/nvidia/tegra/common/drivers/bpmp/bpmp.c
new file mode 100644
index 0000000..c78b912
--- /dev/null
+++ b/plat/nvidia/tegra/common/drivers/bpmp/bpmp.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch_helpers.h>
+#include <assert.h>
+#include <bpmp.h>
+#include <common/debug.h>
+#include <delay_timer.h>
+#include <errno.h>
+#include <mmio.h>
+#include <platform.h>
+#include <stdbool.h>
+#include <string.h>
+#include <tegra_def.h>
+
+#define BPMP_TIMEOUT_10US	10
+
+static uint32_t channel_base[NR_CHANNELS];
+static uint32_t bpmp_init_state = BPMP_INIT_PENDING;
+
+static uint32_t channel_field(unsigned int ch)
+{
+	return mmio_read_32(TEGRA_RES_SEMA_BASE + STA_OFFSET) & CH_MASK(ch);
+}
+
+static bool master_free(unsigned int ch)
+{
+	return channel_field(ch) == MA_FREE(ch);
+}
+
+static bool master_acked(unsigned int ch)
+{
+	return channel_field(ch) == MA_ACKD(ch);
+}
+
+static void signal_slave(unsigned int ch)
+{
+	mmio_write_32(TEGRA_RES_SEMA_BASE + CLR_OFFSET, CH_MASK(ch));
+}
+
+static void free_master(unsigned int ch)
+{
+	mmio_write_32(TEGRA_RES_SEMA_BASE + CLR_OFFSET,
+		      MA_ACKD(ch) ^ MA_FREE(ch));
+}
+
+/* should be called with local irqs disabled */
+int32_t tegra_bpmp_send_receive_atomic(int mrq, const void *ob_data, int ob_sz,
+		void *ib_data, int ib_sz)
+{
+	unsigned int ch = (unsigned int)plat_my_core_pos();
+	mb_data_t *p = (mb_data_t *)(uintptr_t)channel_base[ch];
+	int32_t ret = -ETIMEDOUT, timeout = 0;
+
+	if (bpmp_init_state == BPMP_INIT_COMPLETE) {
+
+		/* loop until BPMP is free */
+		for (timeout = 0; timeout < BPMP_TIMEOUT_10US; timeout++) {
+			if (master_free(ch) == true) {
+				break;
+			}
+
+			udelay(1);
+		}
+
+		if (timeout != BPMP_TIMEOUT_10US) {
+
+			/* generate the command struct */
+			p->code = mrq;
+			p->flags = DO_ACK;
+			(void)memcpy((void *)p->data, ob_data, (size_t)ob_sz);
+
+			/* signal command ready to the BPMP */
+			signal_slave(ch);
+			mmio_write_32(TEGRA_PRI_ICTLR_BASE + CPU_IEP_FIR_SET,
+				      (1UL << INT_SHR_SEM_OUTBOX_FULL));
+
+			/* loop until the command is executed */
+			for (timeout = 0; timeout < BPMP_TIMEOUT_10US; timeout++) {
+				if (master_acked(ch) == true) {
+					break;
+				}
+
+				udelay(1);
+			}
+
+			if (timeout != BPMP_TIMEOUT_10US) {
+
+				/* get the command response */
+				(void)memcpy(ib_data, (const void *)p->data,
+					     (size_t)ib_sz);
+
+				/* return error code */
+				ret = p->code;
+
+				/* free this channel */
+				free_master(ch);
+			}
+		}
+
+	} else {
+		/* return error code */
+		ret = -EINVAL;
+	}
+
+	if (timeout == BPMP_TIMEOUT_10US) {
+		ERROR("Timed out waiting for bpmp's response");
+	}
+
+	return ret;
+}
+
+int tegra_bpmp_init(void)
+{
+	uint32_t val, base;
+	unsigned int ch;
+	int ret = 0;
+
+	if (bpmp_init_state != BPMP_INIT_COMPLETE) {
+
+		/* check if the bpmp processor is alive. */
+		val = mmio_read_32(TEGRA_RES_SEMA_BASE + STA_OFFSET);
+		if (val != SIGN_OF_LIFE) {
+			ERROR("BPMP precessor not available\n");
+			ret = -ENOTSUP;
+		}
+
+		/* check if clock for the atomics block is enabled */
+		val = mmio_read_32(TEGRA_CAR_RESET_BASE + TEGRA_CLK_ENB_V);
+		if ((val & CAR_ENABLE_ATOMICS) == 0) {
+			ERROR("Clock to the atomics block is disabled\n");
+		}
+
+		/* check if the atomics block is out of reset */
+		val = mmio_read_32(TEGRA_CAR_RESET_BASE + TEGRA_RST_DEV_CLR_V);
+		if ((val & CAR_ENABLE_ATOMICS) == 0) {
+			ERROR("Reset to the atomics block is asserted\n");
+		}
+
+		/* base address to get the result from Atomics */
+		base = TEGRA_ATOMICS_BASE + RESULT0_REG_OFFSET;
+
+		/* channel area is setup by BPMP before signaling handshake */
+		for (ch = 0; ch < NR_CHANNELS; ch++) {
+
+			/* issue command to get the channel base address */
+			mmio_write_32(base, (ch << TRIGGER_ID_SHIFT) |
+				      ATOMIC_CMD_GET);
+
+			/* get the base address for the channel */
+			channel_base[ch] = mmio_read_32(base);
+
+			/* increment result register offset */
+			base += 4UL;
+		}
+
+		/* mark state as "initialized" */
+		if (ret == 0)
+			bpmp_init_state = BPMP_INIT_COMPLETE;
+
+		/* the channel values have to be visible across all cpus */
+		flush_dcache_range((uint64_t)channel_base, sizeof(channel_base));
+		flush_dcache_range((uint64_t)&bpmp_init_state,
+				   sizeof(bpmp_init_state));
+
+		INFO("%s: done\n", __func__);
+	}
+
+	return ret;
+}
diff --git a/plat/nvidia/tegra/include/drivers/bpmp.h b/plat/nvidia/tegra/include/drivers/bpmp.h
new file mode 100644
index 0000000..27f57df
--- /dev/null
+++ b/plat/nvidia/tegra/include/drivers/bpmp.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef BPMP_H
+#define BPMP_H
+
+#include <stdint.h>
+
+/* macro to enable clock to the Atomics block */
+#define CAR_ENABLE_ATOMICS	(1UL << 16)
+
+/* command to get the channel base addresses from bpmp */
+#define ATOMIC_CMD_GET		4UL
+
+/* Hardware IRQ # used to signal bpmp of an incoming command */
+#define INT_SHR_SEM_OUTBOX_FULL	6UL
+
+/* macros to decode the bpmp's state */
+#define CH_MASK(ch)		(0x3UL << ((ch) * 2UL))
+#define MA_FREE(ch)		(0x2UL << ((ch) * 2UL))
+#define MA_ACKD(ch)		(0x3UL << ((ch) * 2UL))
+
+/* response from bpmp to indicate it has powered up */
+#define SIGN_OF_LIFE		0xAAAAAAAAUL
+
+/* flags to indicate bpmp driver's state */
+#define BPMP_INIT_COMPLETE	0xBEEFF00DUL
+#define BPMP_INIT_PENDING	0xDEADBEEFUL
+
+/* requests serviced by the bpmp */
+#define MRQ_PING		0
+#define MRQ_QUERY_TAG		1
+#define MRQ_DO_IDLE		2
+#define MRQ_TOLERATE_IDLE	3
+#define MRQ_MODULE_LOAD		4
+#define MRQ_MODULE_UNLOAD	5
+#define MRQ_SWITCH_CLUSTER	6
+#define MRQ_TRACE_MODIFY	7
+#define MRQ_WRITE_TRACE		8
+#define MRQ_THREADED_PING	9
+#define MRQ_CPUIDLE_USAGE	10
+#define MRQ_MODULE_MAIL		11
+#define MRQ_SCX_ENABLE		12
+#define MRQ_BPMPIDLE_USAGE	14
+#define MRQ_HEAP_USAGE		15
+#define MRQ_SCLK_SKIP_SET_RATE	16
+#define MRQ_ENABLE_SUSPEND	17
+#define MRQ_PASR_MASK		18
+#define MRQ_DEBUGFS		19
+#define MRQ_THERMAL		27
+
+/* Tegra PM states as known to BPMP */
+#define TEGRA_PM_CC1		9
+#define TEGRA_PM_CC4		12
+#define TEGRA_PM_CC6		14
+#define TEGRA_PM_CC7		15
+#define TEGRA_PM_SC1		17
+#define TEGRA_PM_SC2		18
+#define TEGRA_PM_SC3		19
+#define TEGRA_PM_SC4		20
+#define TEGRA_PM_SC7		23
+
+/* flag to indicate if entry into a CCx power state is allowed */
+#define BPMP_CCx_ALLOWED	0UL
+
+/* number of communication channels to interact with the bpmp */
+#define NR_CHANNELS		4U
+
+/* flag to ask bpmp to acknowledge command packet */
+#define NO_ACK			(0UL << 0UL)
+#define DO_ACK			(1UL << 0UL)
+
+/* size of the command/response data */
+#define MSG_DATA_MAX_SZ		120U
+
+/**
+ * command/response packet to/from the bpmp
+ *
+ * command
+ * -------
+ * code: MRQ_* command
+ * flags: DO_ACK or NO_ACK
+ * data:
+ * 	[0] = cpu #
+ * 	[1] = cluster power state (TEGRA_PM_CCx)
+ * 	[2] = system power state (TEGRA_PM_SCx)
+ *
+ * response
+ * ---------
+ * code: error code
+ * flags: not used
+ * data:
+ * 	[0-3] = response value
+ */
+typedef struct mb_data {
+	int32_t code;
+	uint32_t flags;
+	uint8_t data[MSG_DATA_MAX_SZ];
+} mb_data_t;
+
+/**
+ * Function to initialise the interface with the bpmp
+ */
+int tegra_bpmp_init(void);
+
+/**
+ * Handler to send a MRQ_* command to the bpmp
+ */
+int32_t tegra_bpmp_send_receive_atomic(int mrq, const void *ob_data, int ob_sz,
+		void *ib_data, int ib_sz);
+
+#endif /* BPMP_H */
diff --git a/plat/nvidia/tegra/include/t210/tegra_def.h b/plat/nvidia/tegra/include/t210/tegra_def.h
index ec9b68e..5565c72 100644
--- a/plat/nvidia/tegra/include/t210/tegra_def.h
+++ b/plat/nvidia/tegra/include/t210/tegra_def.h
@@ -33,6 +33,12 @@
 #define PLAT_MAX_OFF_STATE		(PSTATE_ID_SOC_POWERDN + U(1))
 
 /*******************************************************************************
+ * iRAM memory constants
+ ******************************************************************************/
+#define TEGRA_IRAMA_BASE		0x40000000
+#define TEGRA_IRAMB_BASE		0x40010000
+
+/*******************************************************************************
  * GIC memory map
  ******************************************************************************/
 #define TEGRA_GICD_BASE			U(0x50041000)
@@ -56,6 +62,20 @@
 					 ENABLE_WRAP_INCR_MASTER0_BIT)
 
 /*******************************************************************************
+ * Tegra Resource Semaphore constants
+ ******************************************************************************/
+#define TEGRA_RES_SEMA_BASE		0x60001000UL
+#define  STA_OFFSET			0UL
+#define  SET_OFFSET			4UL
+#define  CLR_OFFSET			8UL
+
+/*******************************************************************************
+ * Tegra Primary Interrupt Controller constants
+ ******************************************************************************/
+#define TEGRA_PRI_ICTLR_BASE		0x60004000UL
+#define  CPU_IEP_FIR_SET		0x18UL
+
+/*******************************************************************************
  * Tegra micro-seconds timer constants
  ******************************************************************************/
 #define TEGRA_TMRUS_BASE		U(0x60005010)
@@ -67,6 +87,8 @@
 #define TEGRA_CAR_RESET_BASE		U(0x60006000)
 #define TEGRA_GPU_RESET_REG_OFFSET	U(0x28C)
 #define  GPU_RESET_BIT			(U(1) << 24)
+#define TEGRA_RST_DEV_CLR_V		U(0x434)
+#define TEGRA_CLK_ENB_V			U(0x440)
 
 /*******************************************************************************
  * Tegra Flow Controller constants
@@ -109,6 +131,15 @@
 #define TEGRA_PMC_BASE			U(0x7000E400)
 
 /*******************************************************************************
+ * Tegra Atomics constants
+ ******************************************************************************/
+#define TEGRA_ATOMICS_BASE		0x70016000UL
+#define  TRIGGER0_REG_OFFSET		0UL
+#define  TRIGGER_WIDTH_SHIFT		4UL
+#define  TRIGGER_ID_SHIFT		16UL
+#define  RESULT0_REG_OFFSET		0xC00UL
+
+/*******************************************************************************
  * Tegra Memory Controller constants
  ******************************************************************************/
 #define TEGRA_MC_BASE			U(0x70019000)
diff --git a/plat/nvidia/tegra/soc/t210/plat_psci_handlers.c b/plat/nvidia/tegra/soc/t210/plat_psci_handlers.c
index bd4beaa..b7f4b90 100644
--- a/plat/nvidia/tegra/soc/t210/plat_psci_handlers.c
+++ b/plat/nvidia/tegra/soc/t210/plat_psci_handlers.c
@@ -5,9 +5,6 @@
  */
 
 #include <assert.h>
-
-#include <platform_def.h>
-
 #include <arch_helpers.h>
 #include <common/debug.h>
 #include <drivers/delay_timer.h>
@@ -15,12 +12,14 @@
 #include <lib/psci/psci.h>
 #include <plat/common/platform.h>
 
+#include <bpmp.h>
 #include <flowctrl.h>
 #include <pmc.h>
+#include <platform_def.h>
+#include <security_engine.h>
 #include <tegra_def.h>
 #include <tegra_private.h>
 #include <tegra_platform.h>
-#include <security_engine.h>
 
 /*
  * Register used to clear CPU reset signals. Each CPU has two reset
@@ -57,7 +56,7 @@
 		 * Cluster powerdown/idle request only for afflvl 1
 		 */
 		req_state->pwr_domain_state[MPIDR_AFFLVL1] = state_id;
-		req_state->pwr_domain_state[MPIDR_AFFLVL0] = state_id;
+		req_state->pwr_domain_state[MPIDR_AFFLVL0] = PSTATE_ID_CORE_POWERDN;
 
 		break;
 
@@ -89,9 +88,11 @@
 					     const plat_local_state_t *states,
 					     unsigned int ncpu)
 {
-	plat_local_state_t target = *states;
+	plat_local_state_t target = PSCI_LOCAL_STATE_RUN;
 	int cpu = plat_my_core_pos();
 	int core_pos = read_mpidr() & MPIDR_CPU_MASK;
+	uint32_t bpmp_reply, data[3];
+	int ret;
 
 	/* get the power state at this level */
 	if (lvl == MPIDR_AFFLVL1)
@@ -99,19 +100,57 @@
 	if (lvl == MPIDR_AFFLVL2)
 		target = *(states + cpu);
 
-	/* Cluster idle/power-down */
-	if ((lvl == MPIDR_AFFLVL1) && ((target == PSTATE_ID_CLUSTER_IDLE) ||
-	    (target == PSTATE_ID_CLUSTER_POWERDN))) {
-		return target;
-	}
+	if ((lvl == MPIDR_AFFLVL1) && (target == PSTATE_ID_CLUSTER_IDLE)) {
+
+		/* initialize the bpmp interface */
+		(void)tegra_bpmp_init();
+
+		/* Cluster idle */
+		data[0] = (uint32_t)cpu;
+		data[1] = TEGRA_PM_CC6;
+		data[2] = TEGRA_PM_SC1;
+		ret = tegra_bpmp_send_receive_atomic(MRQ_DO_IDLE,
+				(void *)&data, (int)sizeof(data),
+				(void *)&bpmp_reply, (int)sizeof(bpmp_reply));
 
-	/* System Suspend */
-	if (((lvl == MPIDR_AFFLVL2) || (lvl == MPIDR_AFFLVL1)) &&
-	    (target == PSTATE_ID_SOC_POWERDN))
-		return PSTATE_ID_SOC_POWERDN;
+		/* check if cluster idle entry is allowed */
+		if ((ret != 0L) || (bpmp_reply != BPMP_CCx_ALLOWED)) {
 
-	/* default state */
-	return PSCI_LOCAL_STATE_RUN;
+			/* Cluster idle not allowed */
+			target = PSCI_LOCAL_STATE_RUN;
+		}
+
+	} else if ((lvl == MPIDR_AFFLVL1) && (target == PSTATE_ID_CLUSTER_POWERDN)) {
+
+		/* initialize the bpmp interface */
+		(void)tegra_bpmp_init();
+
+		/* Cluster power-down */
+		data[0] = (uint32_t)cpu;
+		data[1] = TEGRA_PM_CC7;
+		data[2] = TEGRA_PM_SC1;
+		ret = tegra_bpmp_send_receive_atomic(MRQ_DO_IDLE,
+				(void *)&data, (int)sizeof(data),
+				(void *)&bpmp_reply, (int)sizeof(bpmp_reply));
+
+		/* check if cluster power down is allowed */
+		if ((ret != 0L) || (bpmp_reply != BPMP_CCx_ALLOWED)) {
+
+			/* Cluster power down not allowed */
+			target = PSCI_LOCAL_STATE_RUN;
+		}
+
+	} else if (((lvl == MPIDR_AFFLVL2) || (lvl == MPIDR_AFFLVL1)) &&
+	    (target == PSTATE_ID_SOC_POWERDN)) {
+
+		/* System Suspend */
+		target = PSTATE_ID_SOC_POWERDN;
+
+	} else {
+		; /* do nothing */
+	}
+
+	return target;
 }
 
 int tegra_soc_pwr_domain_suspend(const psci_power_state_t *target_state)
@@ -132,6 +171,7 @@
 			(stateid_afflvl1 == PSTATE_ID_SOC_POWERDN));
 
 		if (tegra_chipid_is_t210_b01()) {
+
 			/* Suspend se/se2 and pka1 */
 			if (tegra_se_suspend() != 0) {
 				ret = PSCI_E_INTERN_FAIL;
@@ -143,21 +183,21 @@
 			}
 		}
 
-		/* suspend the entire soc */
+		/* enter system suspend */
 		if (ret == PSCI_E_SUCCESS) {
 			tegra_fc_soc_powerdn(mpidr);
 		}
 
 	} else if (stateid_afflvl1 == PSTATE_ID_CLUSTER_IDLE) {
 
-		assert(stateid_afflvl0 == PSTATE_ID_CLUSTER_IDLE);
+		assert(stateid_afflvl0 == PSTATE_ID_CORE_POWERDN);
 
 		/* Prepare for cluster idle */
 		tegra_fc_cluster_idle(mpidr);
 
 	} else if (stateid_afflvl1 == PSTATE_ID_CLUSTER_POWERDN) {
 
-		assert(stateid_afflvl0 == PSTATE_ID_CLUSTER_POWERDN);
+		assert(stateid_afflvl0 == PSTATE_ID_CORE_POWERDN);
 
 		/* Prepare for cluster powerdn */
 		tegra_fc_cluster_powerdn(mpidr);
@@ -168,7 +208,8 @@
 		tegra_fc_cpu_powerdn(mpidr);
 
 	} else {
-		ERROR("%s: Unknown state id\n", __func__);
+		ERROR("%s: Unknown state id (%d, %d, %d)\n", __func__,
+			stateid_afflvl2, stateid_afflvl1, stateid_afflvl0);
 		ret = PSCI_E_NOT_SUPPORTED;
 	}
 
diff --git a/plat/nvidia/tegra/soc/t210/plat_setup.c b/plat/nvidia/tegra/soc/t210/plat_setup.c
index 0c2fe96..451da13 100644
--- a/plat/nvidia/tegra/soc/t210/plat_setup.c
+++ b/plat/nvidia/tegra/soc/t210/plat_setup.c
@@ -5,6 +5,7 @@
  */
 
 #include <arch_helpers.h>
+#include <bpmp.h>
 #include <common/bl_common.h>
 #include <drivers/console.h>
 #include <lib/xlat_tables/xlat_tables_v2.h>
@@ -24,6 +25,10 @@
  * Table of regions to map using the MMU.
  */
 static const mmap_region_t tegra_mmap[] = {
+	MAP_REGION_FLAT(TEGRA_IRAMA_BASE, 0x10000, /* 64KB */
+			MT_DEVICE | MT_RW | MT_SECURE),
+	MAP_REGION_FLAT(TEGRA_IRAMB_BASE, 0x10000, /* 64KB */
+			MT_DEVICE | MT_RW | MT_SECURE),
 	MAP_REGION_FLAT(MMIO_RANGE_0_ADDR, MMIO_RANGE_SIZE,
 			MT_DEVICE | MT_RW | MT_SECURE),
 	MAP_REGION_FLAT(MMIO_RANGE_1_ADDR, MMIO_RANGE_SIZE,
diff --git a/plat/nvidia/tegra/soc/t210/platform_t210.mk b/plat/nvidia/tegra/soc/t210/platform_t210.mk
index 3cc71b2..4749d76 100644
--- a/plat/nvidia/tegra/soc/t210/platform_t210.mk
+++ b/plat/nvidia/tegra/soc/t210/platform_t210.mk
@@ -26,6 +26,7 @@
 
 BL31_SOURCES		+=	lib/cpus/aarch64/cortex_a53.S			\
 				lib/cpus/aarch64/cortex_a57.S			\
+				${COMMON_DIR}/drivers/bpmp/bpmp.c	\
 				${COMMON_DIR}/drivers/flowctrl/flowctrl.c	\
 				${COMMON_DIR}/drivers/memctrl/memctrl_v1.c	\
 				${SOC_DIR}/plat_psci_handlers.c			\