Tegra186: mce: Uncore Perfmon ARI Programming

Uncore perfmon appears to the CPU as a set of uncore perfmon registers
which can be read and written using the ARI interface. The MCE code
sequence handles reads and writes to these registers by manipulating
the underlying T186 uncore hardware.

To access an uncore perfmon register, CPU software writes the ARI
request registers to specify:

* whether the operation is a read or a write,
* which uncore perfmon register to access,
* the uncore perfmon unit, group, and counter number (if necessary),
* the data to write (if the operation is a write).

It then initiates an ARI request to run the uncore perfmon sequence in
the MCE and reads the resulting value of the uncore perfmon register
and any status information from the ARI response registers.

The NS world's MCE driver issues the MCE_CMD_UNCORE_PERFMON_REQ command
to the EL3 layer to start the entire sequence. Once the request
completes, the NS world receives the command status in the X0
register and the command data in the X1 register.

Change-Id: I20bf2eca2385f7c8baa81e9445617ae711ecceea
Signed-off-by: Varun Wadekar <vwadekar@nvidia.com>
diff --git a/plat/nvidia/tegra/soc/t186/drivers/include/mce.h b/plat/nvidia/tegra/soc/t186/drivers/include/mce.h
index 38ca32c..66e212b 100644
--- a/plat/nvidia/tegra/soc/t186/drivers/include/mce.h
+++ b/plat/nvidia/tegra/soc/t186/drivers/include/mce.h
@@ -95,6 +95,7 @@
 	MCE_CMD_ROC_FLUSH_CACHE,
 	MCE_CMD_ROC_CLEAN_CACHE,
 	MCE_CMD_ENABLE_LATIC,
+	MCE_CMD_UNCORE_PERFMON_REQ,
 	MCE_CMD_IS_CCX_ALLOWED = 0xFE,
 	MCE_CMD_MAX = 0xFF,
 } mce_cmd_t;
@@ -202,6 +203,54 @@
 } mca_arg_t;
 
 /*******************************************************************************
+ * Uncore PERFMON ARI struct
+ ******************************************************************************/
+typedef union uncore_perfmon_req {
+	struct perfmon_command {
+		/*
+		 * Command to execute: 0 = READ, 1 = WRITE
+		 */
+		uint64_t cmd:8;
+		/*
+		 * The unit group: L2=0, L3=1, ROC=2, MC=3, IOB=4
+		 */
+		uint64_t grp:4;
+		/*
+		 * Unit selector: Selects the unit instance; presumably
+		 * 0 <= unit <= (number of units in group) - 1 (confirm).
+		 */
+		uint64_t unit:4;
+		/* Selects the uncore perfmon register to access */
+		uint64_t reg:8;
+		/*
+		 * Counter number. Selects which counter to use for
+		 * registers NV_PMEVCNTR and NV_PMEVTYPER.
+		 */
+		uint64_t counter:8;
+	} perfmon_command;
+	struct perfmon_status {
+		/*
+		 * Resulting command status. As the first 8-bit field of
+		 * a union member it shares storage with
+		 * perfmon_command.cmd; writing it replaces the command.
+		 */
+		uint64_t val:8;
+		uint64_t unused:24;
+	} perfmon_status;
+	uint64_t data;
+} uncore_perfmon_req_t;
+
+#define UNCORE_PERFMON_CMD_READ			0	/* cmd: read */
+#define UNCORE_PERFMON_CMD_WRITE		1	/* cmd: write */
+/* Widths match cmd, grp, unit, reg, counter and status val bitfields */
+#define UNCORE_PERFMON_CMD_MASK			0xFF
+#define UNCORE_PERFMON_UNIT_GRP_MASK		0xF
+#define UNCORE_PERFMON_SELECTOR_MASK		0xF
+#define UNCORE_PERFMON_REG_MASK			0xFF
+#define UNCORE_PERFMON_CTR_MASK			0xFF
+#define UNCORE_PERFMON_RESP_STATUS_MASK		0xFF
+
+/*******************************************************************************
  * Structure populated by arch specific code to export routines which perform
  * common low level MCE functions
  ******************************************************************************/
@@ -331,6 +380,12 @@
 	 * reset the entire system
 	 */
 	void (*enter_ccplex_state)(uint32_t ari_base, uint32_t state_idx);
+	/*
+	 * This ARI request reads/writes data from/to Uncore PERFMON
+	 * registers
+	 */
+	int (*read_write_uncore_perfmon)(uint32_t ari_base,
+			uncore_perfmon_req_t req, uint64_t *data);
 } arch_mce_ops_t;
 
 int mce_command_handler(mce_cmd_t cmd, uint64_t arg0, uint64_t arg1,
@@ -363,6 +418,8 @@
 uint64_t ari_read_write_mca(uint32_t ari_base, mca_cmd_t cmd, uint64_t *data);
 int ari_update_ccplex_gsc(uint32_t ari_base, uint32_t gsc_idx);
 void ari_enter_ccplex_state(uint32_t ari_base, uint32_t state_idx);
+int ari_read_write_uncore_perfmon(uint32_t ari_base,
+		uncore_perfmon_req_t req, uint64_t *data);
 
 int nvg_enter_cstate(uint32_t ari_base, uint32_t state, uint32_t wake_time);
 int nvg_update_cstate_info(uint32_t ari_base, uint32_t cluster, uint32_t ccplex,
diff --git a/plat/nvidia/tegra/soc/t186/drivers/mce/ari.c b/plat/nvidia/tegra/soc/t186/drivers/mce/ari.c
index 147a358..e11d160 100644
--- a/plat/nvidia/tegra/soc/t186/drivers/mce/ari.c
+++ b/plat/nvidia/tegra/soc/t186/drivers/mce/ari.c
@@ -389,3 +389,41 @@
 	 */
 	(void)ari_request_wait(ari_base, 0, TEGRA_ARI_MISC_CCPLEX, state_idx, 0);
 }
+
+/*
+ * Run one uncore perfmon ARI request. A "write" takes its value from
+ * *data; a successful "read" stores the register value in *data.
+ * Returns EINVAL if a read/write request has no 'data', the error from
+ * ari_request_wait(), or the response status byte (0 gates the read).
+ */
+int ari_read_write_uncore_perfmon(uint32_t ari_base,
+		uncore_perfmon_req_t req, uint64_t *data)
+{
+	/*
+	 * Latch the command: perfmon_status.val aliases perfmon_command.cmd
+	 * in the union, so the status is kept in a local instead of 'req'.
+	 */
+	uint32_t cmd = req.perfmon_command.cmd;
+	uint32_t val, status;
+	int ret;
+
+	/* "read" stores through 'data' and "write" loads from it */
+	if (!data && (cmd == UNCORE_PERFMON_CMD_READ ||
+		      cmd == UNCORE_PERFMON_CMD_WRITE)) {
+		ERROR("invalid parameters\n");
+		return EINVAL;
+	}
+
+	/* only "write" commands carry a value; it is cut to 32 bits */
+	val = (cmd == UNCORE_PERFMON_CMD_WRITE) ? (uint32_t)*data : 0;
+	ret = ari_request_wait(ari_base, 0, TEGRA_ARI_PERFMON, val, req.data);
+	if (ret)
+		return ret;
+
+	status = ari_get_response_high(ari_base) &
+		 UNCORE_PERFMON_RESP_STATUS_MASK;
+	/* hand the register value back only for a successful "read" */
+	if ((status == 0) && (cmd == UNCORE_PERFMON_CMD_READ))
+		*data = ari_get_response_low(ari_base);
+	return (int)status;
+}
diff --git a/plat/nvidia/tegra/soc/t186/drivers/mce/mce.c b/plat/nvidia/tegra/soc/t186/drivers/mce/mce.c
index 1a712dc..981545d 100644
--- a/plat/nvidia/tegra/soc/t186/drivers/mce/mce.c
+++ b/plat/nvidia/tegra/soc/t186/drivers/mce/mce.c
@@ -61,7 +61,8 @@
 	.roc_clean_cache = ari_roc_clean_cache,
 	.read_write_mca = ari_read_write_mca,
 	.update_ccplex_gsc = ari_update_ccplex_gsc,
-	.enter_ccplex_state = ari_enter_ccplex_state
+	.enter_ccplex_state = ari_enter_ccplex_state,
+	.read_write_uncore_perfmon = ari_read_write_uncore_perfmon
 };
 
 /* ARI functions handlers */
@@ -82,7 +83,8 @@
 	.roc_clean_cache = ari_roc_clean_cache,
 	.read_write_mca = ari_read_write_mca,
 	.update_ccplex_gsc = ari_update_ccplex_gsc,
-	.enter_ccplex_state = ari_enter_ccplex_state
+	.enter_ccplex_state = ari_enter_ccplex_state,
+	.read_write_uncore_perfmon = ari_read_write_uncore_perfmon
 };
 
 typedef struct mce_config {
@@ -173,6 +175,7 @@
 	uint64_t ret64 = 0, arg3, arg4, arg5;
 	int ret = 0;
 	mca_cmd_t mca_cmd;
+	uncore_perfmon_req_t req;
 	cpu_context_t *ctx = cm_get_context(NON_SECURE);
 	gp_regs_t *gp_regs = get_gpregs_ctx(ctx);
 
@@ -374,6 +377,15 @@
 
 		break;
 #endif
+
+	case MCE_CMD_UNCORE_PERFMON_REQ:
+		memcpy(&req, &arg0, sizeof(arg0));
+		ret = ops->read_write_uncore_perfmon(cpu_ari_base, req, &arg1);
+
+		/* update context to return data */
+		write_ctx_reg(gp_regs, CTX_GPREG_X1, arg1);
+		break;
+
 	default:
 		ERROR("unknown MCE command (%d)\n", cmd);
 		return EINVAL;
diff --git a/plat/nvidia/tegra/soc/t186/plat_sip_calls.c b/plat/nvidia/tegra/soc/t186/plat_sip_calls.c
index c7a2c41..9f48ddd 100644
--- a/plat/nvidia/tegra/soc/t186/plat_sip_calls.c
+++ b/plat/nvidia/tegra/soc/t186/plat_sip_calls.c
@@ -65,6 +65,7 @@
 #define TEGRA_SIP_MCE_CMD_ROC_FLUSH_CACHE		0x82FFFF0E
 #define TEGRA_SIP_MCE_CMD_ROC_CLEAN_CACHE		0x82FFFF0F
 #define TEGRA_SIP_MCE_CMD_ENABLE_LATIC			0x82FFFF10
+#define TEGRA_SIP_MCE_CMD_UNCORE_PERFMON_REQ		0x82FFFF11
 
 /*******************************************************************************
  * This function is responsible for handling all T186 SiP calls
@@ -102,6 +103,7 @@
 	case TEGRA_SIP_MCE_CMD_ROC_FLUSH_CACHE:
 	case TEGRA_SIP_MCE_CMD_ROC_CLEAN_CACHE:
 	case TEGRA_SIP_MCE_CMD_ENABLE_LATIC:
+	case TEGRA_SIP_MCE_CMD_UNCORE_PERFMON_REQ:
 
 		/* clean up the high bits */
 		smc_fid &= MCE_CMD_MASK;