Tegra194: ras: verbose prints for SErrors
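This patch provides verbose prints for RAS SErrors handled by the
firmware, for improved debugging. Each RAS node's auxiliary data now
carries the node name, and the node handler logs the name, the raw
status, IERR/SERR, and, when flagged in the status, overflow,
MISC0/MISC1, ADDR and deferred-error information. The handler also
writes back to ERXSTATUS_EL1 a value built from the status fields it
handled, rather than the raw status value it read.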
Change-Id: Iaad8d183054d884f606dc4621da2cc6b2375bcf9
Signed-off-by: David Pu <dpu@nvidia.com>
Signed-off-by: Varun Wadekar <vwadekar@nvidia.com>
diff --git a/plat/nvidia/tegra/include/t194/tegra194_ras_private.h b/plat/nvidia/tegra/include/t194/tegra194_ras_private.h
index c867b9d..336461a 100644
--- a/plat/nvidia/tegra/include/t194/tegra194_ras_private.h
+++ b/plat/nvidia/tegra/include/t194/tegra194_ras_private.h
@@ -18,6 +18,8 @@
/* RAS error node-specific auxiliary data */
struct ras_aux_data {
+ /* name for current RAS node. */
+ const char *name;
/* point to null-terminated ras_error array to convert error code to msg. */
const struct ras_error *error_records;
/*
@@ -218,6 +220,7 @@
#define DEFINE_ONE_RAS_AUX_DATA(node) \
{ \
+ .name = #node, \
.error_records = node##_uncorr_ras_errors, \
.err_ctrl = &node##_err_ctrl \
},
diff --git a/plat/nvidia/tegra/soc/t194/plat_ras.c b/plat/nvidia/tegra/soc/t194/plat_ras.c
index eb896a4af..54c2924 100644
--- a/plat/nvidia/tegra/soc/t194/plat_ras.c
+++ b/plat/nvidia/tegra/soc/t194/plat_ras.c
@@ -42,8 +42,8 @@
ras_lock();
- ERROR("exception reason=%u syndrome=0x%llx on 0x%lx at EL3.\n",
- ea_reason, syndrome, read_mpidr_el1());
+ ERROR("MPIDR 0x%lx: exception reason=%u syndrome=0x%llx\n",
+ read_mpidr(), ea_reason, syndrome);
/* Call RAS EA handler */
ret = ras_ea_handler(ea_reason, syndrome, cookie, handle, flags);
@@ -198,47 +198,90 @@
}
/* Function to handle error from one given node */
-static int32_t tegra194_ras_node_handler(uint32_t errselr,
+static int32_t tegra194_ras_node_handler(uint32_t errselr, const char *name,
const struct ras_error *errors, uint64_t status)
{
bool found = false;
uint32_t ierr = (uint32_t)ERR_STATUS_GET_FIELD(status, IERR);
uint32_t serr = (uint32_t)ERR_STATUS_GET_FIELD(status, SERR);
+ uint64_t val = 0;
/* not a valid error. */
if (ERR_STATUS_GET_FIELD(status, V) == 0U) {
return 0;
}
+ ERR_STATUS_SET_FIELD(val, V, 1);
+
+ /* keep the log print same as linux arm64_ras driver. */
+ ERROR("**************************************\n");
+ ERROR("RAS Error in %s, ERRSELR_EL1=0x%x:\n", name, errselr);
+ ERROR("\tStatus = 0x%llx\n", status);
+
/* Print uncorrectable errror information. */
if (ERR_STATUS_GET_FIELD(status, UE) != 0U) {
+ ERR_STATUS_SET_FIELD(val, UE, 1);
+ ERR_STATUS_SET_FIELD(val, UET, 1);
+
/* IERR to error message */
for (uint32_t i = 0; errors[i].error_msg != NULL; i++) {
if (ierr == errors[i].error_code) {
- ERROR("ERRSELR_EL1:0x%x\n, IERR = %s(0x%x)\n",
- errselr, errors[i].error_msg,
- errors[i].error_code);
+ ERROR("\tIERR = %s: 0x%x\n",
+ errors[i].error_msg, ierr);
+
found = true;
break;
}
}
if (!found) {
- ERROR("unknown uncorrectable eror, "
- "ERRSELR_EL1:0x%x, IERR: 0x%x\n", errselr, ierr);
+ ERROR("\tUnknown IERR: 0x%x\n", ierr);
}
- ERROR("SERR = %s(0x%x)\n", ras_serr_to_str(serr), serr);
+ ERROR("SERR = %s: 0x%x\n", ras_serr_to_str(serr), serr);
+
+ /* Overflow, multiple errors have been detected. */
+ if (ERR_STATUS_GET_FIELD(status, OF) != 0U) {
+ ERROR("\tOverflow (there may be more errors) - "
+ "Uncorrectable\n");
+ ERR_STATUS_SET_FIELD(val, OF, 1);
+ }
+
+ ERROR("\tUncorrectable (this is fatal)\n");
+
+ /* Miscellaneous Register Valid. */
+ if (ERR_STATUS_GET_FIELD(status, MV) != 0U) {
+ ERROR("\tMISC0 = 0x%lx\n", read_erxmisc0_el1());
+ ERROR("\tMISC1 = 0x%lx\n", read_erxmisc1_el1());
+ ERR_STATUS_SET_FIELD(val, MV, 1);
+ }
+
+ /* Address Valid. */
+ if (ERR_STATUS_GET_FIELD(status, AV) != 0U) {
+ ERROR("\tADDR = 0x%lx\n", read_erxaddr_el1());
+ ERR_STATUS_SET_FIELD(val, AV, 1);
+ }
+
+ /* Deferred error */
+ if (ERR_STATUS_GET_FIELD(status, DE) != 0U) {
+ ERROR("\tDeferred error\n");
+ ERR_STATUS_SET_FIELD(val, DE, 1);
+ }
+
} else {
/* For corrected error, simply clear it. */
VERBOSE("corrected RAS error is cleared: ERRSELR_EL1:0x%x, "
"IERR:0x%x, SERR:0x%x\n", errselr, ierr, serr);
+ ERR_STATUS_SET_FIELD(val, CE, 1);
}
+ ERROR("**************************************\n");
+
/* Write to clear reported errors. */
- write_erxstatus_el1(status);
+ write_erxstatus_el1(val);
+ /* error handled */
return 0;
}
@@ -251,6 +294,7 @@
const struct ras_aux_data *aux_data = info->aux_data;
const struct ras_error *errors;
uint32_t offset;
+ const char *node_name;
uint64_t status = 0ULL;
@@ -261,6 +305,7 @@
offset = (uint32_t)probe_data;
errors = aux_data[offset].error_records;
+ node_name = aux_data[offset].name;
assert(errors != NULL);
@@ -270,7 +315,8 @@
/* Retrieve status register from the error record */
status = read_erxstatus_el1();
- return tegra194_ras_node_handler(idx_start + offset, errors, status);
+ return tegra194_ras_node_handler(idx_start + offset, node_name,
+ errors, status);
}