David Pu | 70f6597 | 2019-03-18 15:14:49 -0700 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright (c) 2020, NVIDIA Corporation. All rights reserved. |
| 3 | * |
| 4 | * SPDX-License-Identifier: BSD-3-Clause |
| 5 | */ |
| 6 | |
| 7 | #include <stdbool.h> |
| 8 | #include <stdint.h> |
| 9 | |
| 10 | #include <common/debug.h> |
| 11 | #include <lib/bakery_lock.h> |
| 12 | #include <lib/extensions/ras.h> |
| 13 | #include <lib/utils_def.h> |
| 14 | #include <services/sdei.h> |
| 15 | |
| 16 | #include <plat/common/platform.h> |
| 17 | #include <platform_def.h> |
| 18 | #include <tegra194_ras_private.h> |
| 19 | #include <tegra_def.h> |
| 20 | #include <tegra_platform.h> |
| 21 | #include <tegra_private.h> |
| 22 | |
/*
 * Mask covering bits [63:32] of ERR<n>FR. Those bits indicate which RAS
 * errors are supported; a supported error is enabled by setting the
 * corresponding bit in ERR<n>CTLR.
 */
#define ERR_FR_EN_BITS_MASK	(0xFFFFFFFFULL << 32U)
| 28 | |
/*
 * Bakery lock guarding the platform RAS handler, plus the two helpers used
 * to take and drop it.
 */
static DEFINE_BAKERY_LOCK(ras_handler_lock);
#define ras_lock()	(bakery_lock_get(&ras_handler_lock))
#define ras_unlock()	(bakery_lock_release(&ras_handler_lock))
| 33 | |
| 34 | /* |
| 35 | * Function to handle an External Abort received at EL3. |
| 36 | * This function is invoked by RAS framework. |
| 37 | */ |
| 38 | static void tegra194_ea_handler(unsigned int ea_reason, uint64_t syndrome, |
| 39 | void *cookie, void *handle, uint64_t flags) |
| 40 | { |
| 41 | int32_t ret; |
| 42 | |
| 43 | ras_lock(); |
| 44 | |
| 45 | ERROR("exception reason=%u syndrome=0x%llx on 0x%lx at EL3.\n", |
| 46 | ea_reason, syndrome, read_mpidr_el1()); |
| 47 | |
| 48 | /* Call RAS EA handler */ |
| 49 | ret = ras_ea_handler(ea_reason, syndrome, cookie, handle, flags); |
| 50 | if (ret != 0) { |
| 51 | ERROR("RAS error handled!\n"); |
| 52 | ret = sdei_dispatch_event(TEGRA_SDEI_EP_EVENT_0 + |
| 53 | plat_my_core_pos()); |
| 54 | if (ret != 0) |
| 55 | ERROR("sdei_dispatch_event returned %d\n", ret); |
| 56 | } else { |
| 57 | ERROR("Not a RAS error!\n"); |
| 58 | } |
| 59 | |
| 60 | ras_unlock(); |
| 61 | } |
| 62 | |
| 63 | /* Function to enable uncorrectable errors as External abort (SError) */ |
| 64 | void tegra194_ras_enable(void) |
| 65 | { |
| 66 | VERBOSE("%s\n", __func__); |
| 67 | |
| 68 | /* skip RAS enablement if not a silicon platform. */ |
| 69 | if (!tegra_platform_is_silicon()) { |
| 70 | return; |
| 71 | } |
| 72 | |
| 73 | /* |
| 74 | * Iterate for each group(num_idx ERRSELRs starting from idx_start) |
| 75 | * use normal for loop instead of for_each_err_record_info to get rid |
| 76 | * of MISRA noise.. |
| 77 | */ |
| 78 | for (uint32_t i = 0U; i < err_record_mappings.num_err_records; i++) { |
| 79 | |
| 80 | const struct err_record_info *info = &err_record_mappings.err_records[i]; |
| 81 | |
| 82 | uint32_t idx_start = info->sysreg.idx_start; |
| 83 | uint32_t num_idx = info->sysreg.num_idx; |
| 84 | const struct ras_aux_data *aux_data = (const struct ras_aux_data *)info->aux_data; |
| 85 | |
| 86 | assert(aux_data != NULL); |
| 87 | |
| 88 | for (uint32_t j = 0; j < num_idx; j++) { |
| 89 | uint64_t err_ctrl = 0ULL; |
| 90 | |
| 91 | /* enable SError reporting for uncorrectable error */ |
| 92 | ERR_CTLR_ENABLE_FIELD(err_ctrl, UE); |
| 93 | ERR_CTLR_ENABLE_FIELD(err_ctrl, ED); |
| 94 | |
| 95 | /* |
| 96 | * Catch error if something wrong with the RAS aux data |
| 97 | * record table. |
| 98 | */ |
| 99 | assert(aux_data[j].err_ctrl != NULL); |
| 100 | |
| 101 | /* enable the specified errors */ |
| 102 | err_ctrl |= aux_data[j].err_ctrl(); |
| 103 | |
| 104 | /* Write to ERRSELR_EL1 to select the error record */ |
| 105 | ser_sys_select_record(idx_start + j); |
| 106 | |
| 107 | /* enable specified errors */ |
| 108 | write_erxctlr_el1(err_ctrl); |
| 109 | |
| 110 | /* |
| 111 | * Check if all the bit settings have been enabled to detect |
| 112 | * uncorrected/corrected errors, if not assert. |
| 113 | */ |
| 114 | assert(read_erxctlr_el1() == err_ctrl); |
| 115 | } |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | /* Function to probe an error from error record group. */ |
| 120 | static int32_t tegra194_ras_record_probe(const struct err_record_info *info, |
| 121 | int *probe_data) |
| 122 | { |
| 123 | /* Skip probing if not a silicon platform */ |
| 124 | if (!tegra_platform_is_silicon()) { |
| 125 | return 0; |
| 126 | } |
| 127 | |
| 128 | return ser_probe_sysreg(info->sysreg.idx_start, info->sysreg.num_idx, probe_data); |
| 129 | } |
| 130 | |
| 131 | /* Function to handle error from one given node */ |
| 132 | static int32_t tegra194_ras_node_handler(const struct ras_error *errors, uint64_t status) |
| 133 | { |
| 134 | bool found = false; |
| 135 | uint32_t ierr = (uint32_t)ERR_STATUS_GET_FIELD(status, IERR); |
| 136 | uint32_t serr = (uint32_t)ERR_STATUS_GET_FIELD(status, SERR); |
| 137 | |
| 138 | /* IERR to error message */ |
| 139 | for (uint32_t i = 0; errors[i].error_msg != NULL; i++) { |
| 140 | if (ierr == errors[i].error_code) { |
| 141 | ERROR("IERR = %s(0x%x)\n", |
| 142 | errors[i].error_msg, errors[i].error_code); |
| 143 | found = true; |
| 144 | break; |
| 145 | } |
| 146 | } |
| 147 | if (!found) { |
| 148 | ERROR("unknown IERR: 0x%x\n", ierr); |
| 149 | } |
| 150 | |
| 151 | ERROR("SERR = %s(0x%x)\n", ras_serr_to_str(serr), serr); |
| 152 | |
| 153 | /* Write to clear reported errors. */ |
| 154 | write_erxstatus_el1(status); |
| 155 | |
| 156 | return 0; |
| 157 | } |
| 158 | |
| 159 | /* Function to handle one error node from an error record group. */ |
| 160 | static int32_t tegra194_ras_record_handler(const struct err_record_info *info, |
| 161 | int probe_data, const struct err_handler_data *const data) |
| 162 | { |
| 163 | uint32_t num_idx = info->sysreg.num_idx; |
| 164 | uint32_t idx_start = info->sysreg.idx_start; |
| 165 | const struct ras_aux_data *aux_data = info->aux_data; |
| 166 | |
| 167 | uint64_t status = 0ULL; |
| 168 | |
| 169 | VERBOSE("%s\n", __func__); |
| 170 | |
| 171 | assert(probe_data >= 0); |
| 172 | assert((uint32_t)probe_data < num_idx); |
| 173 | |
| 174 | uint32_t offset = (uint32_t)probe_data; |
| 175 | const struct ras_error *errors = aux_data[offset].error_records; |
| 176 | |
| 177 | assert(errors != NULL); |
| 178 | |
| 179 | /* Write to ERRSELR_EL1 to select the error record */ |
| 180 | ser_sys_select_record(idx_start + offset); |
| 181 | |
| 182 | /* Retrieve status register from the error record */ |
| 183 | status = read_erxstatus_el1(); |
| 184 | |
| 185 | assert(ERR_STATUS_GET_FIELD(status, V) != 0U); |
| 186 | assert(ERR_STATUS_GET_FIELD(status, UE) != 0U); |
| 187 | |
| 188 | return tegra194_ras_node_handler(errors, status); |
| 189 | } |
| 190 | |
| 191 | |
| 192 | /* Instantiate RAS nodes */ |
| 193 | PER_CORE_RAS_NODE_LIST(DEFINE_ONE_RAS_NODE) |
| 194 | PER_CLUSTER_RAS_NODE_LIST(DEFINE_ONE_RAS_NODE) |
| 195 | SCF_L3_BANK_RAS_NODE_LIST(DEFINE_ONE_RAS_NODE) |
| 196 | CCPLEX_RAS_NODE_LIST(DEFINE_ONE_RAS_NODE) |
| 197 | |
| 198 | /* Instantiate RAS node groups */ |
| 199 | static struct ras_aux_data per_core_ras_group[] = { |
| 200 | PER_CORE_RAS_GROUP_NODES |
| 201 | }; |
| 202 | |
| 203 | static struct ras_aux_data per_cluster_ras_group[] = { |
| 204 | PER_CLUSTER_RAS_GROUP_NODES |
| 205 | }; |
| 206 | |
| 207 | static struct ras_aux_data scf_l3_ras_group[] = { |
| 208 | SCF_L3_BANK_RAS_GROUP_NODES |
| 209 | }; |
| 210 | |
| 211 | static struct ras_aux_data ccplex_ras_group[] = { |
| 212 | CCPLEX_RAS_GROUP_NODES |
| 213 | }; |
| 214 | |
/*
 * Every error record group shares the same probe and handler functions, so
 * a macro is used to simplify the record definitions: only the first
 * ERRSELR index and the group's aux data table differ, and the number of
 * indices is taken from the size of that table.
 */
#define ADD_ONE_ERR_GROUP(first_errselr, aux_group)			\
	ERR_RECORD_SYSREG_V1((first_errselr),				\
			(uint32_t)ARRAY_SIZE(aux_group),		\
			&tegra194_ras_record_probe,			\
			&tegra194_ras_record_handler,			\
			(aux_group))
| 223 | |
| 224 | /* RAS error record group information */ |
| 225 | static struct err_record_info carmel_ras_records[] = { |
| 226 | /* |
| 227 | * Per core ras error records |
| 228 | * ERRSELR starts from 0*256 + Logical_CPU_ID*16 + 0 to |
| 229 | * 0*256 + Logical_CPU_ID*16 + 5 for each group. |
| 230 | * 8 cores/groups, 6 * 8 nodes in total. |
| 231 | */ |
| 232 | ADD_ONE_ERR_GROUP(0x000, per_core_ras_group), |
| 233 | ADD_ONE_ERR_GROUP(0x010, per_core_ras_group), |
| 234 | ADD_ONE_ERR_GROUP(0x020, per_core_ras_group), |
| 235 | ADD_ONE_ERR_GROUP(0x030, per_core_ras_group), |
| 236 | ADD_ONE_ERR_GROUP(0x040, per_core_ras_group), |
| 237 | ADD_ONE_ERR_GROUP(0x050, per_core_ras_group), |
| 238 | ADD_ONE_ERR_GROUP(0x060, per_core_ras_group), |
| 239 | ADD_ONE_ERR_GROUP(0x070, per_core_ras_group), |
| 240 | |
| 241 | /* |
| 242 | * Per cluster ras error records |
| 243 | * ERRSELR starts from 2*256 + Logical_Cluster_ID*16 + 0 to |
| 244 | * 2*256 + Logical_Cluster_ID*16 + 3. |
| 245 | * 4 clusters/groups, 3 * 4 nodes in total. |
| 246 | */ |
| 247 | ADD_ONE_ERR_GROUP(0x200, per_cluster_ras_group), |
| 248 | ADD_ONE_ERR_GROUP(0x210, per_cluster_ras_group), |
| 249 | ADD_ONE_ERR_GROUP(0x220, per_cluster_ras_group), |
| 250 | ADD_ONE_ERR_GROUP(0x230, per_cluster_ras_group), |
| 251 | |
| 252 | /* |
| 253 | * SCF L3_Bank ras error records |
| 254 | * ERRSELR: 3*256 + L3_Bank_ID, L3_Bank_ID: 0-3 |
| 255 | * 1 groups, 4 nodes in total. |
| 256 | */ |
| 257 | ADD_ONE_ERR_GROUP(0x300, scf_l3_ras_group), |
| 258 | |
| 259 | /* |
| 260 | * CCPLEX ras error records |
| 261 | * ERRSELR: 4*256 + Unit_ID, Unit_ID: 0 - 4 |
| 262 | * 1 groups, 5 nodes in total. |
| 263 | */ |
| 264 | ADD_ONE_ERR_GROUP(0x400, ccplex_ras_group), |
| 265 | }; |
| 266 | |
| 267 | REGISTER_ERR_RECORD_INFO(carmel_ras_records); |
| 268 | |
| 269 | /* dummy RAS interrupt */ |
| 270 | static struct ras_interrupt carmel_ras_interrupts[] = {}; |
| 271 | REGISTER_RAS_INTERRUPTS(carmel_ras_interrupts); |
| 272 | |
/*******************************************************************************
 * RAS handler for the platform.
 *
 * With RAS_EXTENSION enabled the External Abort is handed to
 * tegra194_ea_handler() together with all arguments. Without it, the abort
 * is logged (MPIDR of the reporting CPU, exception reason and syndrome) and
 * the firmware panics; cookie, handle and flags are unused in that case.
 ******************************************************************************/
void plat_ea_handler(unsigned int ea_reason, uint64_t syndrome, void *cookie,
		void *handle, uint64_t flags)
{
#if RAS_EXTENSION
	tegra194_ea_handler(ea_reason, syndrome, cookie, handle, flags);
#else
	/*
	 * The length modifiers follow the RAS-enabled log message: 'l' for the
	 * MPIDR value and 'll' for the 64-bit syndrome. The explicit casts keep
	 * each argument matched to its conversion regardless of how the
	 * toolchain headers define the underlying integer types.
	 */
	ERROR("Unhandled External Abort received on 0x%lx at EL3!\n",
		(unsigned long)read_mpidr_el1());
	ERROR(" exception reason=%u syndrome=0x%llx\n", ea_reason,
		(unsigned long long)syndrome);
	panic();
#endif
}