/*
 * Copyright 2008 Freescale Semiconductor, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * Version 2 as published by the Free Software Foundation.
 */
| 8 | |
| 9 | #include <common.h> |
| 10 | #include <asm/fsl_ddr_sdram.h> |
| 11 | |
| 12 | #include "ddr.h" |
| 13 | |
| 14 | /* |
| 15 | * compute_lowest_common_dimm_parameters() |
| 16 | * |
| 17 | * Determine the worst-case DIMM timing parameters from the set of DIMMs |
| 18 | * whose parameters have been computed into the array pointed to |
| 19 | * by dimm_params. |
| 20 | */ |
| 21 | unsigned int |
| 22 | compute_lowest_common_dimm_parameters(const dimm_params_t *dimm_params, |
| 23 | common_timing_params_t *outpdimm, |
| 24 | unsigned int number_of_dimms) |
| 25 | { |
| 26 | unsigned int i; |
| 27 | |
| 28 | unsigned int tCKmin_X_ps = 0; |
| 29 | unsigned int tCKmax_ps = 0xFFFFFFFF; |
| 30 | unsigned int tCKmax_max_ps = 0; |
| 31 | unsigned int tRCD_ps = 0; |
| 32 | unsigned int tRP_ps = 0; |
| 33 | unsigned int tRAS_ps = 0; |
| 34 | unsigned int tWR_ps = 0; |
| 35 | unsigned int tWTR_ps = 0; |
| 36 | unsigned int tRFC_ps = 0; |
| 37 | unsigned int tRRD_ps = 0; |
| 38 | unsigned int tRC_ps = 0; |
| 39 | unsigned int refresh_rate_ps = 0; |
| 40 | unsigned int tIS_ps = 0; |
| 41 | unsigned int tIH_ps = 0; |
| 42 | unsigned int tDS_ps = 0; |
| 43 | unsigned int tDH_ps = 0; |
| 44 | unsigned int tRTP_ps = 0; |
| 45 | unsigned int tDQSQ_max_ps = 0; |
| 46 | unsigned int tQHS_ps = 0; |
| 47 | |
| 48 | unsigned int temp1, temp2; |
| 49 | unsigned int lowest_good_caslat; |
| 50 | unsigned int additive_latency = 0; |
| 51 | const unsigned int mclk_ps = get_memory_clk_period_ps(); |
| 52 | unsigned int not_ok; |
| 53 | |
| 54 | debug("using mclk_ps = %u\n", mclk_ps); |
| 55 | |
| 56 | temp1 = 0; |
| 57 | for (i = 0; i < number_of_dimms; i++) { |
| 58 | /* |
| 59 | * If there are no ranks on this DIMM, |
| 60 | * it probably doesn't exist, so skip it. |
| 61 | */ |
| 62 | if (dimm_params[i].n_ranks == 0) { |
| 63 | temp1++; |
| 64 | continue; |
| 65 | } |
| 66 | |
| 67 | /* |
| 68 | * Find minimum tCKmax_ps to find fastest slow speed, |
| 69 | * i.e., this is the slowest the whole system can go. |
| 70 | */ |
| 71 | tCKmax_ps = min(tCKmax_ps, dimm_params[i].tCKmax_ps); |
| 72 | |
| 73 | /* Either find maximum value to determine slowest |
| 74 | * speed, delay, time, period, etc */ |
| 75 | tCKmin_X_ps = max(tCKmin_X_ps, dimm_params[i].tCKmin_X_ps); |
| 76 | tCKmax_max_ps = max(tCKmax_max_ps, dimm_params[i].tCKmax_ps); |
| 77 | tRCD_ps = max(tRCD_ps, dimm_params[i].tRCD_ps); |
| 78 | tRP_ps = max(tRP_ps, dimm_params[i].tRP_ps); |
| 79 | tRAS_ps = max(tRAS_ps, dimm_params[i].tRAS_ps); |
| 80 | tWR_ps = max(tWR_ps, dimm_params[i].tWR_ps); |
| 81 | tWTR_ps = max(tWTR_ps, dimm_params[i].tWTR_ps); |
| 82 | tRFC_ps = max(tRFC_ps, dimm_params[i].tRFC_ps); |
| 83 | tRRD_ps = max(tRRD_ps, dimm_params[i].tRRD_ps); |
| 84 | tRC_ps = max(tRC_ps, dimm_params[i].tRC_ps); |
| 85 | tIS_ps = max(tIS_ps, dimm_params[i].tIS_ps); |
| 86 | tIH_ps = max(tIH_ps, dimm_params[i].tIH_ps); |
| 87 | tDS_ps = max(tDS_ps, dimm_params[i].tDS_ps); |
| 88 | tDH_ps = max(tDH_ps, dimm_params[i].tDH_ps); |
| 89 | tRTP_ps = max(tRTP_ps, dimm_params[i].tRTP_ps); |
| 90 | tQHS_ps = max(tQHS_ps, dimm_params[i].tQHS_ps); |
| 91 | refresh_rate_ps = max(refresh_rate_ps, |
| 92 | dimm_params[i].refresh_rate_ps); |
| 93 | |
| 94 | /* |
| 95 | * Find maximum tDQSQ_max_ps to find slowest. |
| 96 | * |
| 97 | * FIXME: is finding the slowest value the correct |
| 98 | * strategy for this parameter? |
| 99 | */ |
| 100 | tDQSQ_max_ps = max(tDQSQ_max_ps, dimm_params[i].tDQSQ_max_ps); |
| 101 | } |
| 102 | |
| 103 | outpdimm->ndimms_present = number_of_dimms - temp1; |
| 104 | |
| 105 | if (temp1 == number_of_dimms) { |
| 106 | debug("no dimms this memory controller\n"); |
| 107 | return 0; |
| 108 | } |
| 109 | |
| 110 | outpdimm->tCKmin_X_ps = tCKmin_X_ps; |
| 111 | outpdimm->tCKmax_ps = tCKmax_ps; |
| 112 | outpdimm->tCKmax_max_ps = tCKmax_max_ps; |
| 113 | outpdimm->tRCD_ps = tRCD_ps; |
| 114 | outpdimm->tRP_ps = tRP_ps; |
| 115 | outpdimm->tRAS_ps = tRAS_ps; |
| 116 | outpdimm->tWR_ps = tWR_ps; |
| 117 | outpdimm->tWTR_ps = tWTR_ps; |
| 118 | outpdimm->tRFC_ps = tRFC_ps; |
| 119 | outpdimm->tRRD_ps = tRRD_ps; |
| 120 | outpdimm->tRC_ps = tRC_ps; |
| 121 | outpdimm->refresh_rate_ps = refresh_rate_ps; |
| 122 | outpdimm->tIS_ps = tIS_ps; |
| 123 | outpdimm->tIH_ps = tIH_ps; |
| 124 | outpdimm->tDS_ps = tDS_ps; |
| 125 | outpdimm->tDH_ps = tDH_ps; |
| 126 | outpdimm->tRTP_ps = tRTP_ps; |
| 127 | outpdimm->tDQSQ_max_ps = tDQSQ_max_ps; |
| 128 | outpdimm->tQHS_ps = tQHS_ps; |
| 129 | |
| 130 | /* Determine common burst length for all DIMMs. */ |
| 131 | temp1 = 0xff; |
| 132 | for (i = 0; i < number_of_dimms; i++) { |
| 133 | if (dimm_params[i].n_ranks) { |
| 134 | temp1 &= dimm_params[i].burst_lengths_bitmask; |
| 135 | } |
| 136 | } |
| 137 | outpdimm->all_DIMMs_burst_lengths_bitmask = temp1; |
| 138 | |
| 139 | /* Determine if all DIMMs registered buffered. */ |
| 140 | temp1 = temp2 = 0; |
| 141 | for (i = 0; i < number_of_dimms; i++) { |
| 142 | if (dimm_params[i].n_ranks) { |
| 143 | if (dimm_params[i].registered_dimm) |
| 144 | temp1 = 1; |
| 145 | if (!dimm_params[i].registered_dimm) |
| 146 | temp2 = 1; |
| 147 | } |
| 148 | } |
| 149 | |
| 150 | outpdimm->all_DIMMs_registered = 0; |
| 151 | if (temp1 && !temp2) { |
| 152 | outpdimm->all_DIMMs_registered = 1; |
| 153 | } |
| 154 | |
| 155 | outpdimm->all_DIMMs_unbuffered = 0; |
| 156 | if (!temp1 && temp2) { |
| 157 | outpdimm->all_DIMMs_unbuffered = 1; |
| 158 | } |
| 159 | |
| 160 | /* CHECKME: */ |
| 161 | if (!outpdimm->all_DIMMs_registered |
| 162 | && !outpdimm->all_DIMMs_unbuffered) { |
| 163 | printf("ERROR: Mix of registered buffered and unbuffered " |
| 164 | "DIMMs detected!\n"); |
| 165 | } |
| 166 | |
| 167 | /* |
| 168 | * Compute a CAS latency suitable for all DIMMs |
| 169 | * |
| 170 | * Strategy for SPD-defined latencies: compute only |
| 171 | * CAS latency defined by all DIMMs. |
| 172 | */ |
| 173 | |
| 174 | /* |
| 175 | * Step 1: find CAS latency common to all DIMMs using bitwise |
| 176 | * operation. |
| 177 | */ |
| 178 | temp1 = 0xFF; |
| 179 | for (i = 0; i < number_of_dimms; i++) { |
| 180 | if (dimm_params[i].n_ranks) { |
| 181 | temp2 = 0; |
| 182 | temp2 |= 1 << dimm_params[i].caslat_X; |
| 183 | temp2 |= 1 << dimm_params[i].caslat_X_minus_1; |
| 184 | temp2 |= 1 << dimm_params[i].caslat_X_minus_2; |
| 185 | /* |
| 186 | * FIXME: If there was no entry for X-2 (X-1) in |
| 187 | * the SPD, then caslat_X_minus_2 |
| 188 | * (caslat_X_minus_1) contains either 255 or |
| 189 | * 0xFFFFFFFF because that's what the glorious |
| 190 | * __ilog2 function returns for an input of 0. |
| 191 | * On 32-bit PowerPC, left shift counts with bit |
| 192 | * 26 set (that the value of 255 or 0xFFFFFFFF |
| 193 | * will have), cause the destination register to |
| 194 | * be 0. That is why this works. |
| 195 | */ |
| 196 | temp1 &= temp2; |
| 197 | } |
| 198 | } |
| 199 | |
| 200 | /* |
| 201 | * Step 2: check each common CAS latency against tCK of each |
| 202 | * DIMM's SPD. |
| 203 | */ |
| 204 | lowest_good_caslat = 0; |
| 205 | temp2 = 0; |
| 206 | while (temp1) { |
| 207 | not_ok = 0; |
| 208 | temp2 = __ilog2(temp1); |
| 209 | debug("checking common caslat = %u\n", temp2); |
| 210 | |
| 211 | /* Check if this CAS latency will work on all DIMMs at tCK. */ |
| 212 | for (i = 0; i < number_of_dimms; i++) { |
| 213 | if (!dimm_params[i].n_ranks) { |
| 214 | continue; |
| 215 | } |
| 216 | if (dimm_params[i].caslat_X == temp2) { |
| 217 | if (mclk_ps >= dimm_params[i].tCKmin_X_ps) { |
| 218 | debug("CL = %u ok on DIMM %u at tCK=%u" |
| 219 | " ps with its tCKmin_X_ps of %u\n", |
| 220 | temp2, i, mclk_ps, |
| 221 | dimm_params[i].tCKmin_X_ps); |
| 222 | continue; |
| 223 | } else { |
| 224 | not_ok++; |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | if (dimm_params[i].caslat_X_minus_1 == temp2) { |
| 229 | unsigned int tCKmin_X_minus_1_ps |
| 230 | = dimm_params[i].tCKmin_X_minus_1_ps; |
| 231 | if (mclk_ps >= tCKmin_X_minus_1_ps) { |
| 232 | debug("CL = %u ok on DIMM %u at " |
| 233 | "tCK=%u ps with its " |
| 234 | "tCKmin_X_minus_1_ps of %u\n", |
| 235 | temp2, i, mclk_ps, |
| 236 | tCKmin_X_minus_1_ps); |
| 237 | continue; |
| 238 | } else { |
| 239 | not_ok++; |
| 240 | } |
| 241 | } |
| 242 | |
| 243 | if (dimm_params[i].caslat_X_minus_2 == temp2) { |
| 244 | unsigned int tCKmin_X_minus_2_ps |
| 245 | = dimm_params[i].tCKmin_X_minus_2_ps; |
| 246 | if (mclk_ps >= tCKmin_X_minus_2_ps) { |
| 247 | debug("CL = %u ok on DIMM %u at " |
| 248 | "tCK=%u ps with its " |
| 249 | "tCKmin_X_minus_2_ps of %u\n", |
| 250 | temp2, i, mclk_ps, |
| 251 | tCKmin_X_minus_2_ps); |
| 252 | continue; |
| 253 | } else { |
| 254 | not_ok++; |
| 255 | } |
| 256 | } |
| 257 | } |
| 258 | |
| 259 | if (!not_ok) { |
| 260 | lowest_good_caslat = temp2; |
| 261 | } |
| 262 | |
| 263 | temp1 &= ~(1 << temp2); |
| 264 | } |
| 265 | |
| 266 | debug("lowest common SPD-defined CAS latency = %u\n", |
| 267 | lowest_good_caslat); |
| 268 | outpdimm->lowest_common_SPD_caslat = lowest_good_caslat; |
| 269 | |
| 270 | |
| 271 | /* |
| 272 | * Compute a common 'de-rated' CAS latency. |
| 273 | * |
| 274 | * The strategy here is to find the *highest* dereated cas latency |
| 275 | * with the assumption that all of the DIMMs will support a dereated |
| 276 | * CAS latency higher than or equal to their lowest dereated value. |
| 277 | */ |
| 278 | temp1 = 0; |
| 279 | for (i = 0; i < number_of_dimms; i++) { |
| 280 | temp1 = max(temp1, dimm_params[i].caslat_lowest_derated); |
| 281 | } |
| 282 | outpdimm->highest_common_derated_caslat = temp1; |
| 283 | debug("highest common dereated CAS latency = %u\n", temp1); |
| 284 | |
| 285 | /* Determine if all DIMMs ECC capable. */ |
| 286 | temp1 = 1; |
| 287 | for (i = 0; i < number_of_dimms; i++) { |
| 288 | if (dimm_params[i].n_ranks && dimm_params[i].edc_config != 2) { |
| 289 | temp1 = 0; |
| 290 | break; |
| 291 | } |
| 292 | } |
| 293 | if (temp1) { |
| 294 | debug("all DIMMs ECC capable\n"); |
| 295 | } else { |
| 296 | debug("Warning: not all DIMMs ECC capable, cant enable ECC\n"); |
| 297 | } |
| 298 | outpdimm->all_DIMMs_ECC_capable = temp1; |
| 299 | |
| 300 | |
| 301 | /* FIXME: move to somewhere else to validate. */ |
| 302 | if (mclk_ps > tCKmax_max_ps) { |
| 303 | printf("Warning: some of the installed DIMMs " |
| 304 | "can not operate this slowly.\n"); |
| 305 | return 1; |
| 306 | } |
| 307 | |
| 308 | /* |
| 309 | * Compute additive latency. |
| 310 | * |
| 311 | * For DDR1, additive latency should be 0. |
| 312 | * |
| 313 | * For DDR2, with ODT enabled, use "a value" less than ACTTORW, |
| 314 | * which comes from Trcd, and also note that: |
| 315 | * add_lat + caslat must be >= 4 |
| 316 | * |
| 317 | * For DDR3, FIXME additive latency determination |
| 318 | * |
| 319 | * When to use additive latency for DDR2: |
| 320 | * |
| 321 | * I. Because you are using CL=3 and need to do ODT on writes and |
| 322 | * want functionality. |
| 323 | * 1. Are you going to use ODT? (Does your board not have |
| 324 | * additional termination circuitry for DQ, DQS, DQS_, |
| 325 | * DM, RDQS, RDQS_ for x4/x8 configs?) |
| 326 | * 2. If so, is your lowest supported CL going to be 3? |
| 327 | * 3. If so, then you must set AL=1 because |
| 328 | * |
| 329 | * WL >= 3 for ODT on writes |
| 330 | * RL = AL + CL |
| 331 | * WL = RL - 1 |
| 332 | * -> |
| 333 | * WL = AL + CL - 1 |
| 334 | * AL + CL - 1 >= 3 |
| 335 | * AL + CL >= 4 |
| 336 | * QED |
| 337 | * |
| 338 | * RL >= 3 for ODT on reads |
| 339 | * RL = AL + CL |
| 340 | * |
| 341 | * Since CL aren't usually less than 2, AL=0 is a minimum, |
| 342 | * so the WL-derived AL should be the -- FIXME? |
| 343 | * |
| 344 | * II. Because you are using auto-precharge globally and want to |
| 345 | * use additive latency (posted CAS) to get more bandwidth. |
| 346 | * 1. Are you going to use auto-precharge mode globally? |
| 347 | * |
| 348 | * Use addtivie latency and compute AL to be 1 cycle less than |
| 349 | * tRCD, i.e. the READ or WRITE command is in the cycle |
| 350 | * immediately following the ACTIVATE command.. |
| 351 | * |
| 352 | * III. Because you feel like it or want to do some sort of |
| 353 | * degraded-performance experiment. |
| 354 | * 1. Do you just want to use additive latency because you feel |
| 355 | * like it? |
| 356 | * |
| 357 | * Validation: AL is less than tRCD, and within the other |
| 358 | * read-to-precharge constraints. |
| 359 | */ |
| 360 | |
| 361 | additive_latency = 0; |
| 362 | |
| 363 | #if defined(CONFIG_FSL_DDR2) |
| 364 | if (lowest_good_caslat < 4) { |
| 365 | additive_latency = picos_to_mclk(tRCD_ps) - lowest_good_caslat; |
| 366 | if (mclk_to_picos(additive_latency) > tRCD_ps) { |
| 367 | additive_latency = picos_to_mclk(tRCD_ps); |
| 368 | debug("setting additive_latency to %u because it was " |
| 369 | " greater than tRCD_ps\n", additive_latency); |
| 370 | } |
| 371 | } |
| 372 | |
| 373 | #elif defined(CONFIG_FSL_DDR3) |
| 374 | error "FIXME determine additive latency for DDR3" |
| 375 | #endif |
| 376 | |
| 377 | /* |
| 378 | * Validate additive latency |
| 379 | * FIXME: move to somewhere else to validate |
| 380 | * |
| 381 | * AL <= tRCD(min) |
| 382 | */ |
| 383 | if (mclk_to_picos(additive_latency) > tRCD_ps) { |
| 384 | printf("Error: invalid additive latency exceeds tRCD(min).\n"); |
| 385 | return 1; |
| 386 | } |
| 387 | |
| 388 | /* |
| 389 | * RL = CL + AL; RL >= 3 for ODT_RD_CFG to be enabled |
| 390 | * WL = RL - 1; WL >= 3 for ODT_WL_CFG to be enabled |
| 391 | * ADD_LAT (the register) must be set to a value less |
| 392 | * than ACTTORW if WL = 1, then AL must be set to 1 |
| 393 | * RD_TO_PRE (the register) must be set to a minimum |
| 394 | * tRTP + AL if AL is nonzero |
| 395 | */ |
| 396 | |
| 397 | /* |
| 398 | * Additive latency will be applied only if the memctl option to |
| 399 | * use it. |
| 400 | */ |
| 401 | outpdimm->additive_latency = additive_latency; |
| 402 | |
| 403 | return 0; |
| 404 | } |