1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2020 Marvell International Ltd.
4 */
5
6#include <command.h>
7#include <dm.h>
8#include <hang.h>
9#include <i2c.h>
10#include <ram.h>
11#include <time.h>
12
13#include <linux/bitops.h>
14#include <linux/io.h>
15
16#include <mach/octeon_ddr.h>
17
18/* Random number generator stuff */
19
20#define CVMX_RNM_CTL_STATUS 0x0001180040000000
21#define CVMX_OCT_DID_RNG 8ULL
22
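/*
 * Build an Octeon I/O-space physical address: bit 48 selects I/O space,
 * the major device ID sits in bits 47:43 and the sub-DID in bits 42:40.
 * The RNG unit (major DID 8, sub-DID 0) is then read with a plain CSR load.
 */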
23static u64 cvmx_build_io_address(u64 major_did, u64 sub_did)
24{
25 return ((0x1ull << 48) | (major_did << 43) | (sub_did << 40));
26}
27
28static u64 cvmx_rng_get_random64(void)
29{
30 return csr_rd(cvmx_build_io_address(CVMX_OCT_DID_RNG, 0));
31}
32
33static void cvmx_rng_enable(void)
34{
35 u64 val;
36
37 val = csr_rd(CVMX_RNM_CTL_STATUS);
38 val |= BIT(0) | BIT(1);
39 csr_wr(CVMX_RNM_CTL_STATUS, val);
40}
41
42#define RLEVEL_PRINTALL_DEFAULT 1
43#define WLEVEL_PRINTALL_DEFAULT 1
44
45/*
46 * Define how many HW WL samples to take for majority voting.
47 * MUST BE odd!!
48 * Assume there should only be 2 possible values that will show up,
49 * so treat ties as a problem!!!
50 * NOTE: Do not change this without checking the code!!!
51 */
52#define WLEVEL_LOOPS_DEFAULT 5
53
54#define ENABLE_COMPUTED_VREF_ADJUSTMENT 1
55#define SW_WLEVEL_HW_DEFAULT 1
56#define DEFAULT_BEST_RANK_SCORE 9999999
57#define MAX_RANK_SCORE_LIMIT 99
58
59/*
60 * Define how many HW RL samples per rank to take; multiple samples
61 * allow looking for the best sample score.
62 */
63#define RLEVEL_SAMPLES_DEFAULT 3
64
65#define ddr_seq_print(format, ...) do {} while (0)
66
67struct wlevel_bitcnt {
68 int bitcnt[4];
69};
70
71static void display_dac_dbi_settings(int lmc, int dac_or_dbi,
72 int ecc_ena, int *settings, char *title);
73
74static unsigned short load_dac_override(struct ddr_priv *priv, int if_num,
75 int dac_value, int byte);
76
77/* "mode" arg */
78#define DBTRAIN_TEST 0
79#define DBTRAIN_DBI 1
80#define DBTRAIN_LFSR 2
81
82static int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr,
83 int mode, u64 *xor_data);
84
85#define LMC_DDR3_RESET_ASSERT 0
86#define LMC_DDR3_RESET_DEASSERT 1
87
88static void cn7xxx_lmc_ddr3_reset(struct ddr_priv *priv, int if_num, int reset)
89{
90 union cvmx_lmcx_reset_ctl reset_ctl;
91
92 /*
93 * 4. Deassert DDRn_RESET_L pin by writing
94 * LMC(0..3)_RESET_CTL[DDR3RST] = 1
95 * without modifying any other LMC(0..3)_RESET_CTL fields.
96 * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
97 * 6. Wait a minimum of 500us. This guarantees the necessary T = 500us
98 * delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE*
99 * assertion.
100 */
101 debug("LMC%d %s DDR_RESET_L\n", if_num,
102 (reset ==
103 LMC_DDR3_RESET_DEASSERT) ? "De-asserting" : "Asserting");
104
105 reset_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));
106 reset_ctl.cn78xx.ddr3rst = reset;
107 lmc_wr(priv, CVMX_LMCX_RESET_CTL(if_num), reset_ctl.u64);
108
109 lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));
110
111 udelay(500);
112}
113
114static void perform_lmc_reset(struct ddr_priv *priv, int node, int if_num)
115{
116 /*
117 * 5.9.6 LMC RESET Initialization
118 *
119 * The purpose of this step is to assert/deassert the RESET# pin at the
120 * DDR3/DDR4 parts.
121 *
122 * This LMC RESET step is done for all enabled LMCs.
123 *
124 * It may be appropriate to skip this step if the DDR3/DDR4 DRAM parts
125 * are in self refresh and are currently preserving their
126 * contents. (Software can determine this via
127 * LMC(0..3)_RESET_CTL[DDR3PSV] in some circumstances.) The remainder of
128 * this section assumes that the DRAM contents need not be preserved.
129 *
130 * The remainder of this section assumes that the CN78XX DDRn_RESET_L
131 * pin is attached to the RESET# pin of the attached DDR3/DDR4 parts,
132 * as will be appropriate in many systems.
133 *
134 * (In other systems, such as ones that can preserve DDR3/DDR4 part
135 * contents while CN78XX is powered down, it will not be appropriate to
136 * directly attach the CN78XX DDRn_RESET_L pin to DRESET# of the
137 * DDR3/DDR4 parts, and this section may not apply.)
138 *
139 * The remainder of this section describes the sequence for LMCn.
140 *
141 * Perform the following six substeps for LMC reset initialization:
142 *
143 * 1. If not done already, assert DDRn_RESET_L pin by writing
144 * LMC(0..3)_RESET_ CTL[DDR3RST] = 0 without modifying any other
144 * LMC(0..3)_RESET_CTL[DDR3RST] = 0 without modifying any other
146 */
147
148 if (!ddr_memory_preserved(priv)) {
149 /*
150 * 2. Read LMC(0..3)_RESET_CTL and wait for the result.
151 */
152
153 lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));
154
155 /*
156 * 3. Wait until RESET# assertion-time requirement from JEDEC
157 * DDR3/DDR4 specification is satisfied (200 us during a
158 * power-on ramp, 100ns when power is already stable).
159 */
160
161 udelay(200);
162
163 /*
164 * 4. Deassert DDRn_RESET_L pin by writing
165 * LMC(0..3)_RESET_CTL[DDR3RST] = 1
166 * without modifying any other LMC(0..3)_RESET_CTL fields.
167 * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
168 * 6. Wait a minimum of 500us. This guarantees the necessary
169 * T = 500us delay between DDRn_RESET_L deassertion and
170 * DDRn_DIMM*_CKE* assertion.
171 */
172 cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT);
173
174 /* Toggle Reset Again */
175 /* That is, assert, then de-assert, one more time */
176 cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_ASSERT);
177 cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT);
178 }
179}
180
181void oct3_ddr3_seq(struct ddr_priv *priv, int rank_mask, int if_num,
182 int sequence)
183{
184 /*
185 * 3. Without changing any other fields in LMC(0)_CONFIG, write
186 * LMC(0)_CONFIG[RANKMASK] then write both
187 * LMC(0)_SEQ_CTL[SEQ_SEL,INIT_START] = 1 with a single CSR write
188 * operation. LMC(0)_CONFIG[RANKMASK] bits should be set to indicate
189 * the ranks that will participate in the sequence.
190 *
191 * The LMC(0)_SEQ_CTL[SEQ_SEL] value should select power-up/init or
192 * self-refresh exit, depending on whether the DRAM parts are in
193 * self-refresh and whether their contents should be preserved. While
194 * LMC performs these sequences, it will not perform any other DDR3
195 * transactions. When the sequence is complete, hardware sets the
196 * LMC(0)_CONFIG[INIT_STATUS] bits for the ranks that have been
197 * initialized.
198 *
199 * If power-up/init is selected immediately following a DRESET
200 * assertion, LMC executes the sequence described in the "Reset and
201 * Initialization Procedure" section of the JEDEC DDR3
202 * specification. This includes activating CKE, writing all four DDR3
203 * mode registers on all selected ranks, and issuing the required
204 * ZQCL
205 * command. The LMC(0)_CONFIG[RANKMASK] value should select all ranks
206 * with attached DRAM in this case. If LMC(0)_CONTROL[RDIMM_ENA] = 1,
207 * LMC writes the JEDEC standard SSTE32882 control words selected by
208 * LMC(0)_DIMM_CTL[DIMM*_WMASK] between DDR_CKE* signal assertion and
209 * the first DDR3 mode register write operation.
210 * LMC(0)_DIMM_CTL[DIMM*_WMASK] should be cleared to 0 if the
211 * corresponding DIMM is not present.
212 *
213 * If self-refresh exit is selected, LMC executes the required SRX
214 * command followed by a refresh and ZQ calibration. Section 4.5
215 * describes behavior of a REF + ZQCS. LMC does not write the DDR3
216 * mode registers as part of this sequence, and the mode register
217 * parameters must match at self-refresh entry and exit times.
218 *
219 * 4. Read LMC(0)_SEQ_CTL and wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE]
220 * to be set.
221 *
222 * 5. Read LMC(0)_CONFIG[INIT_STATUS] and confirm that all ranks have
223 * been initialized.
224 */
225
226 union cvmx_lmcx_seq_ctl seq_ctl;
227 union cvmx_lmcx_config lmc_config;
228 int timeout;
229
230 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
231 lmc_config.s.rankmask = rank_mask;
232 lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
233
234 seq_ctl.u64 = 0;
235
236 seq_ctl.s.init_start = 1;
237 seq_ctl.s.seq_sel = sequence;
238
239 ddr_seq_print
240 ("Performing LMC sequence: rank_mask=0x%02x, sequence=0x%x, %s\n",
241 rank_mask, sequence, sequence_str[sequence]);
242
243 if (seq_ctl.s.seq_sel == 3)
244 debug("LMC%d: Exiting Self-refresh Rank_mask:%x\n", if_num,
245 rank_mask);
246
247 lmc_wr(priv, CVMX_LMCX_SEQ_CTL(if_num), seq_ctl.u64);
248 lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num));
249
250 timeout = 100;
251 do {
252 udelay(100); /* Wait a while */
253 seq_ctl.u64 = lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num));
254 if (--timeout == 0) {
255 printf("Sequence %d timed out\n", sequence);
256 break;
257 }
258 } while (seq_ctl.s.seq_complete != 1);
259
260 ddr_seq_print(" LMC sequence=%x: Completed.\n", sequence);
261}
262
263#define bdk_numa_get_address(n, p) ((p) | ((u64)n) << CVMX_NODE_MEM_SHIFT)
264#define AREA_BASE_OFFSET BIT_ULL(26)
265
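/*
 * Quick data-integrity test used for DRAM tuning: fill two regions spaced
 * 64 MB apart with an address-dependent pseudo-random pattern, XOR both
 * regions with the same random value on each pass, then read back and
 * compare against the predicted values.  Mismatches are accumulated per
 * byte lane in the return value and per data bit in xor_data[] (if given).
 */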
266static int test_dram_byte64(struct ddr_priv *priv, int lmc, u64 p,
267 u64 bitmask, u64 *xor_data)
268{
269 u64 p1, p2, d1, d2;
270 u64 v, v1;
271 u64 p2offset = (1ULL << 26); // offset to area 2
272 u64 datamask;
273 u64 xor;
274 u64 i, j, k;
275 u64 ii;
276 int errors = 0;
277 //u64 index;
278 u64 pattern1 = cvmx_rng_get_random64();
279 u64 pattern2 = 0;
280 u64 bad_bits[2] = { 0, 0 };
281 int kbitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18;
282 union cvmx_l2c_ctl l2c_ctl;
283 int burst;
284 int saved_dissblkdty;
285 int node = 0;
286
287 // Force full cacheline write-backs to boost traffic
288 l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL);
289 saved_dissblkdty = l2c_ctl.cn78xx.dissblkdty;
290 l2c_ctl.cn78xx.dissblkdty = 1;
291 l2c_wr(priv, CVMX_L2C_CTL, l2c_ctl.u64);
292
293 if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX))
294 kbitno = 18;
295
296 // Byte lanes may be clear in the mask to indicate no testing on that
297 // lane.
298 datamask = bitmask;
299
300 /*
301 * Add offset to both test regions to not clobber boot stuff
302 * when running from L2 for NAND boot.
303 */
304 p += AREA_BASE_OFFSET; // make sure base is out of the way of boot
305
306 // final address must include LMC and node
307 p |= (lmc << 7); /* Map address into proper interface */
308 p = bdk_numa_get_address(node, p); /* Map to node */
309 p |= 1ull << 63;
310
311#define II_INC BIT_ULL(22)
312#define II_MAX BIT_ULL(22)
313#define K_INC BIT_ULL(14)
314#define K_MAX BIT_ULL(kbitno)
315#define J_INC BIT_ULL(9)
316#define J_MAX BIT_ULL(12)
317#define I_INC BIT_ULL(3)
318#define I_MAX BIT_ULL(7)
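/*
 * Loop strides, all powers of two: 'i' walks the sixteen 64-bit words of a
 * 128-byte cacheline, 'j' steps 512 bytes at a time up to 4 KB, 'k' steps
 * 16 KB at a time up to 2^kbitno (256 KB or 1 MB depending on the chip),
 * and 'ii' covers a single 4 MB block (II_INC == II_MAX, one iteration).
 */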
319
320 debug("N%d.LMC%d: %s: phys_addr=0x%llx/0x%llx (0x%llx)\n",
321 node, lmc, __func__, p, p + p2offset, 1ULL << kbitno);
322
323 // loops are ordered so that only a single 64-bit slot is written to
324 // each cacheline at one time, then the cachelines are forced out;
325 // this should maximize read/write traffic
326
327 // FIXME? extend the range of memory tested!!
328 for (ii = 0; ii < II_MAX; ii += II_INC) {
329 for (i = 0; i < I_MAX; i += I_INC) {
330 for (k = 0; k < K_MAX; k += K_INC) {
331 for (j = 0; j < J_MAX; j += J_INC) {
332 p1 = p + ii + k + j;
333 p2 = p1 + p2offset;
334
335 v = pattern1 * (p1 + i);
336 // write the same thing to both areas
337 v1 = v;
338
339 cvmx_write64_uint64(p1 + i, v);
340 cvmx_write64_uint64(p2 + i, v1);
341
342 CVMX_CACHE_WBIL2(p1, 0);
343 CVMX_CACHE_WBIL2(p2, 0);
344 }
345 }
346 }
347 }
348
349 CVMX_DCACHE_INVALIDATE;
350
351 debug("N%d.LMC%d: dram_tuning_mem_xor: done INIT loop\n", node, lmc);
352
353 /* Make a series of passes over the memory areas. */
354
355 for (burst = 0; burst < 1 /* was: dram_tune_use_bursts */ ; burst++) {
356 u64 this_pattern = cvmx_rng_get_random64();
357
358 pattern2 ^= this_pattern;
359
360 /*
361 * XOR the data with a random value, applying the change to both
362 * memory areas.
363 */
364
365 // FIXME? extend the range of memory tested!!
366 for (ii = 0; ii < II_MAX; ii += II_INC) {
367 // FIXME: rearranged, did not make much difference?
368 for (i = 0; i < I_MAX; i += I_INC) {
369 for (k = 0; k < K_MAX; k += K_INC) {
370 for (j = 0; j < J_MAX; j += J_INC) {
371 p1 = p + ii + k + j;
372 p2 = p1 + p2offset;
373
374 v = cvmx_read64_uint64(p1 +
375 i) ^
376 this_pattern;
377 v1 = cvmx_read64_uint64(p2 +
378 i) ^
379 this_pattern;
380
381 cvmx_write64_uint64(p1 + i, v);
382 cvmx_write64_uint64(p2 + i, v1);
383
384 CVMX_CACHE_WBIL2(p1, 0);
385 CVMX_CACHE_WBIL2(p2, 0);
386 }
387 }
388 }
389 }
390
391 CVMX_DCACHE_INVALIDATE;
392
393 debug("N%d.LMC%d: dram_tuning_mem_xor: done MODIFY loop\n",
394 node, lmc);
395
396 /*
397 * Look for differences in the areas. If there is a mismatch,
398 * reset both memory locations with the same pattern. Failing
399 * to do so means that on all subsequent passes the pair of
400 * locations remain out of sync giving spurious errors.
401 */
402
403 // FIXME: Change the loop order so that an entire cache line
404 // is compared at one time. This is so that a read
405 // error that occurs *anywhere* on the cacheline will
406 // be caught, rather than comparing only 1 cacheline
407 // slot at a time, where an error on a different
408 // slot will be missed that time around
409 // Does the above make sense?
410
411 // FIXME? extend the range of memory tested!!
412 for (ii = 0; ii < II_MAX; ii += II_INC) {
413 for (k = 0; k < K_MAX; k += K_INC) {
414 for (j = 0; j < J_MAX; j += J_INC) {
415 p1 = p + ii + k + j;
416 p2 = p1 + p2offset;
417
418 // process entire cachelines in the
419 // innermost loop
420 for (i = 0; i < I_MAX; i += I_INC) {
421 int bybit = 1;
422 // start in byte lane 0
423 u64 bymsk = 0xffULL;
424
425 // FIXME: this should predict
426 // what we find...???
427 v = ((p1 + i) * pattern1) ^
428 pattern2;
429 d1 = cvmx_read64_uint64(p1 + i);
430 d2 = cvmx_read64_uint64(p2 + i);
431
432 // union of error bits only in
433 // active byte lanes
434 xor = ((d1 ^ v) | (d2 ^ v)) &
435 datamask;
436
437 if (!xor)
438 continue;
439
440 // accumulate bad bits
441 bad_bits[0] |= xor;
442
443 while (xor != 0) {
444 debug("ERROR(%03d): [0x%016llX] [0x%016llX] expected 0x%016llX d1 %016llX d2 %016llX\n",
445 burst, p1, p2, v,
446 d1, d2);
447 // error(s) in this lane
448 if (xor & bymsk) {
449 // set the byte
450 // error bit
451 errors |= bybit;
452 // clear byte
453 // lane in
454 // error bits
455 xor &= ~bymsk;
456 // clear the
457 // byte lane in
458 // the mask
459 datamask &= ~bymsk;
460#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS
461 // nothing
462 // left to do
463 if (datamask == 0) {
464 return errors;
465 }
466#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */
467 }
468 // move mask into
469 // next byte lane
470 bymsk <<= 8;
471 // move bit into next
472 // byte position
473 bybit <<= 1;
474 }
475 }
476 CVMX_CACHE_WBIL2(p1, 0);
477 CVMX_CACHE_WBIL2(p2, 0);
478 }
479 }
480 }
481
482 debug("N%d.LMC%d: dram_tuning_mem_xor: done TEST loop\n",
483 node, lmc);
484 }
485
486 if (xor_data) { // send the bad bits back...
487 xor_data[0] = bad_bits[0];
488 xor_data[1] = bad_bits[1]; // let it be zeroed
489 }
490
491 // Restore original setting that could enable partial cacheline writes
492 l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL);
493 l2c_ctl.cn78xx.dissblkdty = saved_dissblkdty;
494 l2c_wr(priv, CVMX_L2C_CTL, l2c_ctl.u64);
495
496 return errors;
497}
498
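/*
 * Issue a DDR4 mode-register write: stage the MR select, rank and address
 * bits in LMC_MR_MPR_CTL, then kick off LMC sequence 0x8 (MRW).  Passing
 * mr_wr_addr == -1 tells the hardware to use its default value for the
 * selected mode register instead.
 */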
499static void ddr4_mrw(struct ddr_priv *priv, int if_num, int rank,
500 int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1)
501{
502 union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
503
504 lmc_mr_mpr_ctl.u64 = 0;
505 lmc_mr_mpr_ctl.cn78xx.mr_wr_addr = (mr_wr_addr == -1) ? 0 : mr_wr_addr;
506 lmc_mr_mpr_ctl.cn78xx.mr_wr_sel = mr_wr_sel;
507 lmc_mr_mpr_ctl.cn78xx.mr_wr_rank = rank;
508 lmc_mr_mpr_ctl.cn78xx.mr_wr_use_default_value =
509 (mr_wr_addr == -1) ? 1 : 0;
510 lmc_mr_mpr_ctl.cn78xx.mr_wr_bg1 = mr_wr_bg1;
511 lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
512
513 /* Mode Register Write */
514 oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);
515}
516
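/*
 * Mask 0x22bf8 toggles address bits A3-A9, A11, A13 and A17, the address
 * lines a DDR4 registering clock driver inverts on its B-side outputs, so
 * MR writes aimed at B-side devices are pre-inverted here to land intact.
 */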
517#define INV_A0_17(x) ((x) ^ 0x22bf8)
518
519static void set_mpr_mode(struct ddr_priv *priv, int rank_mask,
520 int if_num, int dimm_count, int mpr, int bg1)
521{
522 int rankx;
523
524 debug("All Ranks: Set mpr mode = %x %c-side\n",
525 mpr, (bg1 == 0) ? 'A' : 'B');
526
527 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
528 if (!(rank_mask & (1 << rankx)))
529 continue;
530 if (bg1 == 0) {
531 /* MR3 A-side */
532 ddr4_mrw(priv, if_num, rankx, mpr << 2, 3, bg1);
533 } else {
534 /* MR3 B-side */
535 ddr4_mrw(priv, if_num, rankx, INV_A0_17(mpr << 2), ~3,
536 bg1);
537 }
538 }
539}
540
541static void do_ddr4_mpr_read(struct ddr_priv *priv, int if_num,
542 int rank, int page, int location)
543{
544 union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
545
546 lmc_mr_mpr_ctl.u64 = lmc_rd(priv, CVMX_LMCX_MR_MPR_CTL(if_num));
547 lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = 0;
548 lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page; /* Page */
549 lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
550 lmc_mr_mpr_ctl.cn70xx.mpr_loc = location;
551 lmc_mr_mpr_ctl.cn70xx.mpr_wr = 0; /* Read=0, Write=1 */
552 lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
553
554 /* MPR register access sequence */
555 oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9);
556
557 debug("LMC_MR_MPR_CTL : 0x%016llx\n",
558 lmc_mr_mpr_ctl.u64);
559 debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n",
560 lmc_mr_mpr_ctl.cn70xx.mr_wr_addr);
561 debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n",
562 lmc_mr_mpr_ctl.cn70xx.mr_wr_sel);
563 debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc : 0x%02x\n",
564 lmc_mr_mpr_ctl.cn70xx.mpr_loc);
565 debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr : 0x%02x\n",
566 lmc_mr_mpr_ctl.cn70xx.mpr_wr);
567}
568
569static int set_rdimm_mode(struct ddr_priv *priv, int if_num, int enable)
570{
571 union cvmx_lmcx_control lmc_control;
572 int save_rdimm_mode;
573
574 lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
575 save_rdimm_mode = lmc_control.s.rdimm_ena;
576 lmc_control.s.rdimm_ena = enable;
577 debug("Setting RDIMM_ENA = %x\n", enable);
578 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), lmc_control.u64);
579
580 return save_rdimm_mode;
581}
582
583static void ddr4_mpr_read(struct ddr_priv *priv, int if_num, int rank,
584 int page, int location, u64 *mpr_data)
585{
586 do_ddr4_mpr_read(priv, if_num, rank, page, location);
587
588 mpr_data[0] = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num));
589}
590
591/* Display MPR values for Page */
592static void display_mpr_page(struct ddr_priv *priv, int rank_mask,
593 int if_num, int page)
594{
595 int rankx, location;
596 u64 mpr_data[3];
597
598 for (rankx = 0; rankx < 4; rankx++) {
599 if (!(rank_mask & (1 << rankx)))
600 continue;
601
602 debug("N0.LMC%d.R%d: MPR Page %d loc [0:3]: ",
603 if_num, rankx, page);
604 for (location = 0; location < 4; location++) {
605 ddr4_mpr_read(priv, if_num, rankx, page, location,
606 mpr_data);
607 debug("0x%02llx ", mpr_data[0] & 0xFF);
608 }
609 debug("\n");
610
611 } /* for (rankx = 0; rankx < 4; rankx++) */
612}
613
614static void ddr4_mpr_write(struct ddr_priv *priv, int if_num, int rank,
615 int page, int location, u8 mpr_data)
616{
617 union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
618
619 lmc_mr_mpr_ctl.u64 = 0;
620 lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mpr_data;
621 lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page; /* Page */
622 lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
623 lmc_mr_mpr_ctl.cn70xx.mpr_loc = location;
624 lmc_mr_mpr_ctl.cn70xx.mpr_wr = 1; /* Read=0, Write=1 */
625 lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
626
627 /* MPR register access sequence */
628 oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9);
629
630 debug("LMC_MR_MPR_CTL : 0x%016llx\n",
631 lmc_mr_mpr_ctl.u64);
632 debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n",
633 lmc_mr_mpr_ctl.cn70xx.mr_wr_addr);
634 debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n",
635 lmc_mr_mpr_ctl.cn70xx.mr_wr_sel);
636 debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc : 0x%02x\n",
637 lmc_mr_mpr_ctl.cn70xx.mpr_loc);
638 debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr : 0x%02x\n",
639 lmc_mr_mpr_ctl.cn70xx.mpr_wr);
640}
641
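/*
 * Program DDR4 MR6 VrefDQ training for one rank: A7 = 1 enables training
 * mode, A6 selects the Vref range, A5:A0 carry the training value and
 * A12:A10 re-send the configured tCCD_L.  The MRW sequence is run twice
 * because it is vendor specific whether the value is captured on the
 * first write with A7 set; the CSR image is then restaged with A7 clear.
 */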
642static void set_vref(struct ddr_priv *priv, int if_num, int rank,
643 int range, int value)
644{
645 union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
646 union cvmx_lmcx_modereg_params3 lmc_modereg_params3;
647 int mr_wr_addr = 0;
648
649 lmc_mr_mpr_ctl.u64 = 0;
650 lmc_modereg_params3.u64 = lmc_rd(priv,
651 CVMX_LMCX_MODEREG_PARAMS3(if_num));
652
653 /* A12:A10 tCCD_L */
654 mr_wr_addr |= lmc_modereg_params3.s.tccd_l << 10;
655 mr_wr_addr |= 1 << 7; /* A7 1 = Enable(Training Mode) */
656 mr_wr_addr |= range << 6; /* A6 vrefDQ Training Range */
657 mr_wr_addr |= value << 0; /* A5:A0 vrefDQ Training Value */
658
659 lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr;
660 lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = 6; /* Write MR6 */
661 lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
662 lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
663
664 /* 0x8 = Mode Register Write */
665 oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);
666
667 /*
668 * It is vendor specific whether vref_value is captured with A7=1.
669 * A subsequent MRS might be necessary.
670 */
671 oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);
672
673 mr_wr_addr &= ~(1 << 7); /* A7 0 = Disable(Training Mode) */
674 lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr;
675 lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
676}
677
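/*
 * Rewrite bit 0 of register control word RC0 in every DIMM's RCW image
 * (1 disables the RCD's B-side output inversion), then push the control
 * words to the register with LMC sequence 0x7 (RCW initialization).
 */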
678static void set_dram_output_inversion(struct ddr_priv *priv, int if_num,
679 int dimm_count, int rank_mask,
680 int inversion)
681{
682 union cvmx_lmcx_ddr4_dimm_ctl lmc_ddr4_dimm_ctl;
683 union cvmx_lmcx_dimmx_params lmc_dimmx_params;
684 union cvmx_lmcx_dimm_ctl lmc_dimm_ctl;
685 int dimm_no;
686
687 /* Don't touch extended register control words */
688 lmc_ddr4_dimm_ctl.u64 = 0;
689 lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), lmc_ddr4_dimm_ctl.u64);
690
691 debug("All DIMMs: Register Control Word RC0 : %x\n",
692 (inversion & 1));
693
694 for (dimm_no = 0; dimm_no < dimm_count; ++dimm_no) {
695 lmc_dimmx_params.u64 =
696 lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num));
697 lmc_dimmx_params.s.rc0 =
698 (lmc_dimmx_params.s.rc0 & ~1) | (inversion & 1);
699
700 lmc_wr(priv,
701 CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num),
702 lmc_dimmx_params.u64);
703 }
704
705 /* LMC0_DIMM_CTL */
706 lmc_dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
707 lmc_dimm_ctl.s.dimm0_wmask = 0x1;
708 lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0x0001 : 0x0000;
709
710 debug("LMC DIMM_CTL : 0x%016llx\n",
711 lmc_dimm_ctl.u64);
712 lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), lmc_dimm_ctl.u64);
713
714 oct3_ddr3_seq(priv, rank_mask, if_num, 0x7); /* Init RCW */
715}
716
717static void write_mpr_page0_pattern(struct ddr_priv *priv, int rank_mask,
718 int if_num, int dimm_count, int pattern,
719 int location_mask)
720{
721 int rankx;
722 int location;
723
724 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
725 if (!(rank_mask & (1 << rankx)))
726 continue;
727 for (location = 0; location < 4; ++location) {
728 if (!(location_mask & (1 << location)))
729 continue;
730
731 ddr4_mpr_write(priv, if_num, rankx,
732 /* page */ 0, /* location */ location,
733 pattern);
734 }
735 }
736}
737
738static void change_rdimm_mpr_pattern(struct ddr_priv *priv, int rank_mask,
739 int if_num, int dimm_count)
740{
741 int save_ref_zqcs_int;
742 union cvmx_lmcx_config lmc_config;
743
744 /*
745 * Okay, here is the latest sequence. This should work for all
746 * chips and passes (78,88,73,etc). This sequence should be run
747 * immediately after DRAM INIT. The basic idea is to write the
748 * same pattern into each of the 4 MPR locations in the DRAM, so
749 * that the same value is returned when doing MPR reads regardless
750 * of the inversion state. My advice is to put this into a
751 * function, change_rdimm_mpr_pattern or something like that, so
752 * that it can be called multiple times, as I think David wants a
753 * clock-like pattern for OFFSET training, but does not want a
754 * clock pattern for Bit-Deskew. You should then be able to call
755 * this at any point in the init sequence (after DRAM init) to
756 * change the pattern to a new value.
757 * Mike
758 *
759 * A correction: PHY doesn't need any pattern during offset
760 * training, but needs clock like pattern for internal vref and
761 * bit-dskew training. So for that reason, these steps below have
762 * to be conducted before those trainings to pre-condition
763 * the pattern. David
764 *
765 * Note: Steps 3, 4, 8 and 9 have to be done through the RDIMM
766 * sequence. If you issue an MRW sequence to do an RCW write (in o78 pass
767 * 1 at least), LMC will still do two commands because
768 * CONTROL[RDIMM_ENA] is still set high. We don't want it to have
769 * any unintentional mode register write so it's best to do what
770 * Mike is doing here.
771 * Andrew
772 */
773
774 /* 1) Disable refresh (REF_ZQCS_INT = 0) */
775
776 debug("1) Disable refresh (REF_ZQCS_INT = 0)\n");
777
778 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
779 save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int;
780 lmc_config.cn78xx.ref_zqcs_int = 0;
781 lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
782
783 /*
784 * 2) Put all devices in MPR mode (Run MRW sequence (sequence=8)
785 * with MODEREG_PARAMS0[MPRLOC]=0,
786 * MODEREG_PARAMS0[MPR]=1, MR_MPR_CTL[MR_WR_SEL]=3, and
787 * MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1)
788 */
789
790 debug("2) Put all devices in MPR mode (Run MRW sequence (sequence=8)\n");
791
792 /* A-side */
793 set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 0);
794 /* B-side */
795 set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 1);
796
797 /*
798 * a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and set
799 * the value you would like directly into
800 * MR_MPR_CTL[MR_WR_ADDR]
801 */
802
803 /*
804 * 3) Disable RCD Parity (if previously enabled) - parity does not
805 * work if inversion disabled
806 */
807
808 debug("3) Disable RCD Parity\n");
809
810 /*
811 * 4) Disable Inversion in the RCD.
812 * a. I did (3&4) via the RDIMM sequence (seq_sel=7), but it
813 * may be easier to use the MRW sequence (seq_sel=8). Just set
814 * MR_MPR_CTL[MR_WR_SEL]=7, MR_MPR_CTL[MR_WR_ADDR][3:0]=data,
815 * MR_MPR_CTL[MR_WR_ADDR][7:4]=RCD reg
816 */
817
818 debug("4) Disable Inversion in the RCD.\n");
819
820 set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 1);
821
822 /*
823 * 5) Disable CONTROL[RDIMM_ENA] so that MR sequence goes out
824 * non-inverted.
825 */
826
827 debug("5) Disable CONTROL[RDIMM_ENA]\n");
828
829 set_rdimm_mode(priv, if_num, 0);
830
831 /*
832 * 6) Write all 4 MPR registers with the desired pattern (have to
833 * do this for all enabled ranks)
834 * a. MR_MPR_CTL.MPR_WR=1, MR_MPR_CTL.MPR_LOC=0..3,
835 * MR_MPR_CTL.MR_WR_SEL=0, MR_MPR_CTL.MR_WR_ADDR[7:0]=pattern
836 */
837
838 debug("6) Write all 4 MPR page 0 Training Patterns\n");
839
840 write_mpr_page0_pattern(priv, rank_mask, if_num, dimm_count, 0x55, 0x8);
841
842 /* 7) Re-enable RDIMM_ENA */
843
844 debug("7) Re-enable RDIMM_ENA\n");
845
846 set_rdimm_mode(priv, if_num, 1);
847
848 /* 8) Re-enable RDIMM inversion */
849
850 debug("8) Re-enable RDIMM inversion\n");
851
852 set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 0);
853
854 /* 9) Re-enable RDIMM parity (if desired) */
855
856 debug("9) Re-enable RDIMM parity (if desired)\n");
857
858 /*
859 * 10)Take B-side devices out of MPR mode (Run MRW sequence
860 * (sequence=8) with MODEREG_PARAMS0[MPRLOC]=0,
861 * MODEREG_PARAMS0[MPR]=0, MR_MPR_CTL[MR_WR_SEL]=3, and
862 * MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1)
863 */
864
865 debug("10)Take B-side devices out of MPR mode\n");
866
867 set_mpr_mode(priv, rank_mask, if_num, dimm_count,
868 /* mpr */ 0, /* bg1 */ 1);
869
870 /*
871 * a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and
872 * set the value you would like directly into MR_MPR_CTL[MR_WR_ADDR]
873 */
874
875 /* 11)Re-enable refresh (REF_ZQCS_INT=previous value) */
876
877 debug("11)Re-enable refresh (REF_ZQCS_INT=previous value)\n");
878
879 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
880 lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int;
881 lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
882}
883
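/*
 * Sanity-check hardware write-leveling results: wl[] holds each byte
 * lane's setting reduced to a 2-bit value, seq[] lists byte lanes in
 * board-routing order, terminated by -1.  Each adjacent pair forms a
 * 4-bit index into the 0xBDE7 validity mask, which presumably encodes
 * the step combinations plausible between neighbouring lanes; any
 * disallowed pair returns 1 to flag the result as suspect.
 */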
884static int validate_hwl_seq(int *wl, int *seq)
885{
886 // sequence index, step through the sequence array
887 int seqx;
888 int bitnum;
889
890 seqx = 0;
891
892 while (seq[seqx + 1] >= 0) { // stop on next seq entry == -1
893 // but now, check current versus next
894 bitnum = (wl[seq[seqx]] << 2) | wl[seq[seqx + 1]];
895 // magic validity number (see matrix above)
896 if (!((1 << bitnum) & 0xBDE7))
897 return 1;
898 seqx++;
899 }
900
901 return 0;
902}
903
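/*
 * Check HW write-leveling output for one rank.  UDIMMs are validated in
 * ascending byte order with ECC in the middle; RDIMMs are validated in
 * two runs outward from the ECC byte, since fly-by delay typically grows
 * in both directions from the centrally placed register.
 */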
904static int validate_hw_wl_settings(int if_num,
905 union cvmx_lmcx_wlevel_rankx
906 *lmc_wlevel_rank, int is_rdimm, int ecc_ena)
907{
908 int wl[9], byte, errors;
909
910 // arrange the sequences so
911 // index 0 has byte 0, etc, ECC in middle
912 int useq[] = { 0, 1, 2, 3, 8, 4, 5, 6, 7, -1 };
913 // index 0 is ECC, then go down
914 int rseq1[] = { 8, 3, 2, 1, 0, -1 };
915 // index 0 has byte 4, then go up
916 int rseq2[] = { 4, 5, 6, 7, -1 };
917 // index 0 has byte 0, etc, no ECC
918 int useqno[] = { 0, 1, 2, 3, 4, 5, 6, 7, -1 };
919 // index 0 is byte 3, then go down, no ECC
920 int rseq1no[] = { 3, 2, 1, 0, -1 };
921
922 // in the CSR, bytes 0-7 are always data, byte 8 is ECC
923 for (byte = 0; byte < (8 + ecc_ena); byte++) {
924 // preprocess :-)
925 wl[byte] = (get_wl_rank(lmc_wlevel_rank, byte) >>
926 1) & 3;
927 }
928
929 errors = 0;
930 if (is_rdimm) { // RDIMM order
931 errors = validate_hwl_seq(wl, (ecc_ena) ? rseq1 : rseq1no);
932 errors += validate_hwl_seq(wl, rseq2);
933 } else { // UDIMM order
934 errors = validate_hwl_seq(wl, (ecc_ena) ? useq : useqno);
935 }
936
937 return errors;
938}
939
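/*
 * Helpers for the packed WLEVEL_RANK CSR image: the low two bits of byte
 * lane x's write-leveling setting live at bit (x * 12 + 5), while the
 * third (MSB) is collected separately in the upper bits at bit (51 + x).
 */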
940static unsigned int extr_wr(u64 u, int x)
941{
942 return (unsigned int)(((u >> (x * 12 + 5)) & 0x3ULL) |
943 ((u >> (51 + x - 2)) & 0x4ULL));
944}
945
946static void insrt_wr(u64 *up, int x, int v)
947{
948 u64 u = *up;
949
950 u &= ~(((0x3ULL) << (x * 12 + 5)) | ((0x1ULL) << (51 + x)));
951 *up = (u | ((v & 0x3ULL) << (x * 12 + 5)) |
952 ((v & 0x4ULL) << (51 + x - 2)));
953}
954
955/* Read out Deskew Settings for DDR */
956
957struct deskew_bytes {
958 u16 bits[8];
959};
960
961struct deskew_data {
962 struct deskew_bytes bytes[9];
963};
964
965struct dac_data {
966 int bytes[9];
967};
968
969// T88 pass 1, skip 4=DAC
970static const u8 dsk_bit_seq_p1[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
971// T88 Pass 2, skip 4=DAC and 5=DBI
972static const u8 dsk_bit_seq_p2[8] = { 0, 1, 2, 3, 6, 7, 8, 9 };
973
974static void get_deskew_settings(struct ddr_priv *priv, int if_num,
975 struct deskew_data *dskdat)
976{
977 union cvmx_lmcx_phy_ctl phy_ctl;
978 union cvmx_lmcx_config lmc_config;
979 int bit_index;
980 int byte_lane, byte_limit;
981 // NOTE: these are for pass 2.x
982 int is_o78p2 = !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X);
983 const u8 *bit_seq = (is_o78p2) ? dsk_bit_seq_p2 : dsk_bit_seq_p1;
984
985 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
986 byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena;
987
988 memset(dskdat, 0, sizeof(*dskdat));
989
990 phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
991 phy_ctl.s.dsk_dbg_clk_scaler = 3;
992
993 for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
994 phy_ctl.s.dsk_dbg_byte_sel = byte_lane; // set byte lane
995
996 for (bit_index = 0; bit_index < 8; ++bit_index) {
997 // set bit number and start read sequence
998 phy_ctl.s.dsk_dbg_bit_sel = bit_seq[bit_index];
999 phy_ctl.s.dsk_dbg_rd_start = 1;
1000 lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1001
1002 // poll for read sequence to complete
1003 do {
1004 phy_ctl.u64 =
1005 lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1006 } while (phy_ctl.s.dsk_dbg_rd_complete != 1);
1007
1008 // record the data
1009 dskdat->bytes[byte_lane].bits[bit_index] =
1010 phy_ctl.s.dsk_dbg_rd_data & 0x3ff;
1011 }
1012 }
1013}
1014
1015static void display_deskew_settings(struct ddr_priv *priv, int if_num,
1016 struct deskew_data *dskdat,
1017 int print_enable)
1018{
1019 int byte_lane;
1020 int bit_num;
1021 u16 flags, deskew;
1022 union cvmx_lmcx_config lmc_config;
1023 int byte_limit;
1024 const char *fc = " ?-=+*#&";
1025
1026 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
1027 byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
1028
1029 if (print_enable) {
1030 debug("N0.LMC%d: Deskew Data: Bit => :",
1031 if_num);
1032 for (bit_num = 7; bit_num >= 0; --bit_num)
1033 debug(" %3d ", bit_num);
1034 debug("\n");
1035 }
1036
1037 for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
1038 if (print_enable)
1039 debug("N0.LMC%d: Bit Deskew Byte %d %s :",
1040 if_num, byte_lane,
1041 (print_enable >= 3) ? "FINAL" : " ");
1042
1043 for (bit_num = 7; bit_num >= 0; --bit_num) {
1044 flags = dskdat->bytes[byte_lane].bits[bit_num] & 7;
1045 deskew = dskdat->bytes[byte_lane].bits[bit_num] >> 3;
1046
1047 if (print_enable)
1048 debug(" %3d %c", deskew, fc[flags ^ 1]);
1049
1050 } /* for (bit_num = 7; bit_num >= 0; --bit_num) */
1051
1052 if (print_enable)
1053 debug("\n");
1054 }
1055}
1056
1057static void override_deskew_settings(struct ddr_priv *priv, int if_num,
1058 struct deskew_data *dskdat)
1059{
1060 union cvmx_lmcx_phy_ctl phy_ctl;
1061 union cvmx_lmcx_config lmc_config;
1062
1063 int bit, byte_lane, byte_limit;
1064 u64 csr_data;
1065
1066 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
1067 byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
1068
1069 phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1070
1071 phy_ctl.s.phy_reset = 0;
1072 phy_ctl.s.dsk_dbg_num_bits_sel = 1;
1073 phy_ctl.s.dsk_dbg_offset = 0;
1074 phy_ctl.s.dsk_dbg_clk_scaler = 3;
1075
1076 phy_ctl.s.dsk_dbg_wr_mode = 1;
1077 phy_ctl.s.dsk_dbg_load_dis = 0;
1078 phy_ctl.s.dsk_dbg_overwrt_ena = 0;
1079
1080 phy_ctl.s.phy_dsk_reset = 0;
1081
1082 lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1083 lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1084
1085 for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
1086 csr_data = 0;
1087 // FIXME: can we ignore DBI?
1088 for (bit = 0; bit < 8; ++bit) {
1089 // fetch input and adjust
1090 u64 bits = (dskdat->bytes[byte_lane].bits[bit] >> 3) &
1091 0x7F;
1092
1093 /*
1094 * lmc_general_purpose0.data[6:0] // DQ0
1095 * lmc_general_purpose0.data[13:7] // DQ1
1096 * lmc_general_purpose0.data[20:14] // DQ2
1097 * lmc_general_purpose0.data[27:21] // DQ3
1098 * lmc_general_purpose0.data[34:28] // DQ4
1099 * lmc_general_purpose0.data[41:35] // DQ5
1100 * lmc_general_purpose0.data[48:42] // DQ6
1101 * lmc_general_purpose0.data[55:49] // DQ7
1102 * lmc_general_purpose0.data[62:56] // DBI
1103 */
1104 csr_data |= (bits << (7 * bit));
1105
1106 } /* for (bit = 0; bit < 8; ++bit) */
1107
1108 // update GP0 with the bit data for this byte lane
1109 lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num), csr_data);
1110 lmc_rd(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num));
1111
1112 // start the deskew load sequence
1113 phy_ctl.s.dsk_dbg_byte_sel = byte_lane;
1114 phy_ctl.s.dsk_dbg_rd_start = 1;
1115 lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1116
1117 // poll for read sequence to complete
1118 do {
1119 udelay(100);
1120 phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1121 } while (phy_ctl.s.dsk_dbg_rd_complete != 1);
1122 }
1123
1124 // tell phy to use the new settings
1125 phy_ctl.s.dsk_dbg_overwrt_ena = 1;
1126 phy_ctl.s.dsk_dbg_rd_start = 0;
1127 lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1128
1129 phy_ctl.s.dsk_dbg_wr_mode = 0;
1130 lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1131}
1132
1133static void process_by_rank_dac(struct ddr_priv *priv, int if_num,
1134 int rank_mask, struct dac_data *dacdat)
1135{
1136 union cvmx_lmcx_config lmc_config;
1137 int rankx, byte_lane;
1138 int byte_limit;
1139 int rank_count;
1140 struct dac_data dacsum;
1141 int lane_probs;
1142
1143 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
1144 byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
1145
1146 memset((void *)&dacsum, 0, sizeof(dacsum));
1147 rank_count = 0;
1148 lane_probs = 0;
1149
1150 for (rankx = 0; rankx < 4; rankx++) {
1151 if (!(rank_mask & (1 << rankx)))
1152 continue;
1153 rank_count++;
1154
1155 display_dac_dbi_settings(if_num, /*dac */ 1,
1156 lmc_config.s.ecc_ena,
1157 &dacdat[rankx].bytes[0],
1158 "By-Ranks VREF");
1159 // sum
1160 for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
1161 if (rank_count == 2) {
1162 int ranks_diff =
1163 abs((dacsum.bytes[byte_lane] -
1164 dacdat[rankx].bytes[byte_lane]));
1165
1166 // FIXME: is 19 a good number?
1167 if (ranks_diff > 19)
1168 lane_probs |= (1 << byte_lane);
1169 }
1170 dacsum.bytes[byte_lane] +=
1171 dacdat[rankx].bytes[byte_lane];
1172 }
1173 }
1174
1175 // average
1176 for (byte_lane = 0; byte_lane < byte_limit; byte_lane++)
1177 dacsum.bytes[byte_lane] /= rank_count; // FIXME: nint?
1178
1179 display_dac_dbi_settings(if_num, /*dac */ 1, lmc_config.s.ecc_ena,
1180 &dacsum.bytes[0], "All-Rank VREF");
1181
1182 if (lane_probs) {
1183 debug("N0.LMC%d: All-Rank VREF DAC Problem Bytelane(s): 0x%03x\n",
1184 if_num, lane_probs);
1185 }
1186
1187 // finally, write the averaged DAC values
1188 for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
1189 load_dac_override(priv, if_num, dacsum.bytes[byte_lane],
1190 byte_lane);
1191 }
1192}
1193
1194static void process_by_rank_dsk(struct ddr_priv *priv, int if_num,
1195 int rank_mask, struct deskew_data *dskdat)
1196{
1197 union cvmx_lmcx_config lmc_config;
1198 int rankx, lane, bit;
1199 int byte_limit;
1200 struct deskew_data dsksum, dskcnt;
1201 u16 deskew;
1202
1203 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
1204 byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
1205
1206 memset((void *)&dsksum, 0, sizeof(dsksum));
1207 memset((void *)&dskcnt, 0, sizeof(dskcnt));
1208
1209 for (rankx = 0; rankx < 4; rankx++) {
1210 if (!(rank_mask & (1 << rankx)))
1211 continue;
1212
1213 // sum ranks
1214 for (lane = 0; lane < byte_limit; lane++) {
1215 for (bit = 0; bit < 8; ++bit) {
1216 deskew = dskdat[rankx].bytes[lane].bits[bit];
1217 // if flags indicate sat hi or lo, skip it
1218 if (deskew & 6)
1219 continue;
1220
1221 // clear flags
1222 dsksum.bytes[lane].bits[bit] +=
1223 deskew & ~7;
1224 // count entries
1225 dskcnt.bytes[lane].bits[bit] += 1;
1226 }
1227 }
1228 }
1229
1230 // average ranks
1231 for (lane = 0; lane < byte_limit; lane++) {
1232 for (bit = 0; bit < 8; ++bit) {
1233 int div = dskcnt.bytes[lane].bits[bit];
1234
1235 if (div > 0) {
1236 dsksum.bytes[lane].bits[bit] /= div;
1237 // clear flags
1238 dsksum.bytes[lane].bits[bit] &= ~7;
1239 // set LOCK
1240 dsksum.bytes[lane].bits[bit] |= 1;
1241 } else {
1242 // FIXME? use reset value?
1243 dsksum.bytes[lane].bits[bit] =
1244 (64 << 3) | 1;
1245 }
1246 }
1247 }
1248
1249 // TME for FINAL version
1250 display_deskew_settings(priv, if_num, &dsksum, /*VBL_TME */ 3);
1251
1252 // finally, write the averaged DESKEW values
1253 override_deskew_settings(priv, if_num, &dsksum);
1254}
1255
1256struct deskew_counts {
1257 int saturated; // number saturated
1258 int unlocked; // number unlocked
1259 int nibrng_errs; // nibble range errors
1260 int nibunl_errs; // nibble unlocked errors
1261 int bitval_errs; // bit value errors
1262};
1263
1264#define MIN_BITVAL 17
1265#define MAX_BITVAL 110
1266
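/*
 * Read back the current deskew state and classify it: count saturated and
 * unlocked bits, and record per byte lane (as bitmasks in *counts) any
 * nibble whose min/max spread exceeds 33, any nibble with all four bits
 * unlocked, and any bit value outside the [MIN_BITVAL, MAX_BITVAL] window.
 */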
1267static void validate_deskew_training(struct ddr_priv *priv, int rank_mask,
1268 int if_num, struct deskew_counts *counts,
1269 int print_flags)
1270{
1271 int byte_lane, bit_index, nib_num;
1272 int nibrng_errs, nibunl_errs, bitval_errs;
1273 union cvmx_lmcx_config lmc_config;
1274 s16 nib_min[2], nib_max[2], nib_unl[2];
1275 int byte_limit;
1276 int print_enable = print_flags & 1;
1277 struct deskew_data dskdat;
1278 s16 flags, deskew;
1279 const char *fc = " ?-=+*#&";
1280 int bit_last;
1281
1282 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
1283 byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena;
1284
1285 memset(counts, 0, sizeof(struct deskew_counts));
1286
1287 get_deskew_settings(priv, if_num, &dskdat);
1288
1289 if (print_enable) {
1290 debug("N0.LMC%d: Deskew Settings: Bit => :",
1291 if_num);
1292 for (bit_index = 7; bit_index >= 0; --bit_index)
1293 debug(" %3d ", bit_index);
1294 debug("\n");
1295 }
1296
1297 for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
1298 if (print_enable)
1299 debug("N0.LMC%d: Bit Deskew Byte %d %s :",
1300 if_num, byte_lane,
1301 (print_flags & 2) ? "FINAL" : " ");
1302
1303 nib_min[0] = 127;
1304 nib_min[1] = 127;
1305 nib_max[0] = 0;
1306 nib_max[1] = 0;
1307 nib_unl[0] = 0;
1308 nib_unl[1] = 0;
1309
1310 if (lmc_config.s.mode32b == 1 && byte_lane == 4) {
1311 bit_last = 3;
1312 if (print_enable)
1313 debug(" ");
1314 } else {
1315 bit_last = 7;
1316 }
1317
1318 for (bit_index = bit_last; bit_index >= 0; --bit_index) {
1319 nib_num = (bit_index > 3) ? 1 : 0;
1320
1321 flags = dskdat.bytes[byte_lane].bits[bit_index] & 7;
1322 deskew = dskdat.bytes[byte_lane].bits[bit_index] >> 3;
1323
1324 counts->saturated += !!(flags & 6);
1325
1326 // Do range calc even when locked; it could happen
1327 // that a bit is still unlocked after final retry,
1328 // and we want to have an external retry if a RANGE
1329 // error is present at exit...
1330 nib_min[nib_num] = min(nib_min[nib_num], deskew);
1331 nib_max[nib_num] = max(nib_max[nib_num], deskew);
1332
1333 if (!(flags & 1)) { // only when not locked
1334 counts->unlocked += 1;
1335 nib_unl[nib_num] += 1;
1336 }
1337
1338 if (print_enable)
1339 debug(" %3d %c", deskew, fc[flags ^ 1]);
1340 }
1341
1342 /*
1343 * Now look for nibble errors
1344 *
1345 * For bit 55, it looks like a bit deskew problem. When the
1346 * upper nibble of byte 6 needs to go to saturation, bit 7
1347 * of byte 6 locks prematurely at 64. For DIMMs with raw
1348 * card A and B, can we reset the deskew training when we
1349 * encounter this case? The reset criteria should be looking
1350 * at one nibble at a time for raw card A and B; if the
1351 * bit-deskew setting within a nibble is different by > 33,
1352 * we'll issue a reset to the bit deskew training.
1353 *
1354 * LMC0 Bit Deskew Byte(6): 64 0 - 0 - 0 - 26 61 35 64
1355 */
1356 // upper nibble range, then lower nibble range
1357 nibrng_errs = ((nib_max[1] - nib_min[1]) > 33) ? 1 : 0;
1358 nibrng_errs |= ((nib_max[0] - nib_min[0]) > 33) ? 1 : 0;
1359
1360 // check for nibble all unlocked
1361 nibunl_errs = ((nib_unl[0] == 4) || (nib_unl[1] == 4)) ? 1 : 0;
1362
1363 // check for bit value errors, ie < 17 or > 110
1364 // FIXME? assume max always > MIN_BITVAL and min < MAX_BITVAL
1365 bitval_errs = ((nib_max[1] > MAX_BITVAL) ||
1366 (nib_max[0] > MAX_BITVAL)) ? 1 : 0;
1367 bitval_errs |= ((nib_min[1] < MIN_BITVAL) ||
1368 (nib_min[0] < MIN_BITVAL)) ? 1 : 0;
1369
1370 if ((nibrng_errs != 0 || nibunl_errs != 0 ||
1371 bitval_errs != 0) && print_enable) {
1372 debug(" %c%c%c",
1373 (nibrng_errs) ? 'R' : ' ',
1374 (nibunl_errs) ? 'U' : ' ',
1375 (bitval_errs) ? 'V' : ' ');
1376 }
1377
1378 if (print_enable)
1379 debug("\n");
1380
1381 counts->nibrng_errs |= (nibrng_errs << byte_lane);
1382 counts->nibunl_errs |= (nibunl_errs << byte_lane);
1383 counts->bitval_errs |= (bitval_errs << byte_lane);
1384 }
1385}
1386
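/*
 * Force a read-Vref (DAC) bypass value into one byte lane through the
 * DLL_CTL3 back door (single lanes are selected as byte + 1, 0x0A means
 * all lanes): stage byte_sel and offset = dac_value / 2, then step
 * bit_select through no-op, bypass-setting load and bypass-enable.
 */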
1387static unsigned short load_dac_override(struct ddr_priv *priv, int if_num,
1388 int dac_value, int byte)
1389{
1390 union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
1391 // single bytelanes incr by 1; A is for ALL
1392 int bytex = (byte == 0x0A) ? byte : byte + 1;
1393
1394 ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
1395
1396 SET_DDR_DLL_CTL3(byte_sel, bytex);
1397 SET_DDR_DLL_CTL3(offset, dac_value >> 1);
1398
1399 ddr_dll_ctl3.cn73xx.bit_select = 0x9; /* No-op */
1400 lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1401
1402 ddr_dll_ctl3.cn73xx.bit_select = 0xC; /* vref bypass setting load */
1403 lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1404
1405 ddr_dll_ctl3.cn73xx.bit_select = 0xD; /* vref bypass on. */
1406 lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1407
1408 ddr_dll_ctl3.cn73xx.bit_select = 0x9; /* No-op */
1409 lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1410
1411 lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); // flush writes
1412
1413 return (unsigned short)GET_DDR_DLL_CTL3(offset);
1414}
1415
1416// arg dac_or_dbi is 1 for DAC, 0 for DBI
1417// returns 9 entries (bytelanes 0 through 8) in settings[]
1418// returns 0 if OK, -1 if a problem
1419static int read_dac_dbi_settings(struct ddr_priv *priv, int if_num,
1420 int dac_or_dbi, int *settings)
1421{
1422 union cvmx_lmcx_phy_ctl phy_ctl;
1423 int byte_lane, bit_num;
1424 int deskew;
1425 int dac_value;
1426 int new_deskew_layout = 0;
1427
1428 new_deskew_layout = octeon_is_cpuid(OCTEON_CN73XX) ||
1429 octeon_is_cpuid(OCTEON_CNF75XX);
1430 new_deskew_layout |= (octeon_is_cpuid(OCTEON_CN78XX) &&
1431 !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X));
1432
1433 phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1434 phy_ctl.s.dsk_dbg_clk_scaler = 3;
1435 lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1436
1437 bit_num = (dac_or_dbi) ? 4 : 5;
1438 // DBI not available
1439 if (bit_num == 5 && !new_deskew_layout)
1440 return -1;
1441
1442 // FIXME: always assume ECC is available
1443 for (byte_lane = 8; byte_lane >= 0; --byte_lane) {
1444 //set byte lane and bit to read
1445 phy_ctl.s.dsk_dbg_bit_sel = bit_num;
1446 phy_ctl.s.dsk_dbg_byte_sel = byte_lane;
1447 lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1448
1449 //start read sequence
1450 phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1451 phy_ctl.s.dsk_dbg_rd_start = 1;
1452 lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1453
1454 //poll for read sequence to complete
1455 do {
1456 phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1457 } while (phy_ctl.s.dsk_dbg_rd_complete != 1);
1458
1459 // keep the flag bits where they are for DBI
1460 deskew = phy_ctl.s.dsk_dbg_rd_data; /* >> 3 */
1461 dac_value = phy_ctl.s.dsk_dbg_rd_data & 0xff;
1462
1463 settings[byte_lane] = (dac_or_dbi) ? dac_value : deskew;
1464 }
1465
1466 return 0;
1467}
1468
1469// print out the DBI settings array
1470// arg dac_or_dbi is 1 for DAC, 0 for DBI
1471static void display_dac_dbi_settings(int lmc, int dac_or_dbi,
1472 int ecc_ena, int *settings, char *title)
1473{
1474 int byte;
1475 int flags;
1476 int deskew;
1477 const char *fc = " ?-=+*#&";
1478
1479 debug("N0.LMC%d: %s %s Settings %d:0 :",
1480 lmc, title, (dac_or_dbi) ? "DAC" : "DBI", 7 + ecc_ena);
1481 // FIXME: what about 32-bit mode?
1482 for (byte = (7 + ecc_ena); byte >= 0; --byte) {
1483 if (dac_or_dbi) { // DAC
1484 flags = 1; // say its locked to get blank
1485 deskew = settings[byte] & 0xff;
1486 } else { // DBI
1487 flags = settings[byte] & 7;
1488 deskew = (settings[byte] >> 3) & 0x7f;
1489 }
1490 debug(" %3d %c", deskew, fc[flags ^ 1]);
1491 }
1492 debug("\n");
1493}
1494
1495// Find a HWL majority
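// Returns the majority setting (its index << 1, i.e. restored to the even
// HWL value); *xc gets that value's count, *mc a bitmask of which of the
// four candidates were seen at all, and *cc how many distinct values hit.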
1496static int find_wl_majority(struct wlevel_bitcnt *bc, int *mx, int *mc,
1497 int *xc, int *cc)
1498{
1499 int ix, ic;
1500
1501 *mx = -1;
1502 *mc = 0;
1503 *xc = 0;
1504 *cc = 0;
1505
1506 for (ix = 0; ix < 4; ix++) {
1507 ic = bc->bitcnt[ix];
1508
1509 // make a bitmask of the ones with a count
1510 if (ic > 0) {
1511 *mc |= (1 << ix);
1512 *cc += 1; // count how many had non-zero counts
1513 }
1514
1515 // find the majority
1516 if (ic > *xc) { // new max?
1517 *xc = ic; // yes
1518 *mx = ix; // set its index
1519 }
1520 }
1521
1522 return (*mx << 1);
1523}
1524
1525// Evaluate the DAC settings array
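// Returns 1 if any two byte lanes' DAC values differ by more than 25,
// which the caller treats as a suspect training result.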
1526static int evaluate_dac_settings(int if_64b, int ecc_ena, int *settings)
1527{
1528 int byte, lane, dac, comp;
1529 int last = (if_64b) ? 7 : 3;
1530
1531 // FIXME: change the check...???
1532 // this looks only for sets of DAC values whose max/min differ by a lot
1533 // let any EVEN go so long as it is within range...
1534 for (byte = (last + ecc_ena); byte >= 0; --byte) {
1535 dac = settings[byte] & 0xff;
1536
1537 for (lane = (last + ecc_ena); lane >= 0; --lane) {
1538 comp = settings[lane] & 0xff;
1539 if (abs((dac - comp)) > 25)
1540 return 1;
1541 }
1542 }
1543
1544 return 0;
1545}
1546
1547static void perform_offset_training(struct ddr_priv *priv, int rank_mask,
1548 int if_num)
1549{
1550 union cvmx_lmcx_phy_ctl lmc_phy_ctl;
1551 u64 orig_phy_ctl;
1552 const char *s;
1553
1554 /*
1555 * 4.8.6 LMC Offset Training
1556 *
1557 * LMC requires input-receiver offset training.
1558 *
1559 * 1. Write LMC(0)_PHY_CTL[DAC_ON] = 1
1560 */
1561 lmc_phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1562 orig_phy_ctl = lmc_phy_ctl.u64;
1563 lmc_phy_ctl.s.dac_on = 1;
1564
1565 // allow full CSR override
1566 s = lookup_env_ull(priv, "ddr_phy_ctl");
1567 if (s)
1568 lmc_phy_ctl.u64 = strtoull(s, NULL, 0);
1569
1570 // do not print or write if CSR does not change...
1571 if (lmc_phy_ctl.u64 != orig_phy_ctl) {
1572 debug("PHY_CTL : 0x%016llx\n",
1573 lmc_phy_ctl.u64);
1574 lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), lmc_phy_ctl.u64);
1575 }
1576
1577 /*
1578 * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0B and
1579 * LMC(0)_SEQ_CTL[INIT_START] = 1.
1580 *
1581 * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
1582 */
1583 /* Start Offset training sequence */
1584 oct3_ddr3_seq(priv, rank_mask, if_num, 0x0B);
1585}
1586
1587static void perform_internal_vref_training(struct ddr_priv *priv,
1588 int rank_mask, int if_num)
1589{
1590 union cvmx_lmcx_ext_config ext_config;
1591 union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
1592
1593 // First, make sure all byte-lanes are out of VREF bypass mode
1594 ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
1595
1596 ddr_dll_ctl3.cn78xx.byte_sel = 0x0A; /* all byte-lanes */
1597 ddr_dll_ctl3.cn78xx.bit_select = 0x09; /* No-op */
1598 lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1599
1600 ddr_dll_ctl3.cn78xx.bit_select = 0x0E; /* vref bypass off. */
1601 lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1602
1603 ddr_dll_ctl3.cn78xx.bit_select = 0x09; /* No-op */
1604 lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
1605
1606 /*
1607 * 4.8.7 LMC Internal vref Training
1608 *
1609 * LMC requires input-reference-voltage training.
1610 *
1611 * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 0.
1612 */
1613 ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
1614 ext_config.s.vrefint_seq_deskew = 0;
1615
1616 ddr_seq_print("Performing LMC sequence: vrefint_seq_deskew = %d\n",
1617 ext_config.s.vrefint_seq_deskew);
1618
1619 lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64);
1620
1621 /*
1622 * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0a and
1623 * LMC(0)_SEQ_CTL[INIT_START] = 1.
1624 *
1625 * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
1626 */
1627 /* Start LMC Internal vref Training */
1628 oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);
1629}
1630
1631#define dbg_avg(format, ...) // debug(format, ##__VA_ARGS__)
1632
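/*
 * Condense several DAC/deskew samples into one value: drop the single
 * lowest and highest samples, average the rest (both truncated and
 * nearest-int), and prefer an odd result: the truncated average if it is
 * odd, else the rounded average if odd, else truncated + 1.
 */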
1633static int process_samples_average(s16 *bytes, int num_samples,
1634 int lmc, int lane_no)
1635{
1636 int i, sadj, sum = 0, ret, asum, trunc;
1637 s16 smin = 32767, smax = -32768;
1638 int nmin, nmax;
1639 //int rng;
1640
1641 dbg_avg("DBG_AVG%d.%d: ", lmc, lane_no);
1642
1643 for (i = 0; i < num_samples; i++) {
1644 sum += bytes[i];
1645 if (bytes[i] < smin)
1646 smin = bytes[i];
1647 if (bytes[i] > smax)
1648 smax = bytes[i];
1649 dbg_avg(" %3d", bytes[i]);
1650 }
1651
1652 nmin = 0;
1653 nmax = 0;
1654 for (i = 0; i < num_samples; i++) {
1655 if (bytes[i] == smin)
1656 nmin += 1;
1657 if (bytes[i] == smax)
1658 nmax += 1;
1659 }
1660 dbg_avg(" (min=%3d/%d, max=%3d/%d, range=%2d, samples=%2d)",
1661 smin, nmin, smax, nmax, smax - smin, num_samples);
1662
1663 asum = sum - smin - smax;
1664
1665 sadj = divide_nint(asum * 10, (num_samples - 2));
1666
1667 trunc = asum / (num_samples - 2);
1668
1669 dbg_avg(" [%3d.%d, %3d]", sadj / 10, sadj % 10, trunc);
1670
1671 sadj = divide_nint(sadj, 10);
1672 if (trunc & 1)
1673 ret = trunc;
1674 else if (sadj & 1)
1675 ret = sadj;
1676 else
1677 ret = trunc + 1;
1678
1679 dbg_avg(" -> %3d\n", ret);
1680
1681 return ret;
1682}
1683
1684#define DEFAULT_SAT_RETRY_LIMIT 11 // 1 + 10 retries
1685
1686#define default_lock_retry_limit 20 // 20 retries
1687#define deskew_validation_delay 10000 // 10 millisecs
1688
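/*
 * Deskew training with two retry loops: the inner loop re-runs the normal
 * deskew sequence (0x0A) while bits remain unlocked, up to
 * lock_retries_limit; the outer loop resets the deskew machinery and
 * starts over while results are still saturated, up to sat_retries_limit.
 * DDR4 RDIMMs on raw card A or B exit the outer loop after a single pass
 * on parts that can still saturate.  Returns -1 (fault) if nibble-range,
 * nibble-unlock or saturation problems remain (and optionally on
 * bit-value errors), else 0.
 */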
1689static int perform_deskew_training(struct ddr_priv *priv, int rank_mask,
1690 int if_num, int spd_rawcard_aorb)
1691{
1692 int unsaturated, locked;
1693 int sat_retries, sat_retries_limit;
1694 int lock_retries, lock_retries_total, lock_retries_limit;
1695 int print_first;
1696 int print_them_all;
1697 struct deskew_counts dsk_counts;
1698 union cvmx_lmcx_phy_ctl phy_ctl;
1699 char *s;
1700 int has_no_sat = octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
1701 octeon_is_cpuid(OCTEON_CNF75XX);
1702 int disable_bitval_retries = 1; // default to disabled
1703
1704 debug("N0.LMC%d: Performing Deskew Training.\n", if_num);
1705
1706 sat_retries = 0;
1707 sat_retries_limit = (has_no_sat) ? 5 : DEFAULT_SAT_RETRY_LIMIT;
1708
1709 lock_retries_total = 0;
1710 unsaturated = 0;
1711 print_first = 1; // print the first one
1712 // set to true for printing all normal deskew attempts
1713 print_them_all = 0;
1714
1715 // provide override for bitval_errs causing internal VREF retries
1716 s = env_get("ddr_disable_bitval_retries");
1717 if (s)
1718 disable_bitval_retries = !!simple_strtoul(s, NULL, 0);
1719
1720 lock_retries_limit = default_lock_retry_limit;
1721 if ((octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) ||
1722 (octeon_is_cpuid(OCTEON_CN73XX)) ||
1723 (octeon_is_cpuid(OCTEON_CNF75XX)))
1724 lock_retries_limit *= 2; // give new chips twice as many
1725
1726 do { /* while (sat_retries < sat_retry_limit) */
1727 /*
1728 * 4.8.8 LMC Deskew Training
1729 *
1730 * LMC requires input-read-data deskew training.
1731 *
1732 * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 1.
1733 */
1734
1735 union cvmx_lmcx_ext_config ext_config;
1736
1737 ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
1738 ext_config.s.vrefint_seq_deskew = 1;
1739
1740 ddr_seq_print
1741 ("Performing LMC sequence: vrefint_seq_deskew = %d\n",
1742 ext_config.s.vrefint_seq_deskew);
1743
1744 lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64);
1745
1746 /*
1747 * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0A and
1748 * LMC(0)_SEQ_CTL[INIT_START] = 1.
1749 *
1750 * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
1751 */
1752
1753 phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1754 phy_ctl.s.phy_dsk_reset = 1; /* RESET Deskew sequence */
1755 lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1756
1757 /* LMC Deskew Training */
1758 oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);
1759
1760 lock_retries = 0;
1761
1762perform_deskew_training:
1763
1764 phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
1765 phy_ctl.s.phy_dsk_reset = 0; /* Normal Deskew sequence */
1766 lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
1767
1768 /* LMC Deskew Training */
1769 oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);
1770
1771 // Moved this from validate_deskew_training
1772 /* Allow deskew results to stabilize before evaluating them. */
1773 udelay(deskew_validation_delay);
1774
1775 // Now go look at lock and saturation status...
1776 validate_deskew_training(priv, rank_mask, if_num, &dsk_counts,
1777 print_first);
1778 // after printing the first and not doing them all, no more
1779 if (print_first && !print_them_all)
1780 print_first = 0;
1781
1782 unsaturated = (dsk_counts.saturated == 0);
1783 locked = (dsk_counts.unlocked == 0);
1784
1785 // only do locking retries if unsaturated or rawcard A or B,
1786 // otherwise full SAT retry
1787 if (unsaturated || (spd_rawcard_aorb && !has_no_sat)) {
1788 if (!locked) { // and not locked
1789 lock_retries++;
1790 lock_retries_total++;
1791 if (lock_retries <= lock_retries_limit) {
1792 goto perform_deskew_training;
1793 } else {
1794 debug("N0.LMC%d: LOCK RETRIES failed after %d retries\n",
1795 if_num, lock_retries_limit);
1796 }
1797 } else {
1798 // only print if we did try
1799 if (lock_retries_total > 0)
1800 debug("N0.LMC%d: LOCK RETRIES successful after %d retries\n",
1801 if_num, lock_retries);
1802 }
1803 } /* if (unsaturated || spd_rawcard_aorb) */
1804
1805 ++sat_retries;
1806
1807 /*
1808 * At this point, check for a DDR4 RDIMM that will not
1809 * benefit from SAT retries; if so, exit
1810 */
1811 if (spd_rawcard_aorb && !has_no_sat) {
1812 debug("N0.LMC%d: Deskew Training Loop: Exiting for RAWCARD == A or B.\n",
1813 if_num);
1814 break; // no sat or lock retries
1815 }
1816
1817 } while (!unsaturated && (sat_retries < sat_retries_limit));
1818
1819 debug("N0.LMC%d: Deskew Training %s. %d sat-retries, %d lock-retries\n",
1820 if_num, (sat_retries >= DEFAULT_SAT_RETRY_LIMIT) ?
1821 "Timed Out" : "Completed", sat_retries - 1, lock_retries_total);
1822
1823 // FIXME? add saturation to reasons for fault return - give it a
1824 // chance via Internal VREF
1825 // FIXME? add OPTIONAL bit value to reasons for fault return -
1826 // give it a chance via Internal VREF
1827 if (dsk_counts.nibrng_errs != 0 || dsk_counts.nibunl_errs != 0 ||
1828 (dsk_counts.bitval_errs != 0 && !disable_bitval_retries) ||
1829 !unsaturated) {
1830 debug("N0.LMC%d: Nibble or Saturation Error(s) found, returning FAULT\n",
1831 if_num);
1832 // FIXME: do we want this output always for errors?
1833 validate_deskew_training(priv, rank_mask, if_num,
1834 &dsk_counts, 1);
1835 return -1; // we did retry locally, they did not help
1836 }
1837
1838 // NOTE: we (currently) always print one last training validation
1839 // before starting Read Leveling...
1840
1841 return 0;
1842}
1843
1844#define SCALING_FACTOR (1000)
1845
1846// NOTE: this gets called for 1-rank and 2-rank DIMMs in single-slot config
1847static int compute_vref_1slot_2rank(int rtt_wr, int rtt_park, int dqx_ctl,
1848 int rank_count, int dram_connection)
1849{
1850 u64 reff_s;
1851 u64 rser_s = (dram_connection) ? 0 : 15;
1852 u64 vdd = 1200;
1853 u64 vref;
1854 // 99 == HiZ
1855 u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ?
1856 1 * 1024 * 1024 : rtt_wr);
1857 u64 rtt_park_s = (((rtt_park == 0) || ((rank_count == 1) &&
1858 (rtt_wr != 0))) ?
1859 1 * 1024 * 1024 : rtt_park);
1860 u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl);
1861 int vref_value;
1862 u64 rangepc = 6000; // range1 base
1863 u64 vrefpc;
1864 int vref_range = 0;
1865
1866 reff_s = divide_nint((rtt_wr_s * rtt_park_s), (rtt_wr_s + rtt_park_s));
1867
1868 vref = (((rser_s + dqx_ctl_s) * SCALING_FACTOR) /
1869 (rser_s + dqx_ctl_s + reff_s)) + SCALING_FACTOR;
1870
1871 vref = (vref * vdd) / 2 / SCALING_FACTOR;
1872
1873 vrefpc = (vref * 100 * 100) / vdd;
1874
1875 if (vrefpc < rangepc) { // < range1 base, use range2
1876 vref_range = 1 << 6; // set bit A6 for range2
1877 rangepc = 4500; // range2 base is 45%
1878 }
1879
1880 vref_value = divide_nint(vrefpc - rangepc, 65);
1881 if (vref_value < 0)
1882 vref_value = vref_range; // set to base of range
1883 else
1884 vref_value |= vref_range;
1885
1886 debug("rtt_wr: %d, rtt_park: %d, dqx_ctl: %d, rank_count: %d\n",
1887 rtt_wr, rtt_park, dqx_ctl, rank_count);
1888 debug("rtt_wr_s: %lld, rtt_park_s: %lld, dqx_ctl_s: %lld, vref_value: 0x%x, range: %d\n",
1889 rtt_wr_s, rtt_park_s, dqx_ctl_s, vref_value ^ vref_range,
1890 vref_range ? 2 : 1);
1891
1892 return vref_value;
1893}
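/*
 * Illustrative walk-through of compute_vref_1slot_2rank() above. The inputs
 * are assumed example values, not taken from the original source; plain '/'
 * truncates and divide_nint() is assumed to round to nearest:
 *
 *   rtt_wr = 240, rtt_park = 120, dqx_ctl = 34, rank_count = 2,
 *   dram_connection = 0  (so rser_s = 15)
 *
 *   reff_s     = nint(240 * 120 / 360)              =   80
 *   vref       = (49 * 1000) / (49 + 80) + 1000     = 1379
 *   vref       = (1379 * 1200) / 2 / 1000           =  827
 *   vrefpc     = (827 * 100 * 100) / 1200           = 6891   (68.91%)
 *   6891 >= 6000, so range 1 is used (rangepc stays 6000)
 *   vref_value = nint((6891 - 6000) / 65)           =   14   (0x0e, range 1)
 */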
1894
1895// NOTE: this gets called for 1-rank and 2-rank DIMMs in two-slot configs
1896static int compute_vref_2slot_2rank(int rtt_wr, int rtt_park_00,
1897 int rtt_park_01,
1898 int dqx_ctl, int rtt_nom,
1899 int dram_connection)
1900{
1901 u64 rser = (dram_connection) ? 0 : 15;
1902 u64 vdd = 1200;
1903 u64 vl, vlp, vcm;
1904 u64 rd0, rd1, rpullup;
1905 // 99 == HiZ
1906 u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ?
1907 1 * 1024 * 1024 : rtt_wr);
1908 u64 rtt_park_00_s = (rtt_park_00 == 0 ? 1 * 1024 * 1024 : rtt_park_00);
1909 u64 rtt_park_01_s = (rtt_park_01 == 0 ? 1 * 1024 * 1024 : rtt_park_01);
1910 u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl);
1911 u64 rtt_nom_s = (rtt_nom == 0 ? 1 * 1024 * 1024 : rtt_nom);
1912 int vref_value;
1913 u64 rangepc = 6000; // range1 base
1914 u64 vrefpc;
1915 int vref_range = 0;
1916
1917	// rd0 = (RTT_NOM (parallel) RTT_WR) + RSER =
1918 // ((RTT_NOM * RTT_WR) / (RTT_NOM + RTT_WR)) + RSER
1919 rd0 = divide_nint((rtt_nom_s * rtt_wr_s),
1920 (rtt_nom_s + rtt_wr_s)) + rser;
1921
1922 // rd1 = (RTT_PARK_00 (parallel) RTT_PARK_01) + RSER =
1923 // ((RTT_PARK_00 * RTT_PARK_01) / (RTT_PARK_00 + RTT_PARK_01)) + RSER
1924 rd1 = divide_nint((rtt_park_00_s * rtt_park_01_s),
1925 (rtt_park_00_s + rtt_park_01_s)) + rser;
1926
1927 // rpullup = rd0 (parallel) rd1 = (rd0 * rd1) / (rd0 + rd1)
1928 rpullup = divide_nint((rd0 * rd1), (rd0 + rd1));
1929
1930 // vl = (DQX_CTL / (DQX_CTL + rpullup)) * 1.2
1931 vl = divide_nint((dqx_ctl_s * vdd), (dqx_ctl_s + rpullup));
1932
1933 // vlp = ((RSER / rd0) * (1.2 - vl)) + vl
1934 vlp = divide_nint((rser * (vdd - vl)), rd0) + vl;
1935
1936 // vcm = (vlp + 1.2) / 2
1937 vcm = divide_nint((vlp + vdd), 2);
1938
1939 // vrefpc = (vcm / 1.2) * 100
1940 vrefpc = divide_nint((vcm * 100 * 100), vdd);
1941
1942 if (vrefpc < rangepc) { // < range1 base, use range2
1943 vref_range = 1 << 6; // set bit A6 for range2
1944 rangepc = 4500; // range2 base is 45%
1945 }
1946
1947 vref_value = divide_nint(vrefpc - rangepc, 65);
1948 if (vref_value < 0)
1949 vref_value = vref_range; // set to base of range
1950 else
1951 vref_value |= vref_range;
1952
1953 debug("rtt_wr:%d, rtt_park_00:%d, rtt_park_01:%d, dqx_ctl:%d, rtt_nom:%d, vref_value:%d (0x%x)\n",
1954 rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom, vref_value,
1955 vref_value);
1956
1957 return vref_value;
1958}
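/*
 * Illustrative walk-through of compute_vref_2slot_2rank() above, with assumed
 * example inputs (not from the original source; divide_nint() is assumed to
 * round to nearest):
 *
 *   rtt_wr = 240, rtt_park_00 = 60, rtt_park_01 = 60, dqx_ctl = 34,
 *   rtt_nom = 60, dram_connection = 0  (so rser = 15)
 *
 *   rd0        = nint(60 * 240 / 300) + 15          =   63
 *   rd1        = nint(60 * 60 / 120) + 15           =   45
 *   rpullup    = nint(63 * 45 / 108)                =   26
 *   vl         = nint(34 * 1200 / (34 + 26))        =  680
 *   vlp        = nint(15 * (1200 - 680) / 63) + 680 =  804
 *   vcm        = nint((804 + 1200) / 2)             = 1002
 *   vrefpc     = nint(1002 * 100 * 100 / 1200)      = 8350   (83.50%)
 *   8350 >= 6000, so range 1 is used
 *   vref_value = nint((8350 - 6000) / 65)           =   36   (0x24, range 1)
 */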
1959
1960// NOTE: only call this for DIMMs with 1 or 2 ranks, not 4.
1961static int compute_vref_val(struct ddr_priv *priv, int if_num, int rankx,
1962 int dimm_count, int rank_count,
1963 struct impedence_values *imp_values,
1964 int is_stacked_die, int dram_connection)
1965{
1966 int computed_final_vref_value = 0;
1967 int enable_adjust = ENABLE_COMPUTED_VREF_ADJUSTMENT;
1968 const char *s;
1969 int rtt_wr, dqx_ctl, rtt_nom, index;
1970 union cvmx_lmcx_modereg_params1 lmc_modereg_params1;
1971 union cvmx_lmcx_modereg_params2 lmc_modereg_params2;
1972 union cvmx_lmcx_comp_ctl2 comp_ctl2;
1973 int rtt_park;
1974 int rtt_park_00;
1975 int rtt_park_01;
1976
1977 debug("N0.LMC%d.R%d: %s(...dram_connection = %d)\n",
1978 if_num, rankx, __func__, dram_connection);
1979
1980 // allow some overrides...
1981 s = env_get("ddr_adjust_computed_vref");
1982 if (s) {
1983 enable_adjust = !!simple_strtoul(s, NULL, 0);
1984 if (!enable_adjust) {
1985 debug("N0.LMC%d.R%d: DISABLE adjustment of computed VREF\n",
1986 if_num, rankx);
1987 }
1988 }
1989
1990 s = env_get("ddr_set_computed_vref");
1991 if (s) {
1992 int new_vref = simple_strtoul(s, NULL, 0);
1993
1994 debug("N0.LMC%d.R%d: OVERRIDE computed VREF to 0x%x (%d)\n",
1995 if_num, rankx, new_vref, new_vref);
1996 return new_vref;
1997 }
1998
1999 /*
2000 * Calculate an alternative to the measured vref value
2001 * but only for configurations we know how to...
2002 */
2003 // We have code for 2-rank DIMMs in both 1-slot or 2-slot configs,
2004 // and can use the 2-rank 1-slot code for 1-rank DIMMs in 1-slot
2005 // configs, and can use the 2-rank 2-slot code for 1-rank DIMMs
2006 // in 2-slot configs.
2007
2008 lmc_modereg_params1.u64 =
2009 lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
2010 lmc_modereg_params2.u64 =
2011 lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num));
2012 comp_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
2013 dqx_ctl = imp_values->dqx_strength[comp_ctl2.s.dqx_ctl];
2014
2015 // WR always comes from the current rank
2016 index = (lmc_modereg_params1.u64 >> (rankx * 12 + 5)) & 0x03;
2017 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
2018 index |= lmc_modereg_params1.u64 >> (51 + rankx - 2) & 0x04;
2019 rtt_wr = imp_values->rtt_wr_ohms[index];
2020
2021 // separate calculations for 1 vs 2 DIMMs per LMC
2022 if (dimm_count == 1) {
2023 // PARK comes from this rank if 1-rank, otherwise other rank
2024 index =
2025 (lmc_modereg_params2.u64 >>
2026 ((rankx ^ (rank_count - 1)) * 10 + 0)) & 0x07;
2027 rtt_park = imp_values->rtt_nom_ohms[index];
2028 computed_final_vref_value =
2029 compute_vref_1slot_2rank(rtt_wr, rtt_park, dqx_ctl,
2030 rank_count, dram_connection);
2031 } else {
2032 // get both PARK values from the other DIMM
2033 index =
2034 (lmc_modereg_params2.u64 >> ((rankx ^ 0x02) * 10 + 0)) &
2035 0x07;
2036 rtt_park_00 = imp_values->rtt_nom_ohms[index];
2037 index =
2038 (lmc_modereg_params2.u64 >> ((rankx ^ 0x03) * 10 + 0)) &
2039 0x07;
2040 rtt_park_01 = imp_values->rtt_nom_ohms[index];
2041 // NOM comes from this rank if 1-rank, otherwise other rank
2042 index =
2043 (lmc_modereg_params1.u64 >>
2044 ((rankx ^ (rank_count - 1)) * 12 + 9)) & 0x07;
2045 rtt_nom = imp_values->rtt_nom_ohms[index];
2046 computed_final_vref_value =
2047 compute_vref_2slot_2rank(rtt_wr, rtt_park_00, rtt_park_01,
2048 dqx_ctl, rtt_nom, dram_connection);
2049 }
2050
2051 if (enable_adjust) {
2052 union cvmx_lmcx_config lmc_config;
2053 union cvmx_lmcx_control lmc_control;
2054
2055 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
2056 lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
2057
2058 /*
2059		 * New computed vref = existing computed vref - X
2060		 *
2061		 * The value of X depends on the configuration.
2062		 * Both #122 and #139 are 2Rx4 RDIMMs, while #124 is stacked
2063		 * die 2Rx4, so the results fall into two cases:
2064		 *
2065		 * 1. Stacked Die: 2Rx4
2066		 * 1-slot: offset = 7, i.e. new computed vref = existing
2067		 * computed vref - 7
2068 * 2-slot: offset = 6
2069 *
2070 * 2. Regular: 2Rx4
2071 * 1-slot: offset = 3
2072 * 2-slot: offset = 2
2073 */
2074 // we know we never get called unless DDR4, so test just
2075 // the other conditions
2076 if (lmc_control.s.rdimm_ena == 1 &&
2077 rank_count == 2 && lmc_config.s.mode_x4dev) {
2078 // it must first be RDIMM and 2-rank and x4
2079 int adj;
2080
2081 // now do according to stacked die or not...
2082 if (is_stacked_die)
2083 adj = (dimm_count == 1) ? -7 : -6;
2084 else
2085 adj = (dimm_count == 1) ? -3 : -2;
2086
2087 // we must have adjusted it, so print it out if
2088 // verbosity is right
2089 debug("N0.LMC%d.R%d: adjusting computed vref from %2d (0x%02x) to %2d (0x%02x)\n",
2090 if_num, rankx, computed_final_vref_value,
2091 computed_final_vref_value,
2092 computed_final_vref_value + adj,
2093 computed_final_vref_value + adj);
2094 computed_final_vref_value += adj;
2095 }
2096 }
2097
2098 return computed_final_vref_value;
2099}
2100
2101static void unpack_rlevel_settings(int if_bytemask, int ecc_ena,
2102 struct rlevel_byte_data *rlevel_byte,
2103 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank)
2104{
2105 if ((if_bytemask & 0xff) == 0xff) {
2106 if (ecc_ena) {
2107 rlevel_byte[8].delay = lmc_rlevel_rank.s.byte7;
2108 rlevel_byte[7].delay = lmc_rlevel_rank.s.byte6;
2109 rlevel_byte[6].delay = lmc_rlevel_rank.s.byte5;
2110 rlevel_byte[5].delay = lmc_rlevel_rank.s.byte4;
2111 /* ECC */
2112 rlevel_byte[4].delay = lmc_rlevel_rank.s.byte8;
2113 } else {
2114 rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7;
2115 rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6;
2116 rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5;
2117 rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4;
2118 }
2119 } else {
2120 rlevel_byte[8].delay = lmc_rlevel_rank.s.byte8; /* unused */
2121 rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7; /* unused */
2122 rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6; /* unused */
2123 rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5; /* unused */
2124 rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4; /* ECC */
2125 }
2126
2127 rlevel_byte[3].delay = lmc_rlevel_rank.s.byte3;
2128 rlevel_byte[2].delay = lmc_rlevel_rank.s.byte2;
2129 rlevel_byte[1].delay = lmc_rlevel_rank.s.byte1;
2130 rlevel_byte[0].delay = lmc_rlevel_rank.s.byte0;
2131}
2132
2133static void pack_rlevel_settings(int if_bytemask, int ecc_ena,
2134 struct rlevel_byte_data *rlevel_byte,
2135 union cvmx_lmcx_rlevel_rankx
2136 *final_rlevel_rank)
2137{
2138 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank = *final_rlevel_rank;
2139
2140 if ((if_bytemask & 0xff) == 0xff) {
2141 if (ecc_ena) {
2142 lmc_rlevel_rank.s.byte7 = rlevel_byte[8].delay;
2143 lmc_rlevel_rank.s.byte6 = rlevel_byte[7].delay;
2144 lmc_rlevel_rank.s.byte5 = rlevel_byte[6].delay;
2145 lmc_rlevel_rank.s.byte4 = rlevel_byte[5].delay;
2146 /* ECC */
2147 lmc_rlevel_rank.s.byte8 = rlevel_byte[4].delay;
2148 } else {
2149 lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay;
2150 lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay;
2151 lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay;
2152 lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay;
2153 }
2154 } else {
2155 lmc_rlevel_rank.s.byte8 = rlevel_byte[8].delay;
2156 lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay;
2157 lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay;
2158 lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay;
2159 lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay;
2160 }
2161
2162 lmc_rlevel_rank.s.byte3 = rlevel_byte[3].delay;
2163 lmc_rlevel_rank.s.byte2 = rlevel_byte[2].delay;
2164 lmc_rlevel_rank.s.byte1 = rlevel_byte[1].delay;
2165 lmc_rlevel_rank.s.byte0 = rlevel_byte[0].delay;
2166
2167 *final_rlevel_rank = lmc_rlevel_rank;
2168}
2169
2170/////////////////// These are the RLEVEL settings display routines
2171
2172// flags
2173#define WITH_NOTHING 0
2174#define WITH_SCORE 1
2175#define WITH_AVERAGE 2
2176#define WITH_FINAL 4
2177#define WITH_COMPUTE 8
2178
2179static void do_display_rl(int if_num,
2180 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
2181 int rank, int flags, int score)
2182{
2183 char score_buf[16];
2184 char *msg_buf;
2185 char hex_buf[20];
2186
2187 if (flags & WITH_SCORE) {
2188 snprintf(score_buf, sizeof(score_buf), "(%d)", score);
2189 } else {
2190 score_buf[0] = ' ';
2191 score_buf[1] = 0;
2192 }
2193
2194 if (flags & WITH_AVERAGE) {
2195 msg_buf = " DELAY AVERAGES ";
2196 } else if (flags & WITH_FINAL) {
2197 msg_buf = " FINAL SETTINGS ";
2198 } else if (flags & WITH_COMPUTE) {
2199 msg_buf = " COMPUTED DELAYS ";
2200 } else {
2201 snprintf(hex_buf, sizeof(hex_buf), "0x%016llX",
2202 (unsigned long long)lmc_rlevel_rank.u64);
2203 msg_buf = hex_buf;
2204 }
2205
2206 debug("N0.LMC%d.R%d: Rlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d %s\n",
2207 if_num, rank, lmc_rlevel_rank.s.status, msg_buf,
2208 lmc_rlevel_rank.s.byte8, lmc_rlevel_rank.s.byte7,
2209 lmc_rlevel_rank.s.byte6, lmc_rlevel_rank.s.byte5,
2210 lmc_rlevel_rank.s.byte4, lmc_rlevel_rank.s.byte3,
2211 lmc_rlevel_rank.s.byte2, lmc_rlevel_rank.s.byte1,
2212 lmc_rlevel_rank.s.byte0, score_buf);
2213}
2214
2215static void display_rl(int if_num,
2216 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, int rank)
2217{
2218 do_display_rl(if_num, lmc_rlevel_rank, rank, 0, 0);
2219}
2220
2221static void display_rl_with_score(int if_num,
2222 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
2223 int rank, int score)
2224{
2225 do_display_rl(if_num, lmc_rlevel_rank, rank, 1, score);
2226}
2227
2228static void display_rl_with_final(int if_num,
2229 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
2230 int rank)
2231{
2232 do_display_rl(if_num, lmc_rlevel_rank, rank, 4, 0);
2233}
2234
2235static void display_rl_with_computed(int if_num,
2236 union cvmx_lmcx_rlevel_rankx
2237 lmc_rlevel_rank, int rank, int score)
2238{
2239 do_display_rl(if_num, lmc_rlevel_rank, rank, 9, score);
2240}
2241
2242// flag values
2243#define WITH_RODT_BLANK 0
2244#define WITH_RODT_SKIPPING 1
2245#define WITH_RODT_BESTROW 2
2246#define WITH_RODT_BESTSCORE 3
2247// control
2248#define SKIP_SKIPPING 1
2249
2250static const char *with_rodt_canned_msgs[4] = {
2251 " ", "SKIPPING ", "BEST ROW ", "BEST SCORE"
2252};
2253
2254static void display_rl_with_rodt(int if_num,
2255 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
2256 int rank, int score,
2257 int nom_ohms, int rodt_ohms, int flag)
2258{
2259 const char *msg_buf;
2260 char set_buf[20];
2261
2262#if SKIP_SKIPPING
2263 if (flag == WITH_RODT_SKIPPING)
2264 return;
2265#endif
2266
2267 msg_buf = with_rodt_canned_msgs[flag];
2268 if (nom_ohms < 0) {
2269 snprintf(set_buf, sizeof(set_buf), " RODT %3d ",
2270 rodt_ohms);
2271 } else {
2272 snprintf(set_buf, sizeof(set_buf), "NOM %3d RODT %3d", nom_ohms,
2273 rodt_ohms);
2274 }
2275
2276 debug("N0.LMC%d.R%d: Rlevel %s %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d (%d)\n",
2277 if_num, rank, set_buf, msg_buf, lmc_rlevel_rank.s.byte8,
2278 lmc_rlevel_rank.s.byte7, lmc_rlevel_rank.s.byte6,
2279 lmc_rlevel_rank.s.byte5, lmc_rlevel_rank.s.byte4,
2280 lmc_rlevel_rank.s.byte3, lmc_rlevel_rank.s.byte2,
2281 lmc_rlevel_rank.s.byte1, lmc_rlevel_rank.s.byte0, score);
2282}
2283
2284static void do_display_wl(int if_num,
2285 union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank,
2286 int rank, int flags)
2287{
2288 char *msg_buf;
2289 char hex_buf[20];
2290
2291 if (flags & WITH_FINAL) {
2292 msg_buf = " FINAL SETTINGS ";
2293 } else {
2294 snprintf(hex_buf, sizeof(hex_buf), "0x%016llX",
2295 (unsigned long long)lmc_wlevel_rank.u64);
2296 msg_buf = hex_buf;
2297 }
2298
2299 debug("N0.LMC%d.R%d: Wlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2300 if_num, rank, lmc_wlevel_rank.s.status, msg_buf,
2301 lmc_wlevel_rank.s.byte8, lmc_wlevel_rank.s.byte7,
2302 lmc_wlevel_rank.s.byte6, lmc_wlevel_rank.s.byte5,
2303 lmc_wlevel_rank.s.byte4, lmc_wlevel_rank.s.byte3,
2304 lmc_wlevel_rank.s.byte2, lmc_wlevel_rank.s.byte1,
2305 lmc_wlevel_rank.s.byte0);
2306}
2307
2308static void display_wl(int if_num,
2309 union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank, int rank)
2310{
2311 do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_NOTHING);
2312}
2313
2314static void display_wl_with_final(int if_num,
2315 union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank,
2316 int rank)
2317{
2318 do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_FINAL);
2319}
2320
2321// pretty-print bitmask adjuster
2322static u64 ppbm(u64 bm)
2323{
2324 if (bm != 0ul) {
2325 while ((bm & 0x0fful) == 0ul)
2326 bm >>= 4;
2327 }
2328
2329 return bm;
2330}
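/*
 * Example (illustrative): ppbm(0x3f000000) shifts right one nibble at a time
 * until the low byte is non-zero, returning 0x3f0, so the "%05llx" debug
 * output below prints "003f0" instead of a wider run of leading zeros.
 */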
2331
2332// xlate PACKED index to UNPACKED index to use with rlevel_byte
2333#define XPU(i, e) (((i) < 4) ? (i) : (((i) < 8) ? (i) + (e) : 4))
2334// xlate UNPACKED index to PACKED index to use with rlevel_bitmask
2335#define XUP(i, e) (((i) < 4) ? (i) : (e) ? (((i) > 4) ? (i) - 1 : 8) : (i))
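/*
 * Index mapping implied by the two macros above, derived from their
 * definitions and shown here only for reference (e = ecc_ena):
 *
 *   PACKED i   : 0 1 2 3 4 5 6 7 8
 *   XPU(i, 1)  : 0 1 2 3 5 6 7 8 4   (ECC byte 8 maps to unpacked slot 4)
 *   XPU(i, 0)  : 0 1 2 3 4 5 6 7 4
 *
 *   UNPACKED i : 0 1 2 3 4 5 6 7 8
 *   XUP(i, 1)  : 0 1 2 3 8 4 5 6 7
 *   XUP(i, 0)  : 0 1 2 3 4 5 6 7 8
 */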
2336
2337// flag values
2338#define WITH_WL_BITMASKS 0
2339#define WITH_RL_BITMASKS 1
2340#define WITH_RL_MASK_SCORES 2
2341#define WITH_RL_SEQ_SCORES 3
2342
2343static void do_display_bm(int if_num, int rank, void *bm,
2344 int flags, int ecc)
2345{
2346 if (flags == WITH_WL_BITMASKS) {
2347 // wlevel_bitmask array in PACKED index order, so just
2348 // print them
2349 int *bitmasks = (int *)bm;
2350
2351 debug("N0.LMC%d.R%d: Wlevel Debug Bitmasks : %05x %05x %05x %05x %05x %05x %05x %05x %05x\n",
2352 if_num, rank, bitmasks[8], bitmasks[7], bitmasks[6],
2353 bitmasks[5], bitmasks[4], bitmasks[3], bitmasks[2],
2354 bitmasks[1], bitmasks[0]
2355 );
2356 } else if (flags == WITH_RL_BITMASKS) {
2357 // rlevel_bitmask array in PACKED index order, so just
2358 // print them
2359 struct rlevel_bitmask *rlevel_bitmask =
2360 (struct rlevel_bitmask *)bm;
2361
2362 debug("N0.LMC%d.R%d: Rlevel Debug Bitmasks 8:0 : %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx\n",
2363 if_num, rank, ppbm(rlevel_bitmask[8].bm),
2364 ppbm(rlevel_bitmask[7].bm), ppbm(rlevel_bitmask[6].bm),
2365 ppbm(rlevel_bitmask[5].bm), ppbm(rlevel_bitmask[4].bm),
2366 ppbm(rlevel_bitmask[3].bm), ppbm(rlevel_bitmask[2].bm),
2367 ppbm(rlevel_bitmask[1].bm), ppbm(rlevel_bitmask[0].bm)
2368 );
2369 } else if (flags == WITH_RL_MASK_SCORES) {
2370 // rlevel_bitmask array in PACKED index order, so just
2371 // print them
2372 struct rlevel_bitmask *rlevel_bitmask =
2373 (struct rlevel_bitmask *)bm;
2374
2375 debug("N0.LMC%d.R%d: Rlevel Debug Bitmask Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2376 if_num, rank, rlevel_bitmask[8].errs,
2377 rlevel_bitmask[7].errs, rlevel_bitmask[6].errs,
2378 rlevel_bitmask[5].errs, rlevel_bitmask[4].errs,
2379 rlevel_bitmask[3].errs, rlevel_bitmask[2].errs,
2380 rlevel_bitmask[1].errs, rlevel_bitmask[0].errs);
2381 } else if (flags == WITH_RL_SEQ_SCORES) {
2382 // rlevel_byte array in UNPACKED index order, so xlate
2383 // and print them
2384 struct rlevel_byte_data *rlevel_byte =
2385 (struct rlevel_byte_data *)bm;
2386
2387 debug("N0.LMC%d.R%d: Rlevel Debug Non-seq Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2388 if_num, rank, rlevel_byte[XPU(8, ecc)].sqerrs,
2389 rlevel_byte[XPU(7, ecc)].sqerrs,
2390 rlevel_byte[XPU(6, ecc)].sqerrs,
2391 rlevel_byte[XPU(5, ecc)].sqerrs,
2392 rlevel_byte[XPU(4, ecc)].sqerrs,
2393 rlevel_byte[XPU(3, ecc)].sqerrs,
2394 rlevel_byte[XPU(2, ecc)].sqerrs,
2395 rlevel_byte[XPU(1, ecc)].sqerrs,
2396 rlevel_byte[XPU(0, ecc)].sqerrs);
2397 }
2398}
2399
2400static void display_wl_bm(int if_num, int rank, int *bitmasks)
2401{
2402 do_display_bm(if_num, rank, (void *)bitmasks, WITH_WL_BITMASKS, 0);
2403}
2404
2405static void display_rl_bm(int if_num, int rank,
2406 struct rlevel_bitmask *bitmasks, int ecc_ena)
2407{
2408 do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_BITMASKS,
2409 ecc_ena);
2410}
2411
2412static void display_rl_bm_scores(int if_num, int rank,
2413 struct rlevel_bitmask *bitmasks, int ecc_ena)
2414{
2415 do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_MASK_SCORES,
2416 ecc_ena);
2417}
2418
2419static void display_rl_seq_scores(int if_num, int rank,
2420 struct rlevel_byte_data *bytes, int ecc_ena)
2421{
2422 do_display_bm(if_num, rank, (void *)bytes, WITH_RL_SEQ_SCORES, ecc_ena);
2423}
2424
2425#define RODT_OHMS_COUNT 8
2426#define RTT_NOM_OHMS_COUNT 8
2427#define RTT_NOM_TABLE_COUNT 8
2428#define RTT_WR_OHMS_COUNT 8
2429#define DIC_OHMS_COUNT 3
2430#define DRIVE_STRENGTH_COUNT 15
2431
2432static unsigned char ddr4_rodt_ohms[RODT_OHMS_COUNT] = {
2433 0, 40, 60, 80, 120, 240, 34, 48 };
2434static unsigned char ddr4_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = {
2435 0, 60, 120, 40, 240, 48, 80, 34 };
2436static unsigned char ddr4_rtt_nom_table[RTT_NOM_TABLE_COUNT] = {
2437 0, 4, 2, 6, 1, 5, 3, 7 };
2438// setting HiZ ohms to 99 for computed vref
2439static unsigned char ddr4_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = {
2440 0, 120, 240, 99, 80 };
2441static unsigned char ddr4_dic_ohms[DIC_OHMS_COUNT] = { 34, 48 };
2442static short ddr4_drive_strength[DRIVE_STRENGTH_COUNT] = {
2443 0, 0, 26, 30, 34, 40, 48, 68, 0, 0, 0, 0, 0, 0, 0 };
2444static short ddr4_dqx_strength[DRIVE_STRENGTH_COUNT] = {
2445 0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 };
2446struct impedence_values ddr4_impedence_val = {
2447 .rodt_ohms = ddr4_rodt_ohms,
2448 .rtt_nom_ohms = ddr4_rtt_nom_ohms,
2449 .rtt_nom_table = ddr4_rtt_nom_table,
2450 .rtt_wr_ohms = ddr4_rtt_wr_ohms,
2451 .dic_ohms = ddr4_dic_ohms,
2452 .drive_strength = ddr4_drive_strength,
2453 .dqx_strength = ddr4_dqx_strength,
2454};
2455
2456static unsigned char ddr3_rodt_ohms[RODT_OHMS_COUNT] = {
2457 0, 20, 30, 40, 60, 120, 0, 0 };
2458static unsigned char ddr3_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = {
2459 0, 60, 120, 40, 20, 30, 0, 0 };
2460static unsigned char ddr3_rtt_nom_table[RTT_NOM_TABLE_COUNT] = {
2461 0, 2, 1, 3, 5, 4, 0, 0 };
2462static unsigned char ddr3_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = { 0, 60, 120 };
2463static unsigned char ddr3_dic_ohms[DIC_OHMS_COUNT] = { 40, 34 };
2464static short ddr3_drive_strength[DRIVE_STRENGTH_COUNT] = {
2465 0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 };
2466static struct impedence_values ddr3_impedence_val = {
2467 .rodt_ohms = ddr3_rodt_ohms,
2468 .rtt_nom_ohms = ddr3_rtt_nom_ohms,
2469 .rtt_nom_table = ddr3_rtt_nom_table,
2470 .rtt_wr_ohms = ddr3_rtt_wr_ohms,
2471 .dic_ohms = ddr3_dic_ohms,
2472 .drive_strength = ddr3_drive_strength,
2473 .dqx_strength = ddr3_drive_strength,
2474};
2475
2476static u64 hertz_to_psecs(u64 hertz)
2477{
2478 /* Clock in psecs */
2479 return divide_nint((u64)1000 * 1000 * 1000 * 1000, hertz);
2480}
2481
2482#define DIVIDEND_SCALE 1000 /* Scale to avoid rounding error. */
2483
2484static u64 psecs_to_mts(u64 psecs)
2485{
2486 return divide_nint(divide_nint((u64)(2 * 1000000 * DIVIDEND_SCALE),
2487 psecs), DIVIDEND_SCALE);
2488}
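/*
 * Example (illustrative, assuming divide_nint() rounds to nearest): an
 * 800 MHz DDR clock gives hertz_to_psecs(800000000) = 1250 ps, and
 * psecs_to_mts(1250) = 1600 MT/s (two transfers per clock cycle).
 */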
2489
2490#define WITHIN(v, b, m) (((v) >= ((b) - (m))) && ((v) <= ((b) + (m))))
2491
2492static unsigned long pretty_psecs_to_mts(u64 psecs)
2493{
2494 u64 ret = 0; // default to error
2495
2496 if (WITHIN(psecs, 2500, 1))
2497 ret = 800;
2498 else if (WITHIN(psecs, 1875, 1))
2499 ret = 1066;
2500 else if (WITHIN(psecs, 1500, 1))
2501 ret = 1333;
2502 else if (WITHIN(psecs, 1250, 1))
2503 ret = 1600;
2504 else if (WITHIN(psecs, 1071, 1))
2505 ret = 1866;
2506 else if (WITHIN(psecs, 937, 1))
2507 ret = 2133;
2508 else if (WITHIN(psecs, 833, 1))
2509 ret = 2400;
2510 else if (WITHIN(psecs, 750, 1))
2511 ret = 2666;
2512 return ret;
2513}
2514
2515static u64 mts_to_hertz(u64 mts)
2516{
2517 return ((mts * 1000 * 1000) / 2);
2518}
2519
2520static int compute_rc3x(int64_t tclk_psecs)
2521{
2522 long speed;
2523 long tclk_psecs_min, tclk_psecs_max;
2524 long data_rate_mhz, data_rate_mhz_min, data_rate_mhz_max;
2525 int rc3x;
2526
2527#define ENCODING_BASE 1240
2528
2529 data_rate_mhz = psecs_to_mts(tclk_psecs);
2530
2531 /*
2532 * 2400 MT/s is a special case. Using integer arithmetic it rounds
2533 * from 833 psecs to 2401 MT/s. Force it to 2400 to pick the
2534 * proper setting from the table.
2535 */
2536 if (tclk_psecs == 833)
2537 data_rate_mhz = 2400;
2538
2539 for (speed = ENCODING_BASE; speed < 3200; speed += 20) {
2540 int error = 0;
2541
2542 /* Clock in psecs */
2543 tclk_psecs_min = hertz_to_psecs(mts_to_hertz(speed + 00));
2544 /* Clock in psecs */
2545 tclk_psecs_max = hertz_to_psecs(mts_to_hertz(speed + 18));
2546
2547 data_rate_mhz_min = psecs_to_mts(tclk_psecs_min);
2548 data_rate_mhz_max = psecs_to_mts(tclk_psecs_max);
2549
2550 /* Force alingment to multiple to avound rounding errors. */
2551		/* Force alignment to a multiple of 20 to avoid rounding errors. */
2552 data_rate_mhz_max = ((data_rate_mhz_max + 18) / 20) * 20;
2553
2554 error += (speed + 00 != data_rate_mhz_min);
2555 error += (speed + 20 != data_rate_mhz_max);
2556
2557 rc3x = (speed - ENCODING_BASE) / 20;
2558
2559 if (data_rate_mhz <= (speed + 20))
2560 break;
2561 }
2562
2563 return rc3x;
2564}
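/*
 * Illustrative trace of compute_rc3x() with an assumed input: for
 * tclk_psecs = 833 the data rate is forced to 2400 MT/s, the loop breaks at
 * speed = 2380 (since 2400 <= 2380 + 20), and the function returns
 * rc3x = (2380 - 1240) / 20 = 57.
 */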
2565
2566/*
2567 * Static global variables are needed so that functions (loops) can be
2568 * restructured out of the main huge function. It's not elegant, but it is
2569 * the only way to break the original functions like init_octeon3_ddr3_interface()
2570 * into separate, smaller logical functions with fewer indentation levels.
2571 */
2572static int if_num __section(".data");
2573static u32 if_mask __section(".data");
2574static int ddr_hertz __section(".data");
2575
2576static struct ddr_conf *ddr_conf __section(".data");
2577static const struct dimm_odt_config *odt_1rank_config __section(".data");
2578static const struct dimm_odt_config *odt_2rank_config __section(".data");
2579static const struct dimm_odt_config *odt_4rank_config __section(".data");
2580static struct dimm_config *dimm_config_table __section(".data");
2581static const struct dimm_odt_config *odt_config __section(".data");
2582static const struct ddr3_custom_config *c_cfg __section(".data");
2583
2584static int odt_idx __section(".data");
2585
2586static ulong tclk_psecs __section(".data");
2587static ulong eclk_psecs __section(".data");
2588
2589static int row_bits __section(".data");
2590static int col_bits __section(".data");
2591static int num_banks __section(".data");
2592static int num_ranks __section(".data");
2593static int dram_width __section(".data");
2594static int dimm_count __section(".data");
2595/* Accumulate and report all the errors before giving up */
2596static int fatal_error __section(".data");
2597/* Flag that indicates safe DDR settings should be used */
2598static int safe_ddr_flag __section(".data");
2599/* Octeon II Default: 64bit interface width */
2600static int if_64b __section(".data");
2601static int if_bytemask __section(".data");
2602static u32 mem_size_mbytes __section(".data");
2603static unsigned int didx __section(".data");
2604static int bank_bits __section(".data");
2605static int bunk_enable __section(".data");
2606static int rank_mask __section(".data");
2607static int column_bits_start __section(".data");
2608static int row_lsb __section(".data");
2609static int pbank_lsb __section(".data");
2610static int use_ecc __section(".data");
2611static int mtb_psec __section(".data");
2612static short ftb_dividend __section(".data");
2613static short ftb_divisor __section(".data");
2614static int taamin __section(".data");
2615static int tckmin __section(".data");
2616static int cl __section(".data");
2617static int min_cas_latency __section(".data");
2618static int max_cas_latency __section(".data");
2619static int override_cas_latency __section(".data");
2620static int ddr_rtt_nom_auto __section(".data");
2621static int ddr_rodt_ctl_auto __section(".data");
2622
2623static int spd_addr __section(".data");
2624static int spd_org __section(".data");
2625static int spd_banks __section(".data");
2626static int spd_rdimm __section(".data");
2627static int spd_dimm_type __section(".data");
2628static int spd_ecc __section(".data");
2629static u32 spd_cas_latency __section(".data");
2630static int spd_mtb_dividend __section(".data");
2631static int spd_mtb_divisor __section(".data");
2632static int spd_tck_min __section(".data");
2633static int spd_taa_min __section(".data");
2634static int spd_twr __section(".data");
2635static int spd_trcd __section(".data");
2636static int spd_trrd __section(".data");
2637static int spd_trp __section(".data");
2638static int spd_tras __section(".data");
2639static int spd_trc __section(".data");
2640static int spd_trfc __section(".data");
2641static int spd_twtr __section(".data");
2642static int spd_trtp __section(".data");
2643static int spd_tfaw __section(".data");
2644static int spd_addr_mirror __section(".data");
2645static int spd_package __section(".data");
2646static int spd_rawcard __section(".data");
2647static int spd_rawcard_aorb __section(".data");
2648static int spd_rdimm_registers __section(".data");
2649static int spd_thermal_sensor __section(".data");
2650
2651static int is_stacked_die __section(".data");
2652static int is_3ds_dimm __section(".data");
2653// 3DS: logical ranks per package rank
2654static int lranks_per_prank __section(".data");
2655// 3DS: logical ranks bits
2656static int lranks_bits __section(".data");
2657// in Mbits; only used for 3DS
2658static int die_capacity __section(".data");
2659
2660static enum ddr_type ddr_type __section(".data");
2661
2662static int twr __section(".data");
2663static int trcd __section(".data");
2664static int trrd __section(".data");
2665static int trp __section(".data");
2666static int tras __section(".data");
2667static int trc __section(".data");
2668static int trfc __section(".data");
2669static int twtr __section(".data");
2670static int trtp __section(".data");
2671static int tfaw __section(".data");
2672
2673static int ddr4_tckavgmin __section(".data");
2674static int ddr4_tckavgmax __section(".data");
2675static int ddr4_trdcmin __section(".data");
2676static int ddr4_trpmin __section(".data");
2677static int ddr4_trasmin __section(".data");
2678static int ddr4_trcmin __section(".data");
2679static int ddr4_trfc1min __section(".data");
2680static int ddr4_trfc2min __section(".data");
2681static int ddr4_trfc4min __section(".data");
2682static int ddr4_tfawmin __section(".data");
2683static int ddr4_trrd_smin __section(".data");
2684static int ddr4_trrd_lmin __section(".data");
2685static int ddr4_tccd_lmin __section(".data");
2686
2687static int wl_mask_err __section(".data");
2688static int wl_loops __section(".data");
2689static int default_rtt_nom[4] __section(".data");
2690static int dyn_rtt_nom_mask __section(".data");
2691static struct impedence_values *imp_val __section(".data");
2692static char default_rodt_ctl __section(".data");
2693// default to disabled (i.e., try LMC restart, not chip reset)
2694static int ddr_disable_chip_reset __section(".data");
2695static const char *dimm_type_name __section(".data");
2696static int match_wl_rtt_nom __section(".data");
2697
2698struct hwl_alt_by_rank {
2699 u16 hwl_alt_mask; // mask of bytelanes with alternate
2700 u16 hwl_alt_delay[9]; // bytelane alternate avail if mask=1
2701};
2702
2703static struct hwl_alt_by_rank hwl_alts[4] __section(".data");
2704
2705#define DEFAULT_INTERNAL_VREF_TRAINING_LIMIT 3 // was: 5
2706static int internal_retries __section(".data");
2707
2708static int deskew_training_errors __section(".data");
2709static struct deskew_counts deskew_training_results __section(".data");
2710static int disable_deskew_training __section(".data");
2711static int restart_if_dsk_incomplete __section(".data");
2712static int dac_eval_retries __section(".data");
2713static int dac_settings[9] __section(".data");
2714static int num_samples __section(".data");
2715static int sample __section(".data");
2716static int lane __section(".data");
2717static int last_lane __section(".data");
2718static int total_dac_eval_retries __section(".data");
2719static int dac_eval_exhausted __section(".data");
2720
2721#define DEFAULT_DAC_SAMPLES 7 // originally was 5
2722#define DAC_RETRIES_LIMIT 2
2723
2724struct bytelane_sample {
2725 s16 bytes[DEFAULT_DAC_SAMPLES];
2726};
2727
2728static struct bytelane_sample lanes[9] __section(".data");
2729
2730static char disable_sequential_delay_check __section(".data");
2731static int wl_print __section(".data");
2732
2733static int enable_by_rank_init __section(".data");
2734static int saved_rank_mask __section(".data");
2735static int by_rank __section(".data");
2736static struct deskew_data rank_dsk[4] __section(".data");
2737static struct dac_data rank_dac[4] __section(".data");
2738
2739// TODO: perhaps remove node completely at some point?
2740static int node __section(".data");
2741static int base_cl __section(".data");
2742
2743/* Parameters from DDR3 Specifications */
2744#define DDR3_TREFI 7800000 /* 7.8 us */
2745#define DDR3_ZQCS 80000ull /* 80 ns */
2746#define DDR3_ZQCS_INTERNAL 1280000000ull /* 128ms/100 */
2747#define DDR3_TCKE 5000 /* 5 ns */
2748#define DDR3_TMRD 4 /* 4 nCK */
2749#define DDR3_TDLLK 512 /* 512 nCK */
2750#define DDR3_TMPRR 1 /* 1 nCK */
2751#define DDR3_TWLMRD 40 /* 40 nCK */
2752#define DDR3_TWLDQSEN 25 /* 25 nCK */
2753
2754/* Parameters from DDR4 Specifications */
2755#define DDR4_TMRD 8 /* 8 nCK */
2756#define DDR4_TDLLK 768 /* 768 nCK */
2757
2758static void lmc_config(struct ddr_priv *priv)
2759{
2760 union cvmx_lmcx_config cfg;
2761 char *s;
2762
2763 cfg.u64 = 0;
2764
2765 cfg.cn78xx.ecc_ena = use_ecc;
2766 cfg.cn78xx.row_lsb = encode_row_lsb_ddr3(row_lsb);
2767 cfg.cn78xx.pbank_lsb = encode_pbank_lsb_ddr3(pbank_lsb);
2768
2769 cfg.cn78xx.idlepower = 0; /* Disabled */
2770
2771 s = lookup_env(priv, "ddr_idlepower");
2772 if (s)
2773 cfg.cn78xx.idlepower = simple_strtoul(s, NULL, 0);
2774
2775 cfg.cn78xx.forcewrite = 0; /* Disabled */
2776 /* Include memory reference address in the ECC */
2777 cfg.cn78xx.ecc_adr = 1;
2778
2779 s = lookup_env(priv, "ddr_ecc_adr");
2780 if (s)
2781 cfg.cn78xx.ecc_adr = simple_strtoul(s, NULL, 0);
2782
2783 cfg.cn78xx.reset = 0;
2784
2785 /*
2786 * Program LMC0_CONFIG[24:18], ref_zqcs_int(6:0) to
2787	 * RND-DN(tREFI/clkPeriod/512). Program LMC0_CONFIG[36:25],
2788 * ref_zqcs_int(18:7) to
2789 * RND-DN(ZQCS_Interval/clkPeriod/(512*128)). Note that this
2790 * value should always be greater than 32, to account for
2791 * resistor calibration delays.
2792 */
2793
2794 cfg.cn78xx.ref_zqcs_int = ((DDR3_TREFI / tclk_psecs / 512) & 0x7f);
2795 cfg.cn78xx.ref_zqcs_int |=
2796 ((max(33ull, (DDR3_ZQCS_INTERNAL / (tclk_psecs / 100) /
2797 (512 * 128))) & 0xfff) << 7);
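	/*
	 * Worked example for the two fields above (illustrative, assuming
	 * tclk_psecs = 2500, i.e. DDR3-800):
	 *   ref_zqcs_int[6:0]  = 7800000 / 2500 / 512             = 6
	 *   ref_zqcs_int[18:7] = max(33, 1280000000 / 25 / 65536) = 781
	 */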
2798
2799 cfg.cn78xx.early_dqx = 1; /* Default to enabled */
2800
2801 s = lookup_env(priv, "ddr_early_dqx");
2802 if (!s)
2803 s = lookup_env(priv, "ddr%d_early_dqx", if_num);
2804
2805 if (s)
2806 cfg.cn78xx.early_dqx = simple_strtoul(s, NULL, 0);
2807
2808 cfg.cn78xx.sref_with_dll = 0;
2809
2810 cfg.cn78xx.rank_ena = bunk_enable;
2811 cfg.cn78xx.rankmask = rank_mask; /* Set later */
2812 cfg.cn78xx.mirrmask = (spd_addr_mirror << 1 | spd_addr_mirror << 3) &
2813 rank_mask;
2814 /* Set once and don't change it. */
2815 cfg.cn78xx.init_status = rank_mask;
2816 cfg.cn78xx.early_unload_d0_r0 = 0;
2817 cfg.cn78xx.early_unload_d0_r1 = 0;
2818 cfg.cn78xx.early_unload_d1_r0 = 0;
2819 cfg.cn78xx.early_unload_d1_r1 = 0;
2820 cfg.cn78xx.scrz = 0;
2821 if (octeon_is_cpuid(OCTEON_CN70XX))
2822 cfg.cn78xx.mode32b = 1; /* Read-only. Always 1. */
2823 cfg.cn78xx.mode_x4dev = (dram_width == 4) ? 1 : 0;
2824 cfg.cn78xx.bg2_enable = ((ddr_type == DDR4_DRAM) &&
2825 (dram_width == 16)) ? 0 : 1;
2826
2827 s = lookup_env_ull(priv, "ddr_config");
2828 if (s)
2829 cfg.u64 = simple_strtoull(s, NULL, 0);
2830 debug("LMC_CONFIG : 0x%016llx\n",
2831 cfg.u64);
2832 lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
2833}
2834
2835static void lmc_control(struct ddr_priv *priv)
2836{
2837 union cvmx_lmcx_control ctrl;
2838 char *s;
2839
2840 ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
2841 ctrl.s.rdimm_ena = spd_rdimm;
2842 ctrl.s.bwcnt = 0; /* Clear counter later */
2843 if (spd_rdimm)
2844 ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_rdimm);
2845 else
2846 ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_udimm);
2847 ctrl.s.pocas = 0;
2848 ctrl.s.fprch2 = (safe_ddr_flag ? 2 : c_cfg->fprch2);
2849 ctrl.s.throttle_rd = safe_ddr_flag ? 1 : 0;
2850 ctrl.s.throttle_wr = safe_ddr_flag ? 1 : 0;
2851 ctrl.s.inorder_rd = safe_ddr_flag ? 1 : 0;
2852 ctrl.s.inorder_wr = safe_ddr_flag ? 1 : 0;
2853 ctrl.s.elev_prio_dis = safe_ddr_flag ? 1 : 0;
2854 /* discards writes to addresses that don't exist in the DRAM */
2855 ctrl.s.nxm_write_en = 0;
2856 ctrl.s.max_write_batch = 8;
2857 ctrl.s.xor_bank = 1;
2858 ctrl.s.auto_dclkdis = 1;
2859 ctrl.s.int_zqcs_dis = 0;
2860 ctrl.s.ext_zqcs_dis = 0;
2861 ctrl.s.bprch = 1;
2862 ctrl.s.wodt_bprch = 1;
2863 ctrl.s.rodt_bprch = 1;
2864
2865 s = lookup_env(priv, "ddr_xor_bank");
2866 if (s)
2867 ctrl.s.xor_bank = simple_strtoul(s, NULL, 0);
2868
2869 s = lookup_env(priv, "ddr_2t");
2870 if (s)
2871 ctrl.s.ddr2t = simple_strtoul(s, NULL, 0);
2872
2873 s = lookup_env(priv, "ddr_fprch2");
2874 if (s)
2875 ctrl.s.fprch2 = simple_strtoul(s, NULL, 0);
2876
2877 s = lookup_env(priv, "ddr_bprch");
2878 if (s)
2879 ctrl.s.bprch = simple_strtoul(s, NULL, 0);
2880
2881 s = lookup_env(priv, "ddr_wodt_bprch");
2882 if (s)
2883 ctrl.s.wodt_bprch = simple_strtoul(s, NULL, 0);
2884
2885 s = lookup_env(priv, "ddr_rodt_bprch");
2886 if (s)
2887 ctrl.s.rodt_bprch = simple_strtoul(s, NULL, 0);
2888
2889 s = lookup_env(priv, "ddr_int_zqcs_dis");
2890 if (s)
2891 ctrl.s.int_zqcs_dis = simple_strtoul(s, NULL, 0);
2892
2893 s = lookup_env(priv, "ddr_ext_zqcs_dis");
2894 if (s)
2895 ctrl.s.ext_zqcs_dis = simple_strtoul(s, NULL, 0);
2896
2897 s = lookup_env_ull(priv, "ddr_control");
2898 if (s)
2899 ctrl.u64 = simple_strtoull(s, NULL, 0);
2900
2901 debug("LMC_CONTROL : 0x%016llx\n",
2902 ctrl.u64);
2903 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
2904}
2905
2906static void lmc_timing_params0(struct ddr_priv *priv)
2907{
2908 union cvmx_lmcx_timing_params0 tp0;
2909 unsigned int trp_value;
2910 char *s;
2911
2912 tp0.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS0(if_num));
2913
2914 trp_value = divide_roundup(trp, tclk_psecs) - 1;
2915 debug("TIMING_PARAMS0[TRP]: NEW 0x%x, OLD 0x%x\n", trp_value,
2916 trp_value +
2917 (unsigned int)(divide_roundup(max(4ull * tclk_psecs, 7500ull),
2918 tclk_psecs)) - 4);
2919 s = lookup_env_ull(priv, "ddr_use_old_trp");
2920 if (s) {
2921 if (!!simple_strtoull(s, NULL, 0)) {
2922 trp_value +=
2923 divide_roundup(max(4ull * tclk_psecs, 7500ull),
2924 tclk_psecs) - 4;
2925 debug("TIMING_PARAMS0[trp]: USING OLD 0x%x\n",
2926 trp_value);
2927 }
2928 }
2929
2930 tp0.cn78xx.txpr =
2931 divide_roundup(max(5ull * tclk_psecs, trfc + 10000ull),
2932 16 * tclk_psecs);
2933 tp0.cn78xx.trp = trp_value & 0x1f;
2934 tp0.cn78xx.tcksre =
2935 divide_roundup(max(5ull * tclk_psecs, 10000ull), tclk_psecs) - 1;
2936
2937 if (ddr_type == DDR4_DRAM) {
2938 int tzqinit = 4; // Default to 4, for all DDR4 speed bins
2939
2940 s = lookup_env(priv, "ddr_tzqinit");
2941 if (s)
2942 tzqinit = simple_strtoul(s, NULL, 0);
2943
2944 tp0.cn78xx.tzqinit = tzqinit;
2945 /* Always 8. */
2946 tp0.cn78xx.tzqcs = divide_roundup(128 * tclk_psecs,
2947 (16 * tclk_psecs));
2948 tp0.cn78xx.tcke =
2949 divide_roundup(max(3 * tclk_psecs, (ulong)DDR3_TCKE),
2950 tclk_psecs) - 1;
2951 tp0.cn78xx.tmrd =
2952 divide_roundup((DDR4_TMRD * tclk_psecs), tclk_psecs) - 1;
2953 tp0.cn78xx.tmod = 25; /* 25 is the max allowed */
2954 tp0.cn78xx.tdllk = divide_roundup(DDR4_TDLLK, 256);
2955 } else {
2956 tp0.cn78xx.tzqinit =
2957 divide_roundup(max(512ull * tclk_psecs, 640000ull),
2958 (256 * tclk_psecs));
2959 tp0.cn78xx.tzqcs =
2960 divide_roundup(max(64ull * tclk_psecs, DDR3_ZQCS),
2961 (16 * tclk_psecs));
2962 tp0.cn78xx.tcke = divide_roundup(DDR3_TCKE, tclk_psecs) - 1;
2963 tp0.cn78xx.tmrd =
2964 divide_roundup((DDR3_TMRD * tclk_psecs), tclk_psecs) - 1;
2965 tp0.cn78xx.tmod =
2966 divide_roundup(max(12ull * tclk_psecs, 15000ull),
2967 tclk_psecs) - 1;
2968 tp0.cn78xx.tdllk = divide_roundup(DDR3_TDLLK, 256);
2969 }
2970
2971 s = lookup_env_ull(priv, "ddr_timing_params0");
2972 if (s)
2973 tp0.u64 = simple_strtoull(s, NULL, 0);
2974 debug("TIMING_PARAMS0 : 0x%016llx\n",
2975 tp0.u64);
2976 lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS0(if_num), tp0.u64);
2977}
2978
2979static void lmc_timing_params1(struct ddr_priv *priv)
2980{
2981 union cvmx_lmcx_timing_params1 tp1;
2982 unsigned int txp, temp_trcd, trfc_dlr;
2983 char *s;
2984
2985 tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
2986
2987 /* .cn70xx. */
2988 tp1.s.tmprr = divide_roundup(DDR3_TMPRR * tclk_psecs, tclk_psecs) - 1;
2989
2990 tp1.cn78xx.tras = divide_roundup(tras, tclk_psecs) - 1;
2991
2992 temp_trcd = divide_roundup(trcd, tclk_psecs);
2993 if (temp_trcd > 15) {
2994 debug("TIMING_PARAMS1[trcd]: need extension bit for 0x%x\n",
2995 temp_trcd);
2996 }
2997 if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trcd > 15) {
2998 /*
2999 * Let .trcd=0 serve as a flag that the field has
3000 * overflowed. Must use Additive Latency mode as a
3001 * workaround.
3002 */
3003 temp_trcd = 0;
3004 }
3005 tp1.cn78xx.trcd = (temp_trcd >> 0) & 0xf;
3006 tp1.cn78xx.trcd_ext = (temp_trcd >> 4) & 0x1;
3007
3008 tp1.cn78xx.twtr = divide_roundup(twtr, tclk_psecs) - 1;
3009 tp1.cn78xx.trfc = divide_roundup(trfc, 8 * tclk_psecs);
3010
3011 if (ddr_type == DDR4_DRAM) {
3012 /* Workaround bug 24006. Use Trrd_l. */
3013 tp1.cn78xx.trrd =
3014 divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2;
3015 } else {
3016 tp1.cn78xx.trrd = divide_roundup(trrd, tclk_psecs) - 2;
3017 }
3018
3019 /*
3020 * tXP = max( 3nCK, 7.5 ns) DDR3-800 tCLK = 2500 psec
3021 * tXP = max( 3nCK, 7.5 ns) DDR3-1066 tCLK = 1875 psec
3022 * tXP = max( 3nCK, 6.0 ns) DDR3-1333 tCLK = 1500 psec
3023 * tXP = max( 3nCK, 6.0 ns) DDR3-1600 tCLK = 1250 psec
3024 * tXP = max( 3nCK, 6.0 ns) DDR3-1866 tCLK = 1071 psec
3025 * tXP = max( 3nCK, 6.0 ns) DDR3-2133 tCLK = 937 psec
3026 */
3027 txp = (tclk_psecs < 1875) ? 6000 : 7500;
3028 txp = divide_roundup(max((unsigned int)(3 * tclk_psecs), txp),
3029 tclk_psecs) - 1;
3030 if (txp > 7) {
3031 debug("TIMING_PARAMS1[txp]: need extension bit for 0x%x\n",
3032 txp);
3033 }
3034 if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && txp > 7)
3035 txp = 7; // max it out
3036 tp1.cn78xx.txp = (txp >> 0) & 7;
3037 tp1.cn78xx.txp_ext = (txp >> 3) & 1;
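	/*
	 * Example (illustrative): at tclk_psecs = 1250 (DDR3-1600) the table
	 * above gives tXP = max(3 nCK, 6.0 ns); 6000 / 1250 rounds up to 5
	 * clocks, so txp = 5 - 1 = 4 and txp_ext = 0.
	 */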
3038
3039 tp1.cn78xx.twlmrd = divide_roundup(DDR3_TWLMRD * tclk_psecs,
3040 4 * tclk_psecs);
3041 tp1.cn78xx.twldqsen = divide_roundup(DDR3_TWLDQSEN * tclk_psecs,
3042 4 * tclk_psecs);
3043 tp1.cn78xx.tfaw = divide_roundup(tfaw, 4 * tclk_psecs);
3044 tp1.cn78xx.txpdll = divide_roundup(max(10ull * tclk_psecs, 24000ull),
3045 tclk_psecs) - 1;
3046
3047 if (ddr_type == DDR4_DRAM && is_3ds_dimm) {
3048 /*
3049 * 4 Gb: tRFC_DLR = 90 ns
3050 * 8 Gb: tRFC_DLR = 120 ns
3051 * 16 Gb: tRFC_DLR = 190 ns FIXME?
3052 */
3053 if (die_capacity == 0x1000) // 4 Gbit
3054 trfc_dlr = 90;
3055 else if (die_capacity == 0x2000) // 8 Gbit
3056 trfc_dlr = 120;
3057 else if (die_capacity == 0x4000) // 16 Gbit
3058 trfc_dlr = 190;
3059 else
3060 trfc_dlr = 0;
3061
3062 if (trfc_dlr == 0) {
3063 debug("N%d.LMC%d: ERROR: tRFC_DLR: die_capacity %u Mbit is illegal\n",
3064 node, if_num, die_capacity);
3065 } else {
3066 tp1.cn78xx.trfc_dlr =
3067 divide_roundup(trfc_dlr * 1000UL, 8 * tclk_psecs);
3068 debug("N%d.LMC%d: TIMING_PARAMS1[trfc_dlr] set to %u\n",
3069 node, if_num, tp1.cn78xx.trfc_dlr);
3070 }
3071 }
3072
3073 s = lookup_env_ull(priv, "ddr_timing_params1");
3074 if (s)
3075 tp1.u64 = simple_strtoull(s, NULL, 0);
3076
3077 debug("TIMING_PARAMS1 : 0x%016llx\n",
3078 tp1.u64);
3079 lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
3080}
3081
3082static void lmc_timing_params2(struct ddr_priv *priv)
3083{
3084 if (ddr_type == DDR4_DRAM) {
3085 union cvmx_lmcx_timing_params1 tp1;
3086 union cvmx_lmcx_timing_params2 tp2;
3087 int temp_trrd_l;
3088
3089 tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
3090 tp2.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS2(if_num));
3091 debug("TIMING_PARAMS2 : 0x%016llx\n",
3092 tp2.u64);
3093
3094 temp_trrd_l = divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2;
3095 if (temp_trrd_l > 7)
3096 debug("TIMING_PARAMS2[trrd_l]: need extension bit for 0x%x\n",
3097 temp_trrd_l);
3098 if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trrd_l > 7)
3099 temp_trrd_l = 7; // max it out
3100 tp2.cn78xx.trrd_l = (temp_trrd_l >> 0) & 7;
3101 tp2.cn78xx.trrd_l_ext = (temp_trrd_l >> 3) & 1;
3102
3103 // correct for 1600-2400
3104 tp2.s.twtr_l = divide_nint(max(4ull * tclk_psecs, 7500ull),
3105 tclk_psecs) - 1;
3106 tp2.s.t_rw_op_max = 7;
3107 tp2.s.trtp = divide_roundup(max(4ull * tclk_psecs, 7500ull),
3108 tclk_psecs) - 1;
3109
3110 debug("TIMING_PARAMS2 : 0x%016llx\n",
3111 tp2.u64);
3112 lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS2(if_num), tp2.u64);
3113
3114 /*
3115 * Workaround Errata 25823 - LMC: Possible DDR4 tWTR_L not met
3116 * for Write-to-Read operations to the same Bank Group
3117 */
3118 if (tp1.cn78xx.twtr < (tp2.s.twtr_l - 4)) {
3119 tp1.cn78xx.twtr = tp2.s.twtr_l - 4;
3120 debug("ERRATA 25823: NEW: TWTR: %d, TWTR_L: %d\n",
3121 tp1.cn78xx.twtr, tp2.s.twtr_l);
3122 debug("TIMING_PARAMS1 : 0x%016llx\n",
3123 tp1.u64);
3124 lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
3125 }
3126 }
3127}
3128
3129static void lmc_modereg_params0(struct ddr_priv *priv)
3130{
3131 union cvmx_lmcx_modereg_params0 mp0;
3132 int param;
3133 char *s;
3134
3135 mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
3136
3137 if (ddr_type == DDR4_DRAM) {
3138 mp0.s.cwl = 0; /* 1600 (1250ps) */
3139 if (tclk_psecs < 1250)
3140 mp0.s.cwl = 1; /* 1866 (1072ps) */
3141 if (tclk_psecs < 1072)
3142 mp0.s.cwl = 2; /* 2133 (938ps) */
3143 if (tclk_psecs < 938)
3144 mp0.s.cwl = 3; /* 2400 (833ps) */
3145 if (tclk_psecs < 833)
3146 mp0.s.cwl = 4; /* 2666 (750ps) */
3147 if (tclk_psecs < 750)
3148 mp0.s.cwl = 5; /* 3200 (625ps) */
3149 } else {
3150 /*
3151 ** CSR CWL CAS write Latency
3152 ** === === =================================
3153 ** 0 5 ( tCK(avg) >= 2.5 ns)
3154 ** 1 6 (2.5 ns > tCK(avg) >= 1.875 ns)
3155 ** 2 7 (1.875 ns > tCK(avg) >= 1.5 ns)
3156 ** 3 8 (1.5 ns > tCK(avg) >= 1.25 ns)
3157 ** 4 9 (1.25 ns > tCK(avg) >= 1.07 ns)
3158 ** 5 10 (1.07 ns > tCK(avg) >= 0.935 ns)
3159 ** 6 11 (0.935 ns > tCK(avg) >= 0.833 ns)
3160 ** 7 12 (0.833 ns > tCK(avg) >= 0.75 ns)
3161 */
3162
3163 mp0.s.cwl = 0;
3164 if (tclk_psecs < 2500)
3165 mp0.s.cwl = 1;
3166 if (tclk_psecs < 1875)
3167 mp0.s.cwl = 2;
3168 if (tclk_psecs < 1500)
3169 mp0.s.cwl = 3;
3170 if (tclk_psecs < 1250)
3171 mp0.s.cwl = 4;
3172 if (tclk_psecs < 1070)
3173 mp0.s.cwl = 5;
3174 if (tclk_psecs < 935)
3175 mp0.s.cwl = 6;
3176 if (tclk_psecs < 833)
3177 mp0.s.cwl = 7;
3178 }
3179
3180 s = lookup_env(priv, "ddr_cwl");
3181 if (s)
3182 mp0.s.cwl = simple_strtoul(s, NULL, 0) - 5;
3183
3184 if (ddr_type == DDR4_DRAM) {
3185 debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
3186 mp0.s.cwl + 9
3187 + ((mp0.s.cwl > 2) ? (mp0.s.cwl - 3) * 2 : 0), mp0.s.cwl);
3188 } else {
3189 debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
3190 mp0.s.cwl + 5, mp0.s.cwl);
3191 }
3192
3193 mp0.s.mprloc = 0;
3194 mp0.s.mpr = 0;
3195 mp0.s.dll = (ddr_type == DDR4_DRAM); /* 0 for DDR3 and 1 for DDR4 */
3196 mp0.s.al = 0;
3197 mp0.s.wlev = 0; /* Read Only */
3198 if (octeon_is_cpuid(OCTEON_CN70XX) || ddr_type == DDR4_DRAM)
3199 mp0.s.tdqs = 0;
3200 else
3201 mp0.s.tdqs = 1;
3202 mp0.s.qoff = 0;
3203
3204 s = lookup_env(priv, "ddr_cl");
3205 if (s) {
3206 cl = simple_strtoul(s, NULL, 0);
3207 debug("CAS Latency : %6d\n",
3208 cl);
3209 }
3210
3211 if (ddr_type == DDR4_DRAM) {
3212 mp0.s.cl = 0x0;
3213 if (cl > 9)
3214 mp0.s.cl = 0x1;
3215 if (cl > 10)
3216 mp0.s.cl = 0x2;
3217 if (cl > 11)
3218 mp0.s.cl = 0x3;
3219 if (cl > 12)
3220 mp0.s.cl = 0x4;
3221 if (cl > 13)
3222 mp0.s.cl = 0x5;
3223 if (cl > 14)
3224 mp0.s.cl = 0x6;
3225 if (cl > 15)
3226 mp0.s.cl = 0x7;
3227 if (cl > 16)
3228 mp0.s.cl = 0x8;
3229 if (cl > 18)
3230 mp0.s.cl = 0x9;
3231 if (cl > 20)
3232 mp0.s.cl = 0xA;
3233 if (cl > 24)
3234 mp0.s.cl = 0xB;
3235 } else {
3236 mp0.s.cl = 0x2;
3237 if (cl > 5)
3238 mp0.s.cl = 0x4;
3239 if (cl > 6)
3240 mp0.s.cl = 0x6;
3241 if (cl > 7)
3242 mp0.s.cl = 0x8;
3243 if (cl > 8)
3244 mp0.s.cl = 0xA;
3245 if (cl > 9)
3246 mp0.s.cl = 0xC;
3247 if (cl > 10)
3248 mp0.s.cl = 0xE;
3249 if (cl > 11)
3250 mp0.s.cl = 0x1;
3251 if (cl > 12)
3252 mp0.s.cl = 0x3;
3253 if (cl > 13)
3254 mp0.s.cl = 0x5;
3255 if (cl > 14)
3256 mp0.s.cl = 0x7;
3257 if (cl > 15)
3258 mp0.s.cl = 0x9;
3259 }
3260
3261 mp0.s.rbt = 0; /* Read Only. */
3262 mp0.s.tm = 0;
3263 mp0.s.dllr = 0;
3264
3265 param = divide_roundup(twr, tclk_psecs);
3266
3267 if (ddr_type == DDR4_DRAM) { /* DDR4 */
3268 mp0.s.wrp = 1;
3269 if (param > 12)
3270 mp0.s.wrp = 2;
3271 if (param > 14)
3272 mp0.s.wrp = 3;
3273 if (param > 16)
3274 mp0.s.wrp = 4;
3275 if (param > 18)
3276 mp0.s.wrp = 5;
3277 if (param > 20)
3278 mp0.s.wrp = 6;
3279 if (param > 24) /* RESERVED in DDR4 spec */
3280 mp0.s.wrp = 7;
3281 } else { /* DDR3 */
3282 mp0.s.wrp = 1;
3283 if (param > 5)
3284 mp0.s.wrp = 2;
3285 if (param > 6)
3286 mp0.s.wrp = 3;
3287 if (param > 7)
3288 mp0.s.wrp = 4;
3289 if (param > 8)
3290 mp0.s.wrp = 5;
3291 if (param > 10)
3292 mp0.s.wrp = 6;
3293 if (param > 12)
3294 mp0.s.wrp = 7;
3295 }
3296
3297 mp0.s.ppd = 0;
3298
3299 s = lookup_env(priv, "ddr_wrp");
3300 if (s)
3301 mp0.s.wrp = simple_strtoul(s, NULL, 0);
3302
3303 debug("%-45s : %d, [0x%x]\n",
3304 "Write recovery for auto precharge WRP, [CSR]", param, mp0.s.wrp);
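	/*
	 * Example (illustrative): with twr = 15000 ps and tclk_psecs = 1250,
	 * param = roundup(15000 / 1250) = 12, so the DDR4 branch above leaves
	 * mp0.s.wrp = 1, while the DDR3 branch would pick mp0.s.wrp = 6.
	 */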
3305
3306 s = lookup_env_ull(priv, "ddr_modereg_params0");
3307 if (s)
3308 mp0.u64 = simple_strtoull(s, NULL, 0);
3309
3310 debug("MODEREG_PARAMS0 : 0x%016llx\n",
3311 mp0.u64);
3312 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);
3313}
3314
3315static void lmc_modereg_params1(struct ddr_priv *priv)
3316{
3317 union cvmx_lmcx_modereg_params1 mp1;
3318 char *s;
3319 int i;
3320
3321 mp1.u64 = odt_config[odt_idx].modereg_params1.u64;
3322
3323 /*
3324 * Special request: mismatched DIMM support. Slot 0: 2-Rank,
3325 * Slot 1: 1-Rank
3326 */
3327 if (rank_mask == 0x7) { /* 2-Rank, 1-Rank */
3328 mp1.s.rtt_nom_00 = 0;
3329 mp1.s.rtt_nom_01 = 3; /* rttnom_40ohm */
3330 mp1.s.rtt_nom_10 = 3; /* rttnom_40ohm */
3331 mp1.s.rtt_nom_11 = 0;
3332 dyn_rtt_nom_mask = 0x6;
3333 }
3334
3335 s = lookup_env(priv, "ddr_rtt_nom_mask");
3336 if (s)
3337 dyn_rtt_nom_mask = simple_strtoul(s, NULL, 0);
3338
3339 /*
3340 * Save the original rtt_nom settings before sweeping through
3341 * settings.
3342 */
3343 default_rtt_nom[0] = mp1.s.rtt_nom_00;
3344 default_rtt_nom[1] = mp1.s.rtt_nom_01;
3345 default_rtt_nom[2] = mp1.s.rtt_nom_10;
3346 default_rtt_nom[3] = mp1.s.rtt_nom_11;
3347
3348 ddr_rtt_nom_auto = c_cfg->ddr_rtt_nom_auto;
3349
3350 for (i = 0; i < 4; ++i) {
3351 u64 value;
3352
3353 s = lookup_env(priv, "ddr_rtt_nom_%1d%1d", !!(i & 2),
3354 !!(i & 1));
3355 if (!s)
3356 s = lookup_env(priv, "ddr%d_rtt_nom_%1d%1d", if_num,
3357 !!(i & 2), !!(i & 1));
3358 if (s) {
3359 value = simple_strtoul(s, NULL, 0);
3360 mp1.u64 &= ~((u64)0x7 << (i * 12 + 9));
3361 mp1.u64 |= ((value & 0x7) << (i * 12 + 9));
3362 default_rtt_nom[i] = value;
3363 ddr_rtt_nom_auto = 0;
3364 }
3365 }
3366
3367 s = lookup_env(priv, "ddr_rtt_nom");
3368 if (!s)
3369 s = lookup_env(priv, "ddr%d_rtt_nom", if_num);
3370 if (s) {
3371 u64 value;
3372
3373 value = simple_strtoul(s, NULL, 0);
3374
3375 if (dyn_rtt_nom_mask & 1) {
3376 default_rtt_nom[0] = value;
3377 mp1.s.rtt_nom_00 = value;
3378 }
3379 if (dyn_rtt_nom_mask & 2) {
3380 default_rtt_nom[1] = value;
3381 mp1.s.rtt_nom_01 = value;
3382 }
3383 if (dyn_rtt_nom_mask & 4) {
3384 default_rtt_nom[2] = value;
3385 mp1.s.rtt_nom_10 = value;
3386 }
3387 if (dyn_rtt_nom_mask & 8) {
3388 default_rtt_nom[3] = value;
3389 mp1.s.rtt_nom_11 = value;
3390 }
3391
3392 ddr_rtt_nom_auto = 0;
3393 }
3394
3395 for (i = 0; i < 4; ++i) {
3396 u64 value;
3397
3398 s = lookup_env(priv, "ddr_rtt_wr_%1d%1d", !!(i & 2), !!(i & 1));
3399 if (!s)
3400 s = lookup_env(priv, "ddr%d_rtt_wr_%1d%1d", if_num,
3401 !!(i & 2), !!(i & 1));
3402 if (s) {
3403 value = simple_strtoul(s, NULL, 0);
3404 insrt_wr(&mp1.u64, i, value);
3405 }
3406 }
3407
3408 // Make sure 78XX pass 1 has valid RTT_WR settings, because
3409	// configuration files may be set up for later chips, and
3410 // 78XX pass 1 supports no RTT_WR extension bits
3411 if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
3412 for (i = 0; i < 4; ++i) {
3413 // if 80 or undefined
3414 if (extr_wr(mp1.u64, i) > 3) {
3415 // FIXME? always insert 120
3416 insrt_wr(&mp1.u64, i, 1);
3417 debug("RTT_WR_%d%d set to 120 for CN78XX pass 1\n",
3418 !!(i & 2), i & 1);
3419 }
3420 }
3421 }
3422
3423 s = lookup_env(priv, "ddr_dic");
3424 if (s) {
3425 u64 value = simple_strtoul(s, NULL, 0);
3426
3427 for (i = 0; i < 4; ++i) {
3428 mp1.u64 &= ~((u64)0x3 << (i * 12 + 7));
3429 mp1.u64 |= ((value & 0x3) << (i * 12 + 7));
3430 }
3431 }
3432
3433 for (i = 0; i < 4; ++i) {
3434 u64 value;
3435
3436 s = lookup_env(priv, "ddr_dic_%1d%1d", !!(i & 2), !!(i & 1));
3437 if (s) {
3438 value = simple_strtoul(s, NULL, 0);
3439 mp1.u64 &= ~((u64)0x3 << (i * 12 + 7));
3440 mp1.u64 |= ((value & 0x3) << (i * 12 + 7));
3441 }
3442 }
3443
3444 s = lookup_env_ull(priv, "ddr_modereg_params1");
3445 if (s)
3446 mp1.u64 = simple_strtoull(s, NULL, 0);
3447
3448 debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
3449 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
3450 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
3451 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
3452 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
3453 mp1.s.rtt_nom_11,
3454 mp1.s.rtt_nom_10, mp1.s.rtt_nom_01, mp1.s.rtt_nom_00);
3455
3456 debug("RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
3457 imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)],
3458 imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)],
3459 imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)],
3460 imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)],
3461 extr_wr(mp1.u64, 3),
3462 extr_wr(mp1.u64, 2), extr_wr(mp1.u64, 1), extr_wr(mp1.u64, 0));
3463
3464 debug("DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
3465 imp_val->dic_ohms[mp1.s.dic_11],
3466 imp_val->dic_ohms[mp1.s.dic_10],
3467 imp_val->dic_ohms[mp1.s.dic_01],
3468 imp_val->dic_ohms[mp1.s.dic_00],
3469 mp1.s.dic_11, mp1.s.dic_10, mp1.s.dic_01, mp1.s.dic_00);
3470
3471 debug("MODEREG_PARAMS1 : 0x%016llx\n",
3472 mp1.u64);
3473 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num), mp1.u64);
3474}
3475
3476static void lmc_modereg_params2(struct ddr_priv *priv)
3477{
3478 char *s;
3479 int i;
3480
3481 if (ddr_type == DDR4_DRAM) {
3482 union cvmx_lmcx_modereg_params2 mp2;
3483
3484 mp2.u64 = odt_config[odt_idx].modereg_params2.u64;
3485
3486 s = lookup_env(priv, "ddr_rtt_park");
3487 if (s) {
3488 u64 value = simple_strtoul(s, NULL, 0);
3489
3490 for (i = 0; i < 4; ++i) {
3491 mp2.u64 &= ~((u64)0x7 << (i * 10 + 0));
3492 mp2.u64 |= ((value & 0x7) << (i * 10 + 0));
3493 }
3494 }
3495
3496 for (i = 0; i < 4; ++i) {
3497 u64 value;
3498
3499 s = lookup_env(priv, "ddr_rtt_park_%1d%1d", !!(i & 2),
3500 !!(i & 1));
3501 if (s) {
3502 value = simple_strtoul(s, NULL, 0);
3503 mp2.u64 &= ~((u64)0x7 << (i * 10 + 0));
3504 mp2.u64 |= ((value & 0x7) << (i * 10 + 0));
3505 }
3506 }
3507
3508 s = lookup_env_ull(priv, "ddr_modereg_params2");
3509 if (s)
3510 mp2.u64 = simple_strtoull(s, NULL, 0);
3511
3512 debug("RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
3513 imp_val->rtt_nom_ohms[mp2.s.rtt_park_11],
3514 imp_val->rtt_nom_ohms[mp2.s.rtt_park_10],
3515 imp_val->rtt_nom_ohms[mp2.s.rtt_park_01],
3516 imp_val->rtt_nom_ohms[mp2.s.rtt_park_00],
3517 mp2.s.rtt_park_11, mp2.s.rtt_park_10, mp2.s.rtt_park_01,
3518 mp2.s.rtt_park_00);
3519
3520 debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE",
3521 mp2.s.vref_range_11,
3522 mp2.s.vref_range_10,
3523 mp2.s.vref_range_01, mp2.s.vref_range_00);
3524
3525 debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE",
3526 mp2.s.vref_value_11,
3527 mp2.s.vref_value_10,
3528 mp2.s.vref_value_01, mp2.s.vref_value_00);
3529
3530 debug("MODEREG_PARAMS2 : 0x%016llx\n",
3531 mp2.u64);
3532 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num), mp2.u64);
3533 }
3534}
3535
3536static void lmc_modereg_params3(struct ddr_priv *priv)
3537{
3538 char *s;
3539
3540 if (ddr_type == DDR4_DRAM) {
3541 union cvmx_lmcx_modereg_params3 mp3;
3542
3543 mp3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num));
3544 /* Disable as workaround to Errata 20547 */
3545 mp3.s.rd_dbi = 0;
3546 mp3.s.tccd_l = max(divide_roundup(ddr4_tccd_lmin, tclk_psecs),
3547 5ull) - 4;
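		/*
		 * Worked example (hypothetical numbers, assuming
		 * divide_roundup() is a round-up division): with
		 * ddr4_tccd_lmin = 6250 ps and tclk_psecs = 938 ps,
		 * divide_roundup() gives 7 nCK, max(7, 5) = 7, so
		 * tccd_l is programmed as 7 - 4 = 3.
		 */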
3548
3549 s = lookup_env(priv, "ddr_rd_preamble");
3550 if (s)
3551 mp3.s.rd_preamble = !!simple_strtoul(s, NULL, 0);
3552
3553 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
3554 int delay = 0;
3555
3556 if (lranks_per_prank == 4 && ddr_hertz >= 1000000000)
3557 delay = 1;
3558
3559 mp3.s.xrank_add_tccd_l = delay;
3560 mp3.s.xrank_add_tccd_s = delay;
3561 }
3562
3563 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num), mp3.u64);
3564 debug("MODEREG_PARAMS3 : 0x%016llx\n",
3565 mp3.u64);
3566 }
3567}
3568
3569static void lmc_nxm(struct ddr_priv *priv)
3570{
3571 union cvmx_lmcx_nxm lmc_nxm;
3572 int num_bits = row_lsb + row_bits + lranks_bits - 26;
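	/*
	 * Worked example (hypothetical geometry, for illustration only):
	 * with row_lsb = 16, row_bits = 16 and lranks_bits = 0, num_bits
	 * is 16 + 16 + 0 - 26 = 6, i.e. the rank would decode
	 * 2^(26 + 6) = 2^32 bytes of address space if those were the
	 * real geometry values.
	 */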
3573 char *s;
3574
3575 lmc_nxm.u64 = lmc_rd(priv, CVMX_LMCX_NXM(if_num));
3576
3577 /* .cn78xx. */
3578 if (rank_mask & 0x1)
3579 lmc_nxm.cn78xx.mem_msb_d0_r0 = num_bits;
3580 if (rank_mask & 0x2)
3581 lmc_nxm.cn78xx.mem_msb_d0_r1 = num_bits;
3582 if (rank_mask & 0x4)
3583 lmc_nxm.cn78xx.mem_msb_d1_r0 = num_bits;
3584 if (rank_mask & 0x8)
3585 lmc_nxm.cn78xx.mem_msb_d1_r1 = num_bits;
3586
3587 /* Set the mask for non-existent ranks. */
3588 lmc_nxm.cn78xx.cs_mask = ~rank_mask & 0xff;
3589
3590 s = lookup_env_ull(priv, "ddr_nxm");
3591 if (s)
3592 lmc_nxm.u64 = simple_strtoull(s, NULL, 0);
3593
3594 debug("LMC_NXM : 0x%016llx\n",
3595 lmc_nxm.u64);
3596 lmc_wr(priv, CVMX_LMCX_NXM(if_num), lmc_nxm.u64);
3597}
3598
3599static void lmc_wodt_mask(struct ddr_priv *priv)
3600{
3601 union cvmx_lmcx_wodt_mask wodt_mask;
3602 char *s;
3603
3604 wodt_mask.u64 = odt_config[odt_idx].odt_mask;
3605
3606 s = lookup_env_ull(priv, "ddr_wodt_mask");
3607 if (s)
3608 wodt_mask.u64 = simple_strtoull(s, NULL, 0);
3609
3610 debug("WODT_MASK : 0x%016llx\n",
3611 wodt_mask.u64);
3612 lmc_wr(priv, CVMX_LMCX_WODT_MASK(if_num), wodt_mask.u64);
3613}
3614
3615static void lmc_rodt_mask(struct ddr_priv *priv)
3616{
3617 union cvmx_lmcx_rodt_mask rodt_mask;
3618 int rankx;
3619 char *s;
3620
3621 rodt_mask.u64 = odt_config[odt_idx].rodt_ctl;
3622
3623 s = lookup_env_ull(priv, "ddr_rodt_mask");
3624 if (s)
3625 rodt_mask.u64 = simple_strtoull(s, NULL, 0);
3626
3627 debug("%-45s : 0x%016llx\n", "RODT_MASK", rodt_mask.u64);
3628 lmc_wr(priv, CVMX_LMCX_RODT_MASK(if_num), rodt_mask.u64);
3629
3630 dyn_rtt_nom_mask = 0;
3631 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
3632 if (!(rank_mask & (1 << rankx)))
3633 continue;
3634 dyn_rtt_nom_mask |= ((rodt_mask.u64 >> (8 * rankx)) & 0xff);
3635 }
3636 if (num_ranks == 4) {
3637 /*
3638 * Normally ODT1 is wired to rank 1. For quad-ranked DIMMs
3639 * ODT1 is wired to the third rank (rank 2). The mask,
3640 * dyn_rtt_nom_mask, is used to indicate for which ranks
3641 * to sweep RTT_NOM during read-leveling. Shift the bit
3642 * from the ODT1 position over to the "ODT2" position so
3643 * that the read-leveling analysis comes out right.
3644 */
3645 int odt1_bit = dyn_rtt_nom_mask & 2;
3646
3647 dyn_rtt_nom_mask &= ~2;
3648 dyn_rtt_nom_mask |= odt1_bit << 1;
3649 }
3650 debug("%-45s : 0x%02x\n", "DYN_RTT_NOM_MASK", dyn_rtt_nom_mask);
3651}
3652
3653static void lmc_comp_ctl2(struct ddr_priv *priv)
3654{
3655 union cvmx_lmcx_comp_ctl2 cc2;
3656 char *s;
3657
3658 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
3659
3660 cc2.cn78xx.dqx_ctl = odt_config[odt_idx].odt_ena;
3661 /* Default 4=34.3 ohm */
3662 cc2.cn78xx.ck_ctl = (c_cfg->ck_ctl == 0) ? 4 : c_cfg->ck_ctl;
3663 /* Default 4=34.3 ohm */
3664 cc2.cn78xx.cmd_ctl = (c_cfg->cmd_ctl == 0) ? 4 : c_cfg->cmd_ctl;
3665 /* Default 4=34.3 ohm */
3666 cc2.cn78xx.control_ctl = (c_cfg->ctl_ctl == 0) ? 4 : c_cfg->ctl_ctl;
3667
3668 ddr_rodt_ctl_auto = c_cfg->ddr_rodt_ctl_auto;
3669 s = lookup_env(priv, "ddr_rodt_ctl_auto");
3670 if (s)
3671 ddr_rodt_ctl_auto = !!simple_strtoul(s, NULL, 0);
3672
3673 default_rodt_ctl = odt_config[odt_idx].qs_dic;
3674 s = lookup_env(priv, "ddr_rodt_ctl");
3675 if (!s)
3676 s = lookup_env(priv, "ddr%d_rodt_ctl", if_num);
3677 if (s) {
3678 default_rodt_ctl = simple_strtoul(s, NULL, 0);
3679 ddr_rodt_ctl_auto = 0;
3680 }
3681
3682 cc2.cn70xx.rodt_ctl = default_rodt_ctl;
3683
3684 // if DDR4, force CK_CTL to 26 ohms if it is currently 34 ohms,
3685 // and DCLK speed is 1 GHz or more...
3686 if (ddr_type == DDR4_DRAM && cc2.s.ck_ctl == ddr4_driver_34_ohm &&
3687 ddr_hertz >= 1000000000) {
3688 // lowest for DDR4 is 26 ohms
3689 cc2.s.ck_ctl = ddr4_driver_26_ohm;
3690 debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CK_CTL] to %d, %d ohms\n",
3691 node, if_num, cc2.s.ck_ctl,
3692 imp_val->drive_strength[cc2.s.ck_ctl]);
3693 }
3694
3695 // if DDR4, 2DPC, UDIMM, force CONTROL_CTL and CMD_CTL to 26 ohms,
3696 // if DCLK speed is 1 GHz or more...
3697 if (ddr_type == DDR4_DRAM && dimm_count == 2 &&
3698 (spd_dimm_type == 2 || spd_dimm_type == 6) &&
3699 ddr_hertz >= 1000000000) {
3700 // lowest for DDR4 is 26 ohms
3701 cc2.cn78xx.control_ctl = ddr4_driver_26_ohm;
3702 // lowest for DDR4 is 26 ohms
3703 cc2.cn78xx.cmd_ctl = ddr4_driver_26_ohm;
3704 debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CONTROL_CTL,CMD_CTL] to %d, %d ohms\n",
3705 node, if_num, ddr4_driver_26_ohm,
3706 imp_val->drive_strength[ddr4_driver_26_ohm]);
3707 }
3708
3709 s = lookup_env(priv, "ddr_ck_ctl");
3710 if (s)
3711 cc2.cn78xx.ck_ctl = simple_strtoul(s, NULL, 0);
3712
3713 s = lookup_env(priv, "ddr_cmd_ctl");
3714 if (s)
3715 cc2.cn78xx.cmd_ctl = simple_strtoul(s, NULL, 0);
3716
3717 s = lookup_env(priv, "ddr_control_ctl");
3718 if (s)
3719 cc2.cn70xx.control_ctl = simple_strtoul(s, NULL, 0);
3720
3721 s = lookup_env(priv, "ddr_dqx_ctl");
3722 if (s)
3723 cc2.cn78xx.dqx_ctl = simple_strtoul(s, NULL, 0);
3724
3725 debug("%-45s : %d, %d ohms\n", "DQX_CTL ", cc2.cn78xx.dqx_ctl,
3726 imp_val->drive_strength[cc2.cn78xx.dqx_ctl]);
3727 debug("%-45s : %d, %d ohms\n", "CK_CTL ", cc2.cn78xx.ck_ctl,
3728 imp_val->drive_strength[cc2.cn78xx.ck_ctl]);
3729 debug("%-45s : %d, %d ohms\n", "CMD_CTL ", cc2.cn78xx.cmd_ctl,
3730 imp_val->drive_strength[cc2.cn78xx.cmd_ctl]);
3731 debug("%-45s : %d, %d ohms\n", "CONTROL_CTL ",
3732 cc2.cn78xx.control_ctl,
3733 imp_val->drive_strength[cc2.cn78xx.control_ctl]);
3734 debug("Read ODT_CTL : 0x%x (%d ohms)\n",
3735 cc2.cn78xx.rodt_ctl, imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
3736
3737 debug("%-45s : 0x%016llx\n", "COMP_CTL2", cc2.u64);
3738 lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
3739}
3740
3741static void lmc_phy_ctl(struct ddr_priv *priv)
3742{
3743 union cvmx_lmcx_phy_ctl phy_ctl;
3744
3745 phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
3746 phy_ctl.s.ts_stagger = 0;
3747 // FIXME: are there others TBD?
3748 phy_ctl.s.dsk_dbg_overwrt_ena = 0;
3749
3750 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) {
3751 // C0 is TEN, C1 is A17
3752 phy_ctl.s.c0_sel = 2;
3753 phy_ctl.s.c1_sel = 2;
3754 debug("N%d.LMC%d: 3DS: setting PHY_CTL[cx_csel] = %d\n",
3755 node, if_num, phy_ctl.s.c1_sel);
3756 }
3757
3758 debug("PHY_CTL : 0x%016llx\n",
3759 phy_ctl.u64);
3760 lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
3761}
3762
3763static void lmc_ext_config(struct ddr_priv *priv)
3764{
3765 union cvmx_lmcx_ext_config ext_cfg;
3766 char *s;
3767
3768 ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
3769 ext_cfg.s.vrefint_seq_deskew = 0;
3770 ext_cfg.s.read_ena_bprch = 1;
3771 ext_cfg.s.read_ena_fprch = 1;
3772 ext_cfg.s.drive_ena_fprch = 1;
3773 ext_cfg.s.drive_ena_bprch = 1;
3774 // make sure this is OFF for all current chips
3775 ext_cfg.s.invert_data = 0;
3776
3777 s = lookup_env(priv, "ddr_read_fprch");
3778 if (s)
3779 ext_cfg.s.read_ena_fprch = strtoul(s, NULL, 0);
3780
3781 s = lookup_env(priv, "ddr_read_bprch");
3782 if (s)
3783 ext_cfg.s.read_ena_bprch = strtoul(s, NULL, 0);
3784
3785 s = lookup_env(priv, "ddr_drive_fprch");
3786 if (s)
3787 ext_cfg.s.drive_ena_fprch = strtoul(s, NULL, 0);
3788
3789 s = lookup_env(priv, "ddr_drive_bprch");
3790 if (s)
3791 ext_cfg.s.drive_ena_bprch = strtoul(s, NULL, 0);
3792
3793 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) {
3794 ext_cfg.s.dimm0_cid = lranks_bits;
3795 ext_cfg.s.dimm1_cid = lranks_bits;
3796 debug("N%d.LMC%d: 3DS: setting EXT_CONFIG[dimmx_cid] = %d\n",
3797 node, if_num, ext_cfg.s.dimm0_cid);
3798 }
3799
3800 lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64);
3801 debug("%-45s : 0x%016llx\n", "EXT_CONFIG", ext_cfg.u64);
3802}
3803
3804static void lmc_ext_config2(struct ddr_priv *priv)
3805{
3806 char *s;
3807
3808 // NOTE: all chips have this register, but not necessarily the
3809 // fields we modify...
3810 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) &&
3811 !octeon_is_cpuid(OCTEON_CN73XX)) {
3812 union cvmx_lmcx_ext_config2 ext_cfg2;
3813 int value = 1; // default to 1
3814
3815 ext_cfg2.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG2(if_num));
3816
3817 s = lookup_env(priv, "ddr_ext2_delay_unload");
3818 if (s)
3819 value = !!simple_strtoul(s, NULL, 0);
3820
3821 ext_cfg2.s.delay_unload_r0 = value;
3822 ext_cfg2.s.delay_unload_r1 = value;
3823 ext_cfg2.s.delay_unload_r2 = value;
3824 ext_cfg2.s.delay_unload_r3 = value;
3825
3826 lmc_wr(priv, CVMX_LMCX_EXT_CONFIG2(if_num), ext_cfg2.u64);
3827 debug("%-45s : 0x%016llx\n", "EXT_CONFIG2", ext_cfg2.u64);
3828 }
3829}
3830
3831static void lmc_dimm01_params_loop(struct ddr_priv *priv)
3832{
3833 union cvmx_lmcx_dimmx_params dimm_p;
3834 int dimmx = didx;
3835 char *s;
3836 int rc;
3837 int i;
3838
3839 dimm_p.u64 = lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num));
3840
3841 if (ddr_type == DDR4_DRAM) {
3842 union cvmx_lmcx_dimmx_ddr4_params0 ddr4_p0;
3843 union cvmx_lmcx_dimmx_ddr4_params1 ddr4_p1;
3844 union cvmx_lmcx_ddr4_dimm_ctl ddr4_ctl;
3845
3846 dimm_p.s.rc0 = 0;
3847 dimm_p.s.rc1 = 0;
3848 dimm_p.s.rc2 = 0;
3849
3850 rc = read_spd(&dimm_config_table[didx], 0,
3851 DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL);
3852 dimm_p.s.rc3 = (rc >> 4) & 0xf;
3853 dimm_p.s.rc4 = ((rc >> 0) & 0x3) << 2;
3854 dimm_p.s.rc4 |= ((rc >> 2) & 0x3) << 0;
3855
3856 rc = read_spd(&dimm_config_table[didx], 0,
3857 DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK);
3858 dimm_p.s.rc5 = ((rc >> 0) & 0x3) << 2;
3859 dimm_p.s.rc5 |= ((rc >> 2) & 0x3) << 0;
3860
3861 dimm_p.s.rc6 = 0;
3862 dimm_p.s.rc7 = 0;
3863 dimm_p.s.rc8 = 0;
3864 dimm_p.s.rc9 = 0;
3865
3866 /*
3867 * rc10 DDR4 RDIMM Operating Speed
3868 * === ===================================================
3869 * 0 tclk_psecs >= 1250 psec DDR4-1600 (1250 ps)
3870 * 1 1250 psec > tclk_psecs >= 1071 psec DDR4-1866 (1071 ps)
3871 * 2 1071 psec > tclk_psecs >= 938 psec DDR4-2133 ( 938 ps)
3872 * 3 938 psec > tclk_psecs >= 833 psec DDR4-2400 ( 833 ps)
3873 * 4 833 psec > tclk_psecs >= 750 psec DDR4-2666 ( 750 ps)
3874 * 5 750 psec > tclk_psecs >= 625 psec DDR4-3200 ( 625 ps)
3875 */
3876 dimm_p.s.rc10 = 0;
3877 if (tclk_psecs < 1250)
3878 dimm_p.s.rc10 = 1;
3879 if (tclk_psecs < 1071)
3880 dimm_p.s.rc10 = 2;
3881 if (tclk_psecs < 938)
3882 dimm_p.s.rc10 = 3;
3883 if (tclk_psecs < 833)
3884 dimm_p.s.rc10 = 4;
3885 if (tclk_psecs < 750)
3886 dimm_p.s.rc10 = 5;
3887
3888 dimm_p.s.rc11 = 0;
3889 dimm_p.s.rc12 = 0;
3890 /* 0=LRDIMM, 1=RDIMM */
3891 dimm_p.s.rc13 = (spd_dimm_type == 4) ? 0 : 4;
3892 dimm_p.s.rc13 |= (ddr_type == DDR4_DRAM) ?
3893 (spd_addr_mirror << 3) : 0;
3894 dimm_p.s.rc14 = 0;
3895 dimm_p.s.rc15 = 0; /* 1 nCK latency adder */
3896
3897 ddr4_p0.u64 = 0;
3898
3899 ddr4_p0.s.rc8x = 0;
3900 ddr4_p0.s.rc7x = 0;
3901 ddr4_p0.s.rc6x = 0;
3902 ddr4_p0.s.rc5x = 0;
3903 ddr4_p0.s.rc4x = 0;
3904
3905 ddr4_p0.s.rc3x = compute_rc3x(tclk_psecs);
3906
3907 ddr4_p0.s.rc2x = 0;
3908 ddr4_p0.s.rc1x = 0;
3909
3910 ddr4_p1.u64 = 0;
3911
3912 ddr4_p1.s.rcbx = 0;
3913 ddr4_p1.s.rcax = 0;
3914 ddr4_p1.s.rc9x = 0;
3915
3916 ddr4_ctl.u64 = 0;
3917 ddr4_ctl.cn70xx.ddr4_dimm0_wmask = 0x004;
3918 ddr4_ctl.cn70xx.ddr4_dimm1_wmask =
3919 (dimm_count > 1) ? 0x004 : 0x0000;
3920
3921 /*
3922 * Handle any overrides from envvars here...
3923 */
3924 s = lookup_env(priv, "ddr_ddr4_params0");
3925 if (s)
3926 ddr4_p0.u64 = simple_strtoul(s, NULL, 0);
3927
3928 s = lookup_env(priv, "ddr_ddr4_params1");
3929 if (s)
3930 ddr4_p1.u64 = simple_strtoul(s, NULL, 0);
3931
3932 s = lookup_env(priv, "ddr_ddr4_dimm_ctl");
3933 if (s)
3934 ddr4_ctl.u64 = simple_strtoul(s, NULL, 0);
3935
3936 for (i = 0; i < 11; ++i) {
3937 u64 value;
3938
3939 s = lookup_env(priv, "ddr_ddr4_rc%1xx", i + 1);
3940 if (s) {
3941 value = simple_strtoul(s, NULL, 0);
3942 if (i < 8) {
3943 ddr4_p0.u64 &= ~((u64)0xff << (i * 8));
3944 ddr4_p0.u64 |= (value << (i * 8));
3945 } else {
3946 ddr4_p1.u64 &=
3947 ~((u64)0xff << ((i - 8) * 8));
3948 ddr4_p1.u64 |= (value << ((i - 8) * 8));
3949 }
3950 }
3951 }
3952
3953 /*
3954 * write the final CSR values
3955 */
3956 lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS0(dimmx, if_num),
3957 ddr4_p0.u64);
3958
3959 lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), ddr4_ctl.u64);
3960
3961 lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS1(dimmx, if_num),
3962 ddr4_p1.u64);
3963
3964 debug("DIMM%d Register Control Words RCBx:RC1x : %x %x %x %x %x %x %x %x %x %x %x\n",
3965 dimmx, ddr4_p1.s.rcbx, ddr4_p1.s.rcax,
3966 ddr4_p1.s.rc9x, ddr4_p0.s.rc8x,
3967 ddr4_p0.s.rc7x, ddr4_p0.s.rc6x,
3968 ddr4_p0.s.rc5x, ddr4_p0.s.rc4x,
3969 ddr4_p0.s.rc3x, ddr4_p0.s.rc2x, ddr4_p0.s.rc1x);
3970
3971 } else {
3972 rc = read_spd(&dimm_config_table[didx], 0, 69);
3973 dimm_p.s.rc0 = (rc >> 0) & 0xf;
3974 dimm_p.s.rc1 = (rc >> 4) & 0xf;
3975
3976 rc = read_spd(&dimm_config_table[didx], 0, 70);
3977 dimm_p.s.rc2 = (rc >> 0) & 0xf;
3978 dimm_p.s.rc3 = (rc >> 4) & 0xf;
3979
3980 rc = read_spd(&dimm_config_table[didx], 0, 71);
3981 dimm_p.s.rc4 = (rc >> 0) & 0xf;
3982 dimm_p.s.rc5 = (rc >> 4) & 0xf;
3983
3984 rc = read_spd(&dimm_config_table[didx], 0, 72);
3985 dimm_p.s.rc6 = (rc >> 0) & 0xf;
3986 dimm_p.s.rc7 = (rc >> 4) & 0xf;
3987
3988 rc = read_spd(&dimm_config_table[didx], 0, 73);
3989 dimm_p.s.rc8 = (rc >> 0) & 0xf;
3990 dimm_p.s.rc9 = (rc >> 4) & 0xf;
3991
3992 rc = read_spd(&dimm_config_table[didx], 0, 74);
3993 dimm_p.s.rc10 = (rc >> 0) & 0xf;
3994 dimm_p.s.rc11 = (rc >> 4) & 0xf;
3995
3996 rc = read_spd(&dimm_config_table[didx], 0, 75);
3997 dimm_p.s.rc12 = (rc >> 0) & 0xf;
3998 dimm_p.s.rc13 = (rc >> 4) & 0xf;
3999
4000 rc = read_spd(&dimm_config_table[didx], 0, 76);
4001 dimm_p.s.rc14 = (rc >> 0) & 0xf;
4002 dimm_p.s.rc15 = (rc >> 4) & 0xf;
4003
4004 s = ddr_getenv_debug(priv, "ddr_clk_drive");
4005 if (s) {
4006 if (strcmp(s, "light") == 0)
4007 dimm_p.s.rc5 = 0x0; /* Light Drive */
4008 if (strcmp(s, "moderate") == 0)
4009 dimm_p.s.rc5 = 0x5; /* Moderate Drive */
4010 if (strcmp(s, "strong") == 0)
4011 dimm_p.s.rc5 = 0xA; /* Strong Drive */
4012 printf("Parameter found in environment. ddr_clk_drive = %s\n",
4013 s);
4014 }
4015
4016 s = ddr_getenv_debug(priv, "ddr_cmd_drive");
4017 if (s) {
4018 if (strcmp(s, "light") == 0)
4019 dimm_p.s.rc3 = 0x0; /* Light Drive */
4020 if (strcmp(s, "moderate") == 0)
4021 dimm_p.s.rc3 = 0x5; /* Moderate Drive */
4022 if (strcmp(s, "strong") == 0)
4023 dimm_p.s.rc3 = 0xA; /* Strong Drive */
4024 printf("Parameter found in environment. ddr_cmd_drive = %s\n",
4025 s);
4026 }
4027
4028 s = ddr_getenv_debug(priv, "ddr_ctl_drive");
4029 if (s) {
4030 if (strcmp(s, "light") == 0)
4031 dimm_p.s.rc4 = 0x0; /* Light Drive */
4032 if (strcmp(s, "moderate") == 0)
4033 dimm_p.s.rc4 = 0x5; /* Moderate Drive */
4034 printf("Parameter found in environment. ddr_ctl_drive = %s\n",
4035 s);
4036 }
4037
4038 /*
4039 * rc10 DDR3 RDIMM Operating Speed
4040 * == =====================================================
4041 * 0 tclk_psecs >= 2500 psec DDR3/DDR3L-800 def
4042 * 1 2500 psec > tclk_psecs >= 1875 psec DDR3/DDR3L-1066
4043 * 2 1875 psec > tclk_psecs >= 1500 psec DDR3/DDR3L-1333
4044 * 3 1500 psec > tclk_psecs >= 1250 psec DDR3/DDR3L-1600
4045 * 4 1250 psec > tclk_psecs >= 1071 psec DDR3-1866
4046 */
4047 dimm_p.s.rc10 = 0;
4048 if (tclk_psecs < 2500)
4049 dimm_p.s.rc10 = 1;
4050 if (tclk_psecs < 1875)
4051 dimm_p.s.rc10 = 2;
4052 if (tclk_psecs < 1500)
4053 dimm_p.s.rc10 = 3;
4054 if (tclk_psecs < 1250)
4055 dimm_p.s.rc10 = 4;
4056 }
4057
4058 s = lookup_env(priv, "ddr_dimmx_params", i);
4059 if (s)
4060 dimm_p.u64 = simple_strtoul(s, NULL, 0);
4061
4062 for (i = 0; i < 16; ++i) {
4063 u64 value;
4064
4065 s = lookup_env(priv, "ddr_rc%d", i);
4066 if (s) {
4067 value = simple_strtoul(s, NULL, 0);
4068 dimm_p.u64 &= ~((u64)0xf << (i * 4));
4069 dimm_p.u64 |= (value << (i * 4));
4070 }
4071 }
4072
4073 lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num), dimm_p.u64);
4074
4075 debug("DIMM%d Register Control Words RC15:RC0 : %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",
4076 dimmx, dimm_p.s.rc15, dimm_p.s.rc14, dimm_p.s.rc13,
4077 dimm_p.s.rc12, dimm_p.s.rc11, dimm_p.s.rc10,
4078 dimm_p.s.rc9, dimm_p.s.rc8, dimm_p.s.rc7,
4079 dimm_p.s.rc6, dimm_p.s.rc5, dimm_p.s.rc4,
4080 dimm_p.s.rc3, dimm_p.s.rc2, dimm_p.s.rc1, dimm_p.s.rc0);
4081
4082 // FIXME: recognize a DDR3 RDIMM with 4 ranks and 2 registers,
4083 // and treat it specially
4084 if (ddr_type == DDR3_DRAM && num_ranks == 4 &&
4085 spd_rdimm_registers == 2 && dimmx == 0) {
4086 debug("DDR3: Copying DIMM0_PARAMS to DIMM1_PARAMS for pseudo-DIMM #1...\n");
4087 lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(1, if_num), dimm_p.u64);
4088 }
4089}
4090
4091static void lmc_dimm01_params(struct ddr_priv *priv)
4092{
4093 union cvmx_lmcx_dimm_ctl dimm_ctl;
4094 char *s;
4095
4096 if (spd_rdimm) {
4097 for (didx = 0; didx < (unsigned int)dimm_count; ++didx)
4098 lmc_dimm01_params_loop(priv);
4099
4100 if (ddr_type == DDR4_DRAM) {
4101 /* LMC0_DIMM_CTL */
4102 dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
4103 dimm_ctl.s.dimm0_wmask = 0xdf3f;
4104 dimm_ctl.s.dimm1_wmask =
4105 (dimm_count > 1) ? 0xdf3f : 0x0000;
4106 dimm_ctl.s.tcws = 0x4e0;
4107 dimm_ctl.s.parity = c_cfg->parity;
4108
4109 s = lookup_env(priv, "ddr_dimm0_wmask");
4110 if (s) {
4111 dimm_ctl.s.dimm0_wmask =
4112 simple_strtoul(s, NULL, 0);
4113 }
4114
4115 s = lookup_env(priv, "ddr_dimm1_wmask");
4116 if (s) {
4117 dimm_ctl.s.dimm1_wmask =
4118 simple_strtoul(s, NULL, 0);
4119 }
4120
4121 s = lookup_env(priv, "ddr_dimm_ctl_parity");
4122 if (s)
4123 dimm_ctl.s.parity = simple_strtoul(s, NULL, 0);
4124
4125 s = lookup_env(priv, "ddr_dimm_ctl_tcws");
4126 if (s)
4127 dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0);
4128
4129 debug("LMC DIMM_CTL : 0x%016llx\n",
4130 dimm_ctl.u64);
4131 lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
4132
4133 /* Init RCW */
4134 oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
4135
4136 /* Write RC0D last */
4137 dimm_ctl.s.dimm0_wmask = 0x2000;
4138 dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ?
4139 0x2000 : 0x0000;
4140 debug("LMC DIMM_CTL : 0x%016llx\n",
4141 dimm_ctl.u64);
4142 lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
4143
4144 /*
4145 * Don't write any extended registers the second time
4146 */
4147 lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), 0);
4148
4149 /* Init RCW */
4150 oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
4151 } else {
4152 /* LMC0_DIMM_CTL */
4153 dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
4154 dimm_ctl.s.dimm0_wmask = 0xffff;
4155 // FIXME: recognize a DDR3 RDIMM with 4 ranks and 2
4156 // registers, and treat it specially
4157 if (num_ranks == 4 && spd_rdimm_registers == 2) {
4158 debug("DDR3: Activating DIMM_CTL[dimm1_mask] bits...\n");
4159 dimm_ctl.s.dimm1_wmask = 0xffff;
4160 } else {
4161 dimm_ctl.s.dimm1_wmask =
4162 (dimm_count > 1) ? 0xffff : 0x0000;
4163 }
4164 dimm_ctl.s.tcws = 0x4e0;
4165 dimm_ctl.s.parity = c_cfg->parity;
4166
4167 s = lookup_env(priv, "ddr_dimm0_wmask");
4168 if (s) {
4169 dimm_ctl.s.dimm0_wmask =
4170 simple_strtoul(s, NULL, 0);
4171 }
4172
4173 s = lookup_env(priv, "ddr_dimm1_wmask");
4174 if (s) {
4175 dimm_ctl.s.dimm1_wmask =
4176 simple_strtoul(s, NULL, 0);
4177 }
4178
4179 s = lookup_env(priv, "ddr_dimm_ctl_parity");
4180 if (s)
4181 dimm_ctl.s.parity = simple_strtoul(s, NULL, 0);
4182
4183 s = lookup_env(priv, "ddr_dimm_ctl_tcws");
4184 if (s)
4185 dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0);
4186
4187 debug("LMC DIMM_CTL : 0x%016llx\n",
4188 dimm_ctl.u64);
4189 lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
4190
4191 /* Init RCW */
4192 oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
4193 }
4194
4195 } else {
4196 /* Disable register control writes for unbuffered */
4197 union cvmx_lmcx_dimm_ctl dimm_ctl;
4198
4199 dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
4200 dimm_ctl.s.dimm0_wmask = 0;
4201 dimm_ctl.s.dimm1_wmask = 0;
4202 lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
4203 }
4204}
4205
4206static int lmc_rank_init(struct ddr_priv *priv)
4207{
4208 char *s;
4209
4210 if (enable_by_rank_init) {
4211 by_rank = 3;
4212 saved_rank_mask = rank_mask;
4213 }
4214
4215start_by_rank_init:
4216
4217 if (enable_by_rank_init) {
4218 rank_mask = (1 << by_rank);
4219 if (!(rank_mask & saved_rank_mask))
4220 goto end_by_rank_init;
4221 if (by_rank == 0)
4222 rank_mask = saved_rank_mask;
4223
4224 debug("\n>>>>> BY_RANK: starting rank %d with mask 0x%02x\n\n",
4225 by_rank, rank_mask);
4226 }
4227
4228 /*
4229 * Comments (steps 3 through 5) continue in oct3_ddr3_seq()
4230 */
4231 union cvmx_lmcx_modereg_params0 mp0;
4232
4233 if (ddr_memory_preserved(priv)) {
4234 /*
4235 * Contents are being preserved. Take DRAM out of self-refresh
4236 * first. Then init steps can proceed normally
4237 */
4238 /* self-refresh exit */
4239 oct3_ddr3_seq(priv, rank_mask, if_num, 3);
4240 }
4241
4242 mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
4243 mp0.s.dllr = 1; /* Set during first init sequence */
4244 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);
4245
4246 ddr_init_seq(priv, rank_mask, if_num);
4247
4248 mp0.s.dllr = 0; /* Clear for normal operation */
4249 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);
4250
4251 if (spd_rdimm && ddr_type == DDR4_DRAM &&
4252 octeon_is_cpuid(OCTEON_CN7XXX)) {
4253 debug("Running init sequence 1\n");
4254 change_rdimm_mpr_pattern(priv, rank_mask, if_num, dimm_count);
4255 }
4256
4257 memset(lanes, 0, sizeof(lanes));
4258 for (lane = 0; lane < last_lane; lane++) {
4259 // init all lanes to reset value
4260 dac_settings[lane] = 127;
4261 }
4262
4263 // FIXME: disable internal VREF if deskew is disabled?
4264 if (disable_deskew_training) {
4265 debug("N%d.LMC%d: internal VREF Training disabled, leaving them in RESET.\n",
4266 node, if_num);
4267 num_samples = 0;
4268 } else if (ddr_type == DDR4_DRAM &&
4269 !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
4270 num_samples = DEFAULT_DAC_SAMPLES;
4271 } else {
4272 // if DDR3 or no ability to write DAC values
4273 num_samples = 1;
4274 }
4275
4276perform_internal_vref_training:
4277
4278 total_dac_eval_retries = 0;
4279 dac_eval_exhausted = 0;
4280
4281 for (sample = 0; sample < num_samples; sample++) {
4282 dac_eval_retries = 0;
4283
4284 // make offset and internal vref training repeatable
4285 do {
4286 /*
4287 * 6.9.8 LMC Offset Training
4288 * LMC requires input-receiver offset training.
4289 */
4290 perform_offset_training(priv, rank_mask, if_num);
4291
4292 /*
4293 * 6.9.9 LMC Internal vref Training
4294 * LMC requires input-reference-voltage training.
4295 */
4296 perform_internal_vref_training(priv, rank_mask, if_num);
4297
4298 // read and maybe display the DAC values for a sample
4299 read_dac_dbi_settings(priv, if_num, /*DAC*/ 1,
4300 dac_settings);
4301 if (num_samples == 1 || ddr_verbose(priv)) {
4302 display_dac_dbi_settings(if_num, /*DAC*/ 1,
4303 use_ecc, dac_settings,
4304 "Internal VREF");
4305 }
4306
4307 // for DDR4, evaluate the DAC settings and retry
4308 // if any issues
4309 if (ddr_type == DDR4_DRAM) {
4310 if (evaluate_dac_settings
4311 (if_64b, use_ecc, dac_settings)) {
4312 dac_eval_retries += 1;
4313 if (dac_eval_retries >
4314 DAC_RETRIES_LIMIT) {
4315 debug("N%d.LMC%d: DDR4 internal VREF DAC settings: retries exhausted; continuing...\n",
4316 node, if_num);
4317 dac_eval_exhausted += 1;
4318 } else {
4319 debug("N%d.LMC%d: DDR4 internal VREF DAC settings inconsistent; retrying....\n",
4320 node, if_num);
4321 total_dac_eval_retries += 1;
4322 // try another sample
4323 continue;
4324 }
4325 }
4326
4327 // taking multiple samples, otherwise do nothing
4328 if (num_samples > 1) {
4329 // good sample or exhausted retries,
4330 // record it
4331 for (lane = 0; lane < last_lane;
4332 lane++) {
4333 lanes[lane].bytes[sample] =
4334 dac_settings[lane];
4335 }
4336 }
4337 }
4338 // done if DDR3, or good sample, or exhausted retries
4339 break;
4340 } while (1);
4341 }
4342
4343 if (ddr_type == DDR4_DRAM && dac_eval_exhausted > 0) {
4344 debug("N%d.LMC%d: DDR internal VREF DAC settings: total retries %d, exhausted %d\n",
4345 node, if_num, total_dac_eval_retries, dac_eval_exhausted);
4346 }
4347
4348 if (num_samples > 1) {
4349 debug("N%d.LMC%d: DDR4 internal VREF DAC settings: processing multiple samples...\n",
4350 node, if_num);
4351
4352 for (lane = 0; lane < last_lane; lane++) {
4353 dac_settings[lane] =
4354 process_samples_average(&lanes[lane].bytes[0],
4355 num_samples, if_num, lane);
4356 }
4357 display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc,
4358 dac_settings, "Averaged VREF");
4359
4360 // finally, write the final DAC values
4361 for (lane = 0; lane < last_lane; lane++) {
4362 load_dac_override(priv, if_num, dac_settings[lane],
4363 lane);
4364 }
4365 }
4366
4367 // allow override of any byte-lane internal VREF
4368 int overrode_vref_dac = 0;
4369
4370 for (lane = 0; lane < last_lane; lane++) {
4371 s = lookup_env(priv, "ddr%d_vref_dac_byte%d", if_num, lane);
4372 if (s) {
4373 dac_settings[lane] = simple_strtoul(s, NULL, 0);
4374 overrode_vref_dac = 1;
4375 // finally, write the new DAC value
4376 load_dac_override(priv, if_num, dac_settings[lane],
4377 lane);
4378 }
4379 }
4380 if (overrode_vref_dac) {
4381 display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc,
4382 dac_settings, "Override VREF");
4383 }
4384
4385 // as a second step, after internal VREF training, before starting
4386 // deskew training:
4387 // for DDR3 and OCTEON3 not O78 pass 1.x, override the DAC setting
4388 // to 127
4389 if (ddr_type == DDR3_DRAM && !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) &&
4390 !disable_deskew_training) {
4391 load_dac_override(priv, if_num, 127, /* all */ 0x0A);
4392 debug("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127.\n",
4393 node, if_num);
4394 }
4395
4396 /*
4397 * 4.8.8 LMC Deskew Training
4398 *
4399 * LMC requires input-read-data deskew training.
4400 */
4401 if (!disable_deskew_training) {
4402 deskew_training_errors =
4403 perform_deskew_training(priv, rank_mask, if_num,
4404 spd_rawcard_aorb);
4405
4406 // All the Deskew lock and saturation retries (may) have
4407 // been done, but we ended up with nibble errors; so,
4408 // as a last ditch effort, try the Internal vref
4409 // Training again...
4410 if (deskew_training_errors) {
4411 if (internal_retries <
4412 DEFAULT_INTERNAL_VREF_TRAINING_LIMIT) {
4413 internal_retries++;
4414 debug("N%d.LMC%d: Deskew training results still unsettled - retrying internal vref training (%d)\n",
4415 node, if_num, internal_retries);
4416 goto perform_internal_vref_training;
4417 } else {
4418 if (restart_if_dsk_incomplete) {
4419 debug("N%d.LMC%d: INFO: Deskew training incomplete - %d retries exhausted, Restarting LMC init...\n",
4420 node, if_num, internal_retries);
4421 return -EAGAIN;
4422 }
4423 debug("N%d.LMC%d: Deskew training incomplete - %d retries exhausted, but continuing...\n",
4424 node, if_num, internal_retries);
4425 }
4426 } /* if (deskew_training_errors) */
4427
4428 // FIXME: treat this as the final DSK print from now on,
4429 // and print it at VBL_NORM or above. Also, save the results
4430 // of the original training in case we want them later
4431 validate_deskew_training(priv, rank_mask, if_num,
4432 &deskew_training_results, 1);
4433 } else { /* if (! disable_deskew_training) */
4434 debug("N%d.LMC%d: Deskew Training disabled, printing settings before HWL.\n",
4435 node, if_num);
4436 validate_deskew_training(priv, rank_mask, if_num,
4437 &deskew_training_results, 1);
4438 } /* if (! disable_deskew_training) */
4439
4440 if (enable_by_rank_init) {
4441 read_dac_dbi_settings(priv, if_num, /*dac */ 1,
4442 &rank_dac[by_rank].bytes[0]);
4443 get_deskew_settings(priv, if_num, &rank_dsk[by_rank]);
4444 debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank);
4445 }
4446
4447end_by_rank_init:
4448
4449 if (enable_by_rank_init) {
4450 //debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank);
4451
4452 by_rank--;
4453 if (by_rank >= 0)
4454 goto start_by_rank_init;
4455
4456 rank_mask = saved_rank_mask;
4457 ddr_init_seq(priv, rank_mask, if_num);
4458
4459 process_by_rank_dac(priv, if_num, rank_mask, rank_dac);
4460 process_by_rank_dsk(priv, if_num, rank_mask, rank_dsk);
4461
4462 // FIXME: set this to prevent later checking!!!
4463 disable_deskew_training = 1;
4464
4465 debug("\n>>>>> BY_RANK: FINISHED!!\n\n");
4466 }
4467
4468 return 0;
4469}
4470
4471static void lmc_config_2(struct ddr_priv *priv)
4472{
4473 union cvmx_lmcx_config lmc_config;
4474 int save_ref_zqcs_int;
4475 u64 temp_delay_usecs;
4476
4477 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
4478
4479 /*
4480 * Temporarily select the minimum ZQCS interval and wait
4481 * long enough for a few ZQCS calibrations to occur. This
4482 * should ensure that the calibration circuitry is
4483 * stabilized before read/write leveling occurs.
4484 */
4485 if (octeon_is_cpuid(OCTEON_CN7XXX)) {
4486 save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int;
4487 /* set smallest interval */
4488 lmc_config.cn78xx.ref_zqcs_int = 1 | (32 << 7);
4489 } else {
4490 save_ref_zqcs_int = lmc_config.cn63xx.ref_zqcs_int;
4491 /* set smallest interval */
4492 lmc_config.cn63xx.ref_zqcs_int = 1 | (32 << 7);
4493 }
4494 lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
4495 lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
4496
4497 /*
4498 * Compute an appropriate delay based on the current ZQCS
4499 * interval. The delay should be long enough for the
4500 * current ZQCS delay counter to expire plus ten of the
4501 * minimum intervals to ensure that some calibrations
4502 * occur.
4503 */
4504 temp_delay_usecs = (((u64)save_ref_zqcs_int >> 7) * tclk_psecs *
4505 100 * 512 * 128) / (10000 * 10000) + 10 *
4506 ((u64)32 * tclk_psecs * 100 * 512 * 128) / (10000 * 10000);
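	/*
	 * Worked example (hypothetical values, illustration only): with
	 * tclk_psecs = 938 and (save_ref_zqcs_int >> 7) = 128, the first
	 * term evaluates to about 7868 usecs and the second to about
	 * 19671 usecs, so the wait below would be roughly 27.5 ms.
	 */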
4507
4508 debug("Waiting %lld usecs for ZQCS calibrations to start\n",
4509 temp_delay_usecs);
4510 udelay(temp_delay_usecs);
4511
4512 if (octeon_is_cpuid(OCTEON_CN7XXX)) {
4513 /* Restore computed interval */
4514 lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int;
4515 } else {
4516 /* Restore computed interval */
4517 lmc_config.cn63xx.ref_zqcs_int = save_ref_zqcs_int;
4518 }
4519
4520 lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
4521 lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
4522}
4523
4524static union cvmx_lmcx_wlevel_ctl wl_ctl __section(".data");
4525static union cvmx_lmcx_wlevel_rankx wl_rank __section(".data");
4526static union cvmx_lmcx_modereg_params1 mp1 __section(".data");
4527
4528static int wl_mask[9] __section(".data");
4529static int byte_idx __section(".data");
4530static int ecc_ena __section(".data");
4531static int wl_roundup __section(".data");
4532static int save_mode32b __section(".data");
4533static int disable_hwl_validity __section(".data");
4534static int default_wl_rtt_nom __section(".data");
4535static int wl_pbm_pump __section(".data");
4536
4537static void lmc_write_leveling_loop(struct ddr_priv *priv, int rankx)
4538{
4539 int wloop = 0;
4540 // retries per sample for HW-related issues with bitmasks or values
4541 int wloop_retries = 0;
4542 int wloop_retries_total = 0;
4543 int wloop_retries_exhausted = 0;
4544#define WLOOP_RETRIES_DEFAULT 5
4545 int wl_val_err;
4546 int wl_mask_err_rank = 0;
4547 int wl_val_err_rank = 0;
4548 // array to collect counts of byte-lane values
4549 // assume low-order 3 bits and even, so really only 2-bit values
4550 struct wlevel_bitcnt wl_bytes[9], wl_bytes_extra[9];
4551 int extra_bumps, extra_mask;
4552 int rank_nom = 0;
4553
4554 if (!(rank_mask & (1 << rankx)))
4555 return;
4556
4557 if (match_wl_rtt_nom) {
4558 if (rankx == 0)
4559 rank_nom = mp1.s.rtt_nom_00;
4560 if (rankx == 1)
4561 rank_nom = mp1.s.rtt_nom_01;
4562 if (rankx == 2)
4563 rank_nom = mp1.s.rtt_nom_10;
4564 if (rankx == 3)
4565 rank_nom = mp1.s.rtt_nom_11;
4566
4567 debug("N%d.LMC%d.R%d: Setting WLEVEL_CTL[rtt_nom] to %d (%d)\n",
4568 node, if_num, rankx, rank_nom,
4569 imp_val->rtt_nom_ohms[rank_nom]);
4570 }
4571
4572 memset(wl_bytes, 0, sizeof(wl_bytes));
4573 memset(wl_bytes_extra, 0, sizeof(wl_bytes_extra));
4574
4575 // restructure the looping so we can keep trying until we get the
4576 // samples we want
4577 while (wloop < wl_loops) {
4578 wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num));
4579
4580 wl_ctl.cn78xx.rtt_nom =
4581 (default_wl_rtt_nom > 0) ? (default_wl_rtt_nom - 1) : 7;
4582
4583 if (match_wl_rtt_nom) {
4584 wl_ctl.cn78xx.rtt_nom =
4585 (rank_nom > 0) ? (rank_nom - 1) : 7;
4586 }
4587
4588 /* Clear write-level delays */
4589 lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), 0);
4590
4591 wl_mask_err = 0; /* Reset error counters */
4592 wl_val_err = 0;
4593
4594 for (byte_idx = 0; byte_idx < 9; ++byte_idx)
4595 wl_mask[byte_idx] = 0; /* Reset bitmasks */
4596
4597 // do all the byte-lanes at the same time
4598 wl_ctl.cn78xx.lanemask = 0x1ff;
4599
4600 lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64);
4601
4602 /*
4603 * Read and write values back in order to update the
4604 * status field. This ensures that we read the updated
4605 * values after write-leveling has completed.
4606 */
4607 lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
4608 lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num)));
4609
4610 /* write-leveling */
4611 oct3_ddr3_seq(priv, 1 << rankx, if_num, 6);
4612
4613 do {
4614 wl_rank.u64 = lmc_rd(priv,
4615 CVMX_LMCX_WLEVEL_RANKX(rankx,
4616 if_num));
4617 } while (wl_rank.cn78xx.status != 3);
4618
4619 wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx,
4620 if_num));
4621
4622 for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
4623 wl_mask[byte_idx] = lmc_ddr3_wl_dbg_read(priv,
4624 if_num,
4625 byte_idx);
4626 if (wl_mask[byte_idx] == 0)
4627 ++wl_mask_err;
4628 }
4629
4630 // check validity only if no bitmask errors
4631 if (wl_mask_err == 0) {
4632 if ((spd_dimm_type == 1 || spd_dimm_type == 2) &&
4633 dram_width != 16 && if_64b &&
4634 !disable_hwl_validity) {
4635 // bypass if [mini|SO]-[RU]DIMM or x16 or
4636 // 32-bit
4637 wl_val_err =
4638 validate_hw_wl_settings(if_num,
4639 &wl_rank,
4640 spd_rdimm, ecc_ena);
4641 wl_val_err_rank += (wl_val_err != 0);
4642 }
4643 } else {
4644 wl_mask_err_rank++;
4645 }
4646
4647 // before we print, if we had bitmask or validity errors,
4648 // do a retry...
4649 if (wl_mask_err != 0 || wl_val_err != 0) {
4650 if (wloop_retries < WLOOP_RETRIES_DEFAULT) {
4651 wloop_retries++;
4652 wloop_retries_total++;
4653 // this printout is per-retry: only when VBL
4654 // is high enough (DEV?)
4655 // FIXME: do we want to show the bad bitmaps
4656 // or delays here also?
4657 debug("N%d.LMC%d.R%d: H/W Write-Leveling had %s errors - retrying...\n",
4658 node, if_num, rankx,
4659 (wl_mask_err) ? "Bitmask" : "Validity");
4660 // this takes us back to the top without
4661 // counting a sample
4662 return;
4663 }
4664
4665 // retries exhausted, do not print at normal VBL
4666 debug("N%d.LMC%d.R%d: H/W Write-Leveling issues: %s errors\n",
4667 node, if_num, rankx,
4668 (wl_mask_err) ? "Bitmask" : "Validity");
4669 wloop_retries_exhausted++;
4670 }
4671 // no errors or exhausted retries, use this sample
4672 wloop_retries = 0; //reset for next sample
4673
4674 // when only 1 sample or forced, print the bitmasks then
4675 // current HW WL
4676 if (wl_loops == 1 || wl_print) {
4677 if (wl_print > 1)
4678 display_wl_bm(if_num, rankx, wl_mask);
4679 display_wl(if_num, wl_rank, rankx);
4680 }
4681
4682 if (wl_roundup) { /* Round up odd bitmask delays */
4683 for (byte_idx = 0; byte_idx < (8 + ecc_ena);
4684 ++byte_idx) {
4685 if (!(if_bytemask & (1 << byte_idx)))
4686 return;
4687 upd_wl_rank(&wl_rank, byte_idx,
4688 roundup_ddr3_wlevel_bitmask
4689 (wl_mask[byte_idx]));
4690 }
4691 lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
4692 wl_rank.u64);
4693 display_wl(if_num, wl_rank, rankx);
4694 }
4695
4696 // OK, we have a decent sample, no bitmask or validity errors
4697 extra_bumps = 0;
4698 extra_mask = 0;
4699 for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
4700 int ix;
4701
4702 if (!(if_bytemask & (1 << byte_idx)))
4703 return;
4704
4705 // increment count of byte-lane delay value
4706 // only 4 possible values: even delays 0/2/4/6 map to indices 0..3
4707 ix = (get_wl_rank(&wl_rank, byte_idx) >> 1) & 3;
4708 wl_bytes[byte_idx].bitcnt[ix]++;
4709 wl_bytes_extra[byte_idx].bitcnt[ix]++;
4710 // if perfect...
4711 if (__builtin_popcount(wl_mask[byte_idx]) == 4) {
4712 wl_bytes_extra[byte_idx].bitcnt[ix] +=
4713 wl_pbm_pump;
4714 extra_bumps++;
4715 extra_mask |= 1 << byte_idx;
4716 }
4717 }
4718
4719 if (extra_bumps) {
4720 if (wl_print > 1) {
4721 debug("N%d.LMC%d.R%d: HWL sample had %d bumps (0x%02x).\n",
4722 node, if_num, rankx, extra_bumps,
4723 extra_mask);
4724 }
4725 }
4726
4727 // if we get here, we have taken a decent sample
4728 wloop++;
4729
4730 } /* while (wloop < wl_loops) */
4731
4732 // if we did sample more than once, try to pick a majority vote
4733 if (wl_loops > 1) {
4734 // look for the majority in each byte-lane
4735 for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
4736 int mx, mc, xc, cc;
4737 int ix, alts;
4738 int maj, xmaj, xmx, xmc, xxc, xcc;
4739
4740 if (!(if_bytemask & (1 << byte_idx)))
4741 return;
4742 maj = find_wl_majority(&wl_bytes[byte_idx], &mx,
4743 &mc, &xc, &cc);
4744 xmaj = find_wl_majority(&wl_bytes_extra[byte_idx],
4745 &xmx, &xmc, &xxc, &xcc);
4746 if (maj != xmaj) {
4747 if (wl_print) {
4748 debug("N%d.LMC%d.R%d: Byte %d: HWL maj %d(%d), USING xmaj %d(%d)\n",
4749 node, if_num, rankx,
4750 byte_idx, maj, xc, xmaj, xxc);
4751 }
4752 mx = xmx;
4753 mc = xmc;
4754 xc = xxc;
4755 cc = xcc;
4756 }
4757
4758 // see if there was an alternate
4759 // take out the majority choice
4760 alts = (mc & ~(1 << mx));
4761 if (alts != 0) {
4762 for (ix = 0; ix < 4; ix++) {
4763 // FIXME: could be done multiple times?
4764 // bad if so
4765 if (alts & (1 << ix)) {
4766 // set the mask
4767 hwl_alts[rankx].hwl_alt_mask |=
4768 (1 << byte_idx);
4769 // record the value
4770 hwl_alts[rankx].hwl_alt_delay[byte_idx] =
4771 ix << 1;
4772 if (wl_print > 1) {
4773 debug("N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d (%d) alt %d (%d).\n",
4774 node,
4775 if_num,
4776 rankx,
4777 byte_idx,
4778 mx << 1,
4779 xc,
4780 ix << 1,
4781 wl_bytes
4782 [byte_idx].bitcnt
4783 [ix]);
4784 }
4785 }
4786 }
4787 }
4788
4789 if (cc > 2) { // unlikely, but...
4790 // assume: counts for 3 indices are all 1
4791 // possibilities are: 0/2/4, 2/4/6, 0/4/6, 0/2/6
4792 // and the desired?: 2, 4, 6, 0
4793 // we choose the middle, assuming one of the
4794 // outliers is bad
4795 // NOTE: this is an ugly hack at the moment;
4796 // there must be a better way
4797 switch (mc) {
4798 case 0x7:
4799 mx = 1;
4800 break; // was 0/2/4, choose 2
4801 case 0xb:
4802 mx = 0;
4803 break; // was 0/2/6, choose 0
4804 case 0xd:
4805 mx = 3;
4806 break; // was 0/4/6, choose 6
4807 case 0xe:
4808 mx = 2;
4809 break; // was 2/4/6, choose 4
4810 default:
4811 case 0xf:
4812 mx = 1;
4813 break; // was 0/2/4/6, choose 2?
4814 }
4815 printf("N%d.LMC%d.R%d: HW WL MAJORITY: bad byte-lane %d (0x%x), using %d.\n",
4816 node, if_num, rankx, byte_idx, mc,
4817 mx << 1);
4818 }
4819 upd_wl_rank(&wl_rank, byte_idx, mx << 1);
4820 }
4821
4822 lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
4823 wl_rank.u64);
4824 display_wl_with_final(if_num, wl_rank, rankx);
4825
4826 // FIXME: does this help make the output a little easier
4827 // to focus?
4828 if (wl_print > 0)
4829 debug("-----------\n");
4830
4831 } /* if (wl_loops > 1) */
4832
4833 // maybe print an error summary for the rank
4834 if (wl_mask_err_rank != 0 || wl_val_err_rank != 0) {
4835 debug("N%d.LMC%d.R%d: H/W Write-Leveling errors - %d bitmask, %d validity, %d retries, %d exhausted\n",
4836 node, if_num, rankx, wl_mask_err_rank,
4837 wl_val_err_rank, wloop_retries_total,
4838 wloop_retries_exhausted);
4839 }
4840}
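
/*
 * Illustrative sketch only (hypothetical helper, not the find_wl_majority()
 * called above, whose exact return values are assumed elsewhere in this
 * file): a majority vote over a struct wlevel_bitcnt just picks the delay
 * index with the highest count and reports which indices were seen at all.
 */
static inline int sketch_wl_majority(const struct wlevel_bitcnt *wlb,
				     int *best_ix, int *seen_mask)
{
	int ix, best_count = 0;

	*best_ix = 0;
	*seen_mask = 0;

	for (ix = 0; ix < 4; ix++) {
		if (wlb->bitcnt[ix])
			*seen_mask |= 1 << ix;
		if (wlb->bitcnt[ix] > best_count) {
			best_count = wlb->bitcnt[ix];
			*best_ix = ix;
		}
	}

	/* convert the winning index back to a delay value (0/2/4/6) */
	return *best_ix << 1;
}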
4841
4842static void lmc_write_leveling(struct ddr_priv *priv)
4843{
4844 union cvmx_lmcx_config cfg;
4845 int rankx;
4846 char *s;
4847
4848 /*
4849 * 4.8.9 LMC Write Leveling
4850 *
4851 * LMC supports an automatic write leveling like that described in the
4852 * JEDEC DDR3 specifications separately per byte-lane.
4853 *
4854 * All of DDR PLL, LMC CK, LMC DRESET, and early LMC initializations
4855 * must be completed prior to starting this LMC write-leveling sequence.
4856 *
4857 * There are many possible procedures that will write-level all the
4858 * attached DDR3 DRAM parts. One possibility is for software to simply
4859 * write the desired values into LMC(0)_WLEVEL_RANK(0..3). This section
4860 * describes one possible sequence that uses LMC's auto write-leveling
4861 * capabilities.
4862 *
4863 * 1. If the DQS/DQ delays on the board may be more than the ADD/CMD
4864 * delays, then ensure that LMC(0)_CONFIG[EARLY_DQX] is set at this
4865 * point.
4866 *
4867 * Do the remaining steps 2-7 separately for each rank i with attached
4868 * DRAM.
4869 *
4870 * 2. Write LMC(0)_WLEVEL_RANKi = 0.
4871 *
4872 * 3. For x8 parts:
4873 *
4874 * Without changing any other fields in LMC(0)_WLEVEL_CTL, write
4875 * LMC(0)_WLEVEL_CTL[LANEMASK] to select all byte lanes with attached
4876 * DRAM.
4877 *
4878 * For x16 parts:
4879 *
4880 * Without changing any other fields in LMC(0)_WLEVEL_CTL, write
4881 * LMC(0)_WLEVEL_CTL[LANEMASK] to select all even byte lanes with
4882 * attached DRAM.
4883 *
4884 * 4. Without changing any other fields in LMC(0)_CONFIG,
4885 *
4886 * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select write-leveling
4887 *
4888 * o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
4889 *
4890 * o write LMC(0)_SEQ_CTL[INIT_START] = 1
4891 *
4892 * LMC will initiate write-leveling at this point. Assuming
4893 * LMC(0)_WLEVEL_CTL [SSET] = 0, LMC first enables write-leveling on
4894 * the selected DRAM rank via a DDR3 MR1 write, then sequences
4895 * through
4896 * and accumulates write-leveling results for eight different delay
4897 * settings twice, starting at a delay of zero in this case since
4898 * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] = 0, increasing by 1/8 CK each
4899 * setting, covering a total distance of one CK, then disables the
4900 * write-leveling via another DDR3 MR1 write.
4901 *
4902 * After the sequence through 16 delay settings is complete:
4903 *
4904 * o LMC sets LMC(0)_WLEVEL_RANKi[STATUS] = 3
4905 *
4906 * o LMC sets LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] (for all ranks selected
4907 * by LMC(0)_WLEVEL_CTL[LANEMASK]) to indicate the first write
4908 * leveling result of 1 that followed result of 0 during the
4909 * sequence, except that the LMC always writes
4910 * LMC(0)_WLEVEL_RANKi[BYTE*<0>]=0.
4911 *
4912 * o Software can read the eight write-leveling results from the
4913 * first pass through the delay settings by reading
4914 * LMC(0)_WLEVEL_DBG[BITMASK] (after writing
4915 * LMC(0)_WLEVEL_DBG[BYTE]). (LMC does not retain the write-leveling
4916 * results from the second pass through the eight delay
4917 * settings. They should often be identical to the
4918 * LMC(0)_WLEVEL_DBG[BITMASK] results, though.)
4919 *
4920 * 5. Wait until LMC(0)_WLEVEL_RANKi[STATUS] != 2.
4921 *
4922 * LMC will have updated LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] for all byte
4923 * lanes selected by LMC(0)_WLEVEL_CTL[LANEMASK] at this point.
4924 * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] will still be the value that
4925 * software wrote in substep 2 above, which is 0.
4926 *
4927 * 6. For x16 parts:
4928 *
4929 * Without changing any other fields in LMC(0)_WLEVEL_CTL, write
4930 * LMC(0)_WLEVEL_CTL[LANEMASK] to select all odd byte lanes with
4931 * attached DRAM.
4932 *
4933 * Repeat substeps 4 and 5 with this new LMC(0)_WLEVEL_CTL[LANEMASK]
4934 * setting. Skip to substep 7 if this has already been done.
4935 *
4936 * For x8 parts:
4937 *
4938 * Skip this substep. Go to substep 7.
4939 *
4940 * 7. Calculate LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings for all byte
4941 * lanes on all ranks with attached DRAM.
4942 *
4943 * At this point, all byte lanes on rank i with attached DRAM should
4944 * have been write-leveled, and LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] has
4945 * the result for each byte lane.
4946 *
4947 * But note that the DDR3 write-leveling sequence will only determine
4948 * the delay modulo the CK cycle time, and cannot determine how many
4949 * additional CK cycles of delay are present. Software must calculate
4950 * the number of CK cycles, or equivalently, the
4951 * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings.
4952 *
4953 * This BYTE*<4:3> calculation is system/board specific.
4954 *
4955 * Many techniques can be used to calculate write-leveling BYTE*<4:3>
4956 * values, including:
4957 *
4958 * o Known values for some byte lanes.
4959 *
4960 * o Relative values for some byte lanes relative to others.
4961 *
4962 * For example, suppose lane X is likely to require a larger
4963 * write-leveling delay than lane Y. A BYTEX<2:0> value that is much
4964 * smaller than the BYTEY<2:0> value may then indicate that the
4965 * required lane X delay wrapped into the next CK, so BYTEX<4:3>
4966 * should be set to BYTEY<4:3>+1.
4967 *
4968 * When ECC DRAM is not present (i.e. when DRAM is not attached to
4969 * the DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the
4970 * DDR_DQS_<4>_* and DDR_DQ<35:32> chip signals), write
4971 * LMC(0)_WLEVEL_RANK*[BYTE8] = LMC(0)_WLEVEL_RANK*[BYTE0],
4972 * using the final calculated BYTE0 value.
4973 * Write LMC(0)_WLEVEL_RANK*[BYTE4] = LMC(0)_WLEVEL_RANK*[BYTE0],
4974 * using the final calculated BYTE0 value.
4975 *
4976 * 8. Initialize LMC(0)_WLEVEL_RANK* values for all unused ranks.
4977 *
4978 * Let rank i be a rank with attached DRAM.
4979 *
4980 * For all ranks j that do not have attached DRAM, set
4981 * LMC(0)_WLEVEL_RANKj = LMC(0)_WLEVEL_RANKi.
4982 */
4983
4984 rankx = 0;
4985 wl_roundup = 0;
4986 disable_hwl_validity = 0;
4987
4988 // wl_pbm_pump: weight for write-leveling PBMs...
4989 // 0 causes original behavior
4990 // 1 allows a minority of 2 pbms to outscore a majority of 3 non-pbms
4991 // 4 would allow a minority of 1 pbm to outscore a majority of 4
4992 // non-pbms
4993 wl_pbm_pump = 4; // FIXME: is 4 too much?
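	/*
	 * Worked example: with wl_pbm_pump = 4, a sample whose bitmask is
	 * "perfect" (popcount == 4) adds 1 + 4 = 5 to its delay value's
	 * tally in wl_bytes_extra, so a single perfect sample outweighs
	 * up to four ordinary samples of a competing delay value.
	 */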
4994
4995 if (wl_loops) {
4996 debug("N%d.LMC%d: Performing Hardware Write-Leveling\n", node,
4997 if_num);
4998 } else {
4999 /* Force software write-leveling to run */
5000 wl_mask_err = 1;
5001 debug("N%d.LMC%d: Forcing software Write-Leveling\n", node,
5002 if_num);
5003 }
5004
5005 default_wl_rtt_nom = (ddr_type == DDR3_DRAM) ?
5006 rttnom_20ohm : ddr4_rttnom_40ohm;
5007
5008 cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
5009 ecc_ena = cfg.s.ecc_ena;
5010 save_mode32b = cfg.cn78xx.mode32b;
5011 cfg.cn78xx.mode32b = (!if_64b);
5012 lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
5013 debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
5014
5015 s = lookup_env(priv, "ddr_wlevel_roundup");
5016 if (s)
5017 wl_roundup = simple_strtoul(s, NULL, 0);
5018
5019 s = lookup_env(priv, "ddr_wlevel_printall");
5020 if (s)
5021 wl_print = strtoul(s, NULL, 0);
5022
5023 s = lookup_env(priv, "ddr_wlevel_pbm_bump");
5024 if (s)
5025 wl_pbm_pump = strtoul(s, NULL, 0);
5026
5027 // default to disable when RL sequential delay check is disabled
5028 disable_hwl_validity = disable_sequential_delay_check;
5029 s = lookup_env(priv, "ddr_disable_hwl_validity");
5030 if (s)
5031 disable_hwl_validity = !!strtoul(s, NULL, 0);
5032
5033 s = lookup_env(priv, "ddr_wl_rtt_nom");
5034 if (s)
5035 default_wl_rtt_nom = simple_strtoul(s, NULL, 0);
5036
5037 s = lookup_env(priv, "ddr_match_wl_rtt_nom");
5038 if (s)
5039 match_wl_rtt_nom = !!simple_strtoul(s, NULL, 0);
5040
5041 if (match_wl_rtt_nom)
5042 mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
5043
5044 // For DDR3, we do not touch the WLEVEL_CTL fields OR_DIS and BITMASK
5045 // For DDR4, we set the WLEVEL_CTL fields OR_DIS and BITMASK here
5046 if (ddr_type == DDR4_DRAM) {
5047 int default_or_dis = 1;
5048 int default_bitmask = 0xff;
5049
5050 // when x4, use only the lower nibble
5051 if (dram_width == 4) {
5052 default_bitmask = 0x0f;
5053 if (wl_print) {
5054 debug("N%d.LMC%d: WLEVEL_CTL: default bitmask is 0x%02x for DDR4 x4\n",
5055 node, if_num, default_bitmask);
5056 }
5057 }
5058
5059 wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num));
5060 wl_ctl.s.or_dis = default_or_dis;
5061 wl_ctl.s.bitmask = default_bitmask;
5062
5063 // allow overrides
5064 s = lookup_env(priv, "ddr_wlevel_ctl_or_dis");
5065 if (s)
5066 wl_ctl.s.or_dis = !!strtoul(s, NULL, 0);
5067
5068 s = lookup_env(priv, "ddr_wlevel_ctl_bitmask");
5069 if (s)
5070 wl_ctl.s.bitmask = simple_strtoul(s, NULL, 0);
5071
5072 // print only if not defaults
5073 if (wl_ctl.s.or_dis != default_or_dis ||
5074 wl_ctl.s.bitmask != default_bitmask) {
5075 debug("N%d.LMC%d: WLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n",
5076 node, if_num, wl_ctl.s.or_dis, wl_ctl.s.bitmask);
5077 }
5078
5079 // always write
5080 lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64);
5081 }
5082
5083 // Start the hardware write-leveling loop per rank
5084 for (rankx = 0; rankx < dimm_count * 4; rankx++)
5085 lmc_write_leveling_loop(priv, rankx);
5086
5087 cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
5088 cfg.cn78xx.mode32b = save_mode32b;
5089 lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
5090 debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
5091
5092 // At the end of HW Write Leveling, check on some DESKEW things...
5093 if (!disable_deskew_training) {
5094 struct deskew_counts dsk_counts;
5095 int retry_count = 0;
5096
5097 debug("N%d.LMC%d: Check Deskew Settings before Read-Leveling.\n",
5098 node, if_num);
5099
5100 do {
5101 validate_deskew_training(priv, rank_mask, if_num,
5102 &dsk_counts, 1);
5103
5104 // only rawcard A or B will not benefit from retraining
5105 // when there is only saturation; any rawcard benefits from
5106 // retraining when there is a nibble error
5107 if ((!spd_rawcard_aorb && dsk_counts.saturated > 0) ||
5108 (dsk_counts.nibrng_errs != 0 ||
5109 dsk_counts.nibunl_errs != 0)) {
5110 retry_count++;
5111 debug("N%d.LMC%d: Deskew Status indicates saturation or nibble errors - retry %d Training.\n",
5112 node, if_num, retry_count);
5113 perform_deskew_training(priv, rank_mask, if_num,
5114 spd_rawcard_aorb);
5115 } else {
5116 break;
5117 }
5118 } while (retry_count < 5);
5119 }
5120}
5121
5122static void lmc_workaround(struct ddr_priv *priv)
5123{
5124 /* Workaround Trcd overflow by using Additive latency. */
5125 if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
5126 union cvmx_lmcx_modereg_params0 mp0;
5127 union cvmx_lmcx_timing_params1 tp1;
5128 union cvmx_lmcx_control ctrl;
5129 int rankx;
5130
5131 tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
5132 mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
5133 ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
5134
5135 if (tp1.cn78xx.trcd == 0) {
5136 debug("Workaround Trcd overflow by using Additive latency.\n");
5137 /* Hard code this to 12 and enable additive latency */
5138 tp1.cn78xx.trcd = 12;
5139 mp0.s.al = 2; /* CL-2 */
5140 ctrl.s.pocas = 1;
5141
5142 debug("MODEREG_PARAMS0 : 0x%016llx\n",
5143 mp0.u64);
5144 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
5145 mp0.u64);
5146 debug("TIMING_PARAMS1 : 0x%016llx\n",
5147 tp1.u64);
5148 lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
5149
5150 debug("LMC_CONTROL : 0x%016llx\n",
5151 ctrl.u64);
5152 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
5153
5154 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
5155 if (!(rank_mask & (1 << rankx)))
5156 continue;
5157
5158 /* MR1 */
5159 ddr4_mrw(priv, if_num, rankx, -1, 1, 0);
5160 }
5161 }
5162 }
5163
5164 // this is here just for output, to allow check of the Deskew
5165 // settings one last time...
5166 if (!disable_deskew_training) {
5167 struct deskew_counts dsk_counts;
5168
5169 debug("N%d.LMC%d: Check Deskew Settings before software Write-Leveling.\n",
5170 node, if_num);
5171 validate_deskew_training(priv, rank_mask, if_num, &dsk_counts,
5172 3);
5173 }
5174
5175 /*
5176 * Workaround Errata 26304 (T88@2.0, O75@1.x, O78@2.x)
5177 *
5178 * When the CSRs LMCX_DLL_CTL3[WR_DESKEW_ENA] = 1 AND
5179 * LMCX_PHY_CTL2[DQS[0..8]_DSK_ADJ] > 4, set
5180 * LMCX_EXT_CONFIG[DRIVE_ENA_BPRCH] = 1.
5181 */
5182 if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
5183 octeon_is_cpuid(OCTEON_CNF75XX_PASS1_X)) {
5184 union cvmx_lmcx_dll_ctl3 dll_ctl3;
5185 union cvmx_lmcx_phy_ctl2 phy_ctl2;
5186 union cvmx_lmcx_ext_config ext_cfg;
5187 int increased_dsk_adj = 0;
5188 int byte;
5189
5190 phy_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL2(if_num));
5191 ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
5192 dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
5193
5194 for (byte = 0; byte < 8; ++byte) {
5195 if (!(if_bytemask & (1 << byte)))
5196 continue;
5197 increased_dsk_adj |=
5198 (((phy_ctl2.u64 >> (byte * 3)) & 0x7) > 4);
5199 }
5200
5201 if (dll_ctl3.s.wr_deskew_ena == 1 && increased_dsk_adj) {
5202 ext_cfg.s.drive_ena_bprch = 1;
5203 lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64);
5204 debug("LMC%d: Forcing DRIVE_ENA_BPRCH for Workaround Errata 26304.\n",
5205 if_num);
5206 }
5207 }
5208}
5209
5210// Software Write-Leveling block
5211
5212#define VREF_RANGE1_LIMIT 0x33 // range1 is valid for 0x00 - 0x32
5213#define VREF_RANGE2_LIMIT 0x18 // range2 is valid for 0x00 - 0x17
5214// full window is valid for 0x00 to 0x4A
5215// let 0x00 - 0x17 be range2, 0x18 - 0x4a be range 1
5216#define VREF_LIMIT (VREF_RANGE1_LIMIT + VREF_RANGE2_LIMIT)
5217#define VREF_FINAL (VREF_LIMIT - 1)
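
/*
 * Illustrative sketch only (hypothetical helper, not used by the code
 * below): decode the composite vref index that this block sweeps
 * (0x00..VREF_FINAL) into the range select and value passed to
 * set_vref(), mirroring the mapping done inline in ddr4_vref_loop().
 * Composite 0x00-0x17 selects range 2, 0x18-0x4a selects range 1.
 */
static inline void sketch_decode_vref_index(int composite, int *range_sel,
					    int *value)
{
	if (composite < VREF_RANGE2_LIMIT) {
		*range_sel = 1;		/* range 2 */
		*value = composite;
	} else {
		*range_sel = 0;		/* range 1 */
		*value = composite - VREF_RANGE2_LIMIT;
	}
}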
5218
5219enum sw_wl_status {
5220 WL_ESTIMATED = 0, /* HW/SW wleveling failed. Result estimated */
5221 WL_HARDWARE = 1, /* H/W wleveling succeeded */
5222 WL_SOFTWARE = 2, /* S/W wleveling passed 2 contiguous settings */
5223 WL_SOFTWARE1 = 3, /* S/W wleveling passed 1 marginal setting */
5224};
5225
5226static u64 rank_addr __section(".data");
5227static int vref_val __section(".data");
5228static int final_vref_val __section(".data");
5229static int final_vref_range __section(".data");
5230static int start_vref_val __section(".data");
5231static int computed_final_vref_val __section(".data");
5232static char best_vref_val_count __section(".data");
5233static char vref_val_count __section(".data");
5234static char best_vref_val_start __section(".data");
5235static char vref_val_start __section(".data");
5236static int bytes_failed __section(".data");
5237static enum sw_wl_status byte_test_status[9] __section(".data");
5238static enum sw_wl_status sw_wl_rank_status __section(".data");
5239static int sw_wl_failed __section(".data");
5240static int sw_wl_hw __section(".data");
5241static int measured_vref_flag __section(".data");
5242
5243static void ddr4_vref_loop(struct ddr_priv *priv, int rankx)
5244{
5245 char *s;
5246
5247 if (vref_val < VREF_FINAL) {
5248 int vrange, vvalue;
5249
5250 if (vref_val < VREF_RANGE2_LIMIT) {
5251 vrange = 1;
5252 vvalue = vref_val;
5253 } else {
5254 vrange = 0;
5255 vvalue = vref_val - VREF_RANGE2_LIMIT;
5256 }
5257
5258 set_vref(priv, if_num, rankx, vrange, vvalue);
5259 } else { /* if (vref_val < VREF_FINAL) */
5260 /* Print the final vref value first. */
5261
5262 /* Always print the computed first if its valid */
5263 if (computed_final_vref_val >= 0) {
5264 debug("N%d.LMC%d.R%d: vref Computed Summary : %2d (0x%02x)\n",
5265 node, if_num, rankx,
5266 computed_final_vref_val, computed_final_vref_val);
5267 }
5268
5269 if (!measured_vref_flag) { // setup to use the computed
5270 best_vref_val_count = 1;
5271 final_vref_val = computed_final_vref_val;
5272 } else { // setup to use the measured
5273 if (best_vref_val_count > 0) {
5274 best_vref_val_count =
5275 max(best_vref_val_count, (char)2);
5276 final_vref_val = best_vref_val_start +
5277 divide_nint(best_vref_val_count - 1, 2);
5278
5279 if (final_vref_val < VREF_RANGE2_LIMIT) {
5280 final_vref_range = 1;
5281 } else {
5282 final_vref_range = 0;
5283 final_vref_val -= VREF_RANGE2_LIMIT;
5284 }
5285
5286 int vvlo = best_vref_val_start;
5287 int vrlo;
5288 int vvhi = best_vref_val_start +
5289 best_vref_val_count - 1;
5290 int vrhi;
5291
5292 if (vvlo < VREF_RANGE2_LIMIT) {
5293 vrlo = 2;
5294 } else {
5295 vrlo = 1;
5296 vvlo -= VREF_RANGE2_LIMIT;
5297 }
5298
5299 if (vvhi < VREF_RANGE2_LIMIT) {
5300 vrhi = 2;
5301 } else {
5302 vrhi = 1;
5303 vvhi -= VREF_RANGE2_LIMIT;
5304 }
5305 debug("N%d.LMC%d.R%d: vref Training Summary : 0x%02x/%1d <----- 0x%02x/%1d -----> 0x%02x/%1d, range: %2d\n",
5306 node, if_num, rankx, vvlo, vrlo,
5307 final_vref_val,
5308 final_vref_range + 1, vvhi, vrhi,
5309 best_vref_val_count - 1);
5310
5311 } else {
5312 /*
5313 * If nothing passed use the default vref
5314 * value for this rank
5315 */
5316 union cvmx_lmcx_modereg_params2 mp2;
5317
5318 mp2.u64 =
5319 lmc_rd(priv,
5320 CVMX_LMCX_MODEREG_PARAMS2(if_num));
5321 final_vref_val = (mp2.u64 >>
5322 (rankx * 10 + 3)) & 0x3f;
5323 final_vref_range = (mp2.u64 >>
5324 (rankx * 10 + 9)) & 0x01;
5325
5326 debug("N%d.LMC%d.R%d: vref Using Default : %2d <----- %2d (0x%02x) -----> %2d, range%1d\n",
5327 node, if_num, rankx, final_vref_val,
5328 final_vref_val, final_vref_val,
5329 final_vref_val, final_vref_range + 1);
5330 }
5331 }
5332
5333 // allow override
5334 s = lookup_env(priv, "ddr%d_vref_val_%1d%1d",
5335 if_num, !!(rankx & 2), !!(rankx & 1));
5336 if (s)
5337 final_vref_val = strtoul(s, NULL, 0);
5338
5339 set_vref(priv, if_num, rankx, final_vref_range, final_vref_val);
5340 }
5341}
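
/*
 * To summarize the measured-Vref path above: the caller tracks the longest
 * run of combined Vref indices for which software write-leveling passed,
 * and this routine then programs the midpoint of that run,
 * final_vref_val = best_vref_val_start +
 * divide_nint(best_vref_val_count - 1, 2). For example, a passing run of
 * five settings starting at 0x20 yields 0x22, which (being >=
 * VREF_RANGE2_LIMIT) is rebased to value 0x0A with the range1 encoding.
 * With measurement disabled the computed value is used directly, and if no
 * setting passed at all the rank falls back to the defaults already held
 * in MODEREG_PARAMS2.
 */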
5342
5343#define WL_MIN_NO_ERRORS_COUNT 3 // FIXME? three passes without errors
5344
5345static int errors __section(".data");
5346static int byte_delay[9] __section(".data");
5347static u64 bytemask __section(".data");
5348static int bytes_todo __section(".data");
5349static int no_errors_count __section(".data");
5350static u64 bad_bits[2] __section(".data");
5351static u64 sum_dram_dclk __section(".data");
5352static u64 sum_dram_ops __section(".data");
5353static u64 start_dram_dclk __section(".data");
5354static u64 stop_dram_dclk __section(".data");
5355static u64 start_dram_ops __section(".data");
5356static u64 stop_dram_ops __section(".data");
5357
5358static void lmc_sw_write_leveling_loop(struct ddr_priv *priv, int rankx)
5359{
5360 int delay;
5361 int b;
5362
5363 // write the current set of WL delays
5364 lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), wl_rank.u64);
5365 wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num));
5366
5367 // do the test
5368 if (sw_wl_hw) {
5369 errors = run_best_hw_patterns(priv, if_num, rank_addr,
5370 DBTRAIN_TEST, bad_bits);
5371 errors &= bytes_todo; // keep only the ones we are still doing
5372 } else {
5373 start_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num));
5374 start_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num));
5375 errors = test_dram_byte64(priv, if_num, rank_addr, bytemask,
5376 bad_bits);
5377
5378 stop_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num));
5379 stop_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num));
5380 sum_dram_dclk += stop_dram_dclk - start_dram_dclk;
5381 sum_dram_ops += stop_dram_ops - start_dram_ops;
5382 }
5383
5384 debug("WL pass1: test_dram_byte returned 0x%x\n", errors);
5385
5386 // remember, errors will not be returned for byte-lanes that have
5387	// maxed out...
5388 if (errors == 0) {
5389 no_errors_count++; // bump
5390 // bypass check/update completely
5391 if (no_errors_count > 1)
5392 return; // to end of do-while
5393 } else {
5394 no_errors_count = 0; // reset
5395 }
5396
5397 // check errors by byte
5398 for (b = 0; b < 9; ++b) {
5399 if (!(bytes_todo & (1 << b)))
5400 continue;
5401
5402 delay = byte_delay[b];
5403 // yes, an error in this byte lane
5404 if (errors & (1 << b)) {
5405 debug(" byte %d delay %2d Errors\n", b, delay);
5406 // since this byte had an error, we move to the next
5407 // delay value, unless done with it
5408 delay += 8; // incr by 8 to do delay high-order bits
5409 if (delay < 32) {
5410 upd_wl_rank(&wl_rank, b, delay);
5411 debug(" byte %d delay %2d New\n",
5412 b, delay);
5413 byte_delay[b] = delay;
5414 } else {
5415 // reached max delay, maybe really done with
5416 // this byte
5417				// consider an alternate delay, but only when using computed VREF and
5418 if (!measured_vref_flag &&
5419 (hwl_alts[rankx].hwl_alt_mask & (1 << b))) {
5420 // if an alt exists...
5421 // just orig low-3 bits
5422 int bad_delay = delay & 0x6;
5423
5424 // yes, use it
5425 delay = hwl_alts[rankx].hwl_alt_delay[b];
5426 // clear that flag
5427 hwl_alts[rankx].hwl_alt_mask &=
5428 ~(1 << b);
5429 upd_wl_rank(&wl_rank, b, delay);
5430 byte_delay[b] = delay;
5431 debug(" byte %d delay %2d ALTERNATE\n",
5432 b, delay);
5433 debug("N%d.LMC%d.R%d: SWL: Byte %d: %d FAIL, trying ALTERNATE %d\n",
5434 node, if_num,
5435 rankx, b, bad_delay, delay);
5436
5437 } else {
5438 unsigned int bits_bad;
5439
5440 if (b < 8) {
5441 // test no longer, remove from
5442 // byte mask
5443 bytemask &=
5444 ~(0xffULL << (8 * b));
5445 bits_bad = (unsigned int)
5446 ((bad_bits[0] >>
5447 (8 * b)) & 0xffUL);
5448 } else {
5449 bits_bad = (unsigned int)
5450 (bad_bits[1] & 0xffUL);
5451 }
5452
5453 // remove from bytes to do
5454 bytes_todo &= ~(1 << b);
5455 // make sure this is set for this case
5456 byte_test_status[b] = WL_ESTIMATED;
5457 debug(" byte %d delay %2d Exhausted\n",
5458 b, delay);
5459 if (!measured_vref_flag) {
5460 // this is too noisy when doing
5461 // measured VREF
5462 debug("N%d.LMC%d.R%d: SWL: Byte %d (0x%02x): delay %d EXHAUSTED\n",
5463 node, if_num, rankx,
5464 b, bits_bad, delay);
5465 }
5466 }
5467 }
5468 } else {
5469 // no error, stay with current delay, but keep testing
5470 // it...
5471 debug(" byte %d delay %2d Passed\n", b, delay);
5472 byte_test_status[b] = WL_HARDWARE; // change status
5473 }
5474 } /* for (b = 0; b < 9; ++b) */
5475}
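
/*
 * The loop above is one step of the pass-1 search over the write-leveling
 * delay high-order bits: a failing byte lane keeps its low-order phase and
 * is advanced by 8 (one full CK) until it passes, exceeds 31, or (on
 * computed-Vref runs only) is switched to a previously saved hardware
 * alternate. A lane that exhausts its candidates is marked WL_ESTIMATED and
 * dropped from bytemask/bytes_todo; e.g. a lane starting at delay 6 is
 * retried at 14, 22 and 30 before being declared exhausted. The caller
 * repeats this step until WL_MIN_NO_ERRORS_COUNT error-free iterations
 * have been seen.
 */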
5476
5477static void sw_write_lvl_use_ecc(struct ddr_priv *priv, int rankx)
5478{
5479 int save_byte8 = wl_rank.s.byte8;
5480
5481 byte_test_status[8] = WL_HARDWARE; /* H/W delay value */
5482
5483 if (save_byte8 != wl_rank.s.byte3 &&
5484 save_byte8 != wl_rank.s.byte4) {
5485 int test_byte8 = save_byte8;
5486 int test_byte8_error;
5487 int byte8_error = 0x1f;
5488 int adder;
5489 int avg_bytes = divide_nint(wl_rank.s.byte3 + wl_rank.s.byte4,
5490 2);
5491
5492 for (adder = 0; adder <= 32; adder += 8) {
5493 test_byte8_error = abs((adder + save_byte8) -
5494 avg_bytes);
5495 if (test_byte8_error < byte8_error) {
5496 byte8_error = test_byte8_error;
5497 test_byte8 = save_byte8 + adder;
5498 }
5499 }
5500
5501 // only do the check if we are not using measured VREF
5502 if (!measured_vref_flag) {
5503 /* Use only even settings, rounding down... */
5504 test_byte8 &= ~1;
5505
5506 // do validity check on the calculated ECC delay value
5507 // this depends on the DIMM type
5508 if (spd_rdimm) { // RDIMM
5509 // but not mini-RDIMM
5510 if (spd_dimm_type != 5) {
5511 // it can be > byte4, but should never
5512 // be > byte3
5513 if (test_byte8 > wl_rank.s.byte3) {
5514 /* say it is still estimated */
5515 byte_test_status[8] =
5516 WL_ESTIMATED;
5517 }
5518 }
5519 } else { // UDIMM
5520 if (test_byte8 < wl_rank.s.byte3 ||
5521 test_byte8 > wl_rank.s.byte4) {
5522 // should never be outside the
5523 // byte 3-4 range
5524 /* say it is still estimated */
5525 byte_test_status[8] = WL_ESTIMATED;
5526 }
5527 }
5528 /*
5529 * Report whenever the calculation appears bad.
5530			 * This happens if some of the original values were off,
5531			 * if the DIMM type has unexpected geometry, or if there
5532			 * is custom circuitry (NIC225E, I am looking at you!).
5533 * We will trust the calculated value, and depend on
5534 * later testing to catch any instances when that
5535 * value is truly bad.
5536 */
5537 // ESTIMATED means there may be an issue
5538 if (byte_test_status[8] == WL_ESTIMATED) {
5539 debug("N%d.LMC%d.R%d: SWL: (%cDIMM): calculated ECC delay unexpected (%d/%d/%d)\n",
5540 node, if_num, rankx,
5541 (spd_rdimm ? 'R' : 'U'), wl_rank.s.byte4,
5542 test_byte8, wl_rank.s.byte3);
5543 byte_test_status[8] = WL_HARDWARE;
5544 }
5545 }
5546 /* Use only even settings */
5547 wl_rank.s.byte8 = test_byte8 & ~1;
5548 }
5549
5550 if (wl_rank.s.byte8 != save_byte8) {
5551 /* Change the status if s/w adjusted the delay */
5552 byte_test_status[8] = WL_SOFTWARE; /* Estimated delay */
5553 }
5554}
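
/*
 * Worked example for the ECC-byte estimate above (illustrative numbers):
 * with byte3 = 20, byte4 = 14 and a hardware byte8 of 5, the candidates are
 * 5, 13, 21, 29 and 37; the average of bytes 3/4 is 17, so 13 wins (the
 * first candidate at distance 4) and is rounded down to the even value 12
 * before being written back. The RDIMM/UDIMM range checks only decide
 * whether the estimate is reported as suspect; they do not change the value
 * that gets written.
 */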
5555
5556static __maybe_unused void parallel_wl_block_delay(struct ddr_priv *priv,
5557 int rankx)
5558{
5559 int errors;
5560 int byte_delay[8];
5561 int byte_passed[8];
5562 u64 bytemask;
5563 u64 bitmask;
5564 int wl_offset;
5565 int bytes_todo;
5566 int sw_wl_offset = 1;
5567 int delay;
5568 int b;
5569
5570 for (b = 0; b < 8; ++b)
5571 byte_passed[b] = 0;
5572
5573 bytes_todo = if_bytemask;
5574
5575 for (wl_offset = sw_wl_offset; wl_offset >= 0; --wl_offset) {
5576 debug("Starting wl_offset for-loop: %d\n", wl_offset);
5577
5578 bytemask = 0;
5579
5580 for (b = 0; b < 8; ++b) {
5581 byte_delay[b] = 0;
5582 // this does not contain fully passed bytes
5583 if (!(bytes_todo & (1 << b)))
5584 continue;
5585
5586 // reset across passes if not fully passed
5587 byte_passed[b] = 0;
5588 upd_wl_rank(&wl_rank, b, 0); // all delays start at 0
5589 bitmask = ((!if_64b) && (b == 4)) ? 0x0f : 0xff;
5590 // set the bytes bits in the bytemask
5591 bytemask |= bitmask << (8 * b);
5592 } /* for (b = 0; b < 8; ++b) */
5593
5594 // start a pass if there is any byte lane to test
5595 while (bytemask != 0) {
5596 debug("Starting bytemask while-loop: 0x%llx\n",
5597 bytemask);
5598
5599 // write this set of WL delays
5600 lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
5601 wl_rank.u64);
5602 wl_rank.u64 = lmc_rd(priv,
5603 CVMX_LMCX_WLEVEL_RANKX(rankx,
5604 if_num));
5605
5606 // do the test
5607 if (sw_wl_hw) {
5608 errors = run_best_hw_patterns(priv, if_num,
5609 rank_addr,
5610 DBTRAIN_TEST,
5611 NULL) & 0xff;
5612 } else {
5613 errors = test_dram_byte64(priv, if_num,
5614 rank_addr, bytemask,
5615 NULL);
5616 }
5617
5618 debug("test_dram_byte returned 0x%x\n", errors);
5619
5620 // check errors by byte
5621 for (b = 0; b < 8; ++b) {
5622 if (!(bytes_todo & (1 << b)))
5623 continue;
5624
5625 delay = byte_delay[b];
5626 if (errors & (1 << b)) { // yes, an error
5627 debug(" byte %d delay %2d Errors\n",
5628 b, delay);
5629 byte_passed[b] = 0;
5630 } else { // no error
5631 byte_passed[b] += 1;
5632 // Look for consecutive working settings
5633 if (byte_passed[b] == (1 + wl_offset)) {
5634 debug(" byte %d delay %2d FULLY Passed\n",
5635 b, delay);
5636 if (wl_offset == 1) {
5637 byte_test_status[b] =
5638 WL_SOFTWARE;
5639 } else if (wl_offset == 0) {
5640 byte_test_status[b] =
5641 WL_SOFTWARE1;
5642 }
5643
5644 // test no longer, remove
5645 // from byte mask this pass
5646 bytemask &= ~(0xffULL <<
5647 (8 * b));
5648 // remove completely from
5649 // concern
5650 bytes_todo &= ~(1 << b);
5651 // on to the next byte, bypass
5652 // delay updating!!
5653 continue;
5654 } else {
5655 debug(" byte %d delay %2d Passed\n",
5656 b, delay);
5657 }
5658 }
5659
5660				// error or not, move to the next delay value
5661				// for this byte, unless all delays are done;
5662				// only a byte that has "fully passed" bypasses
5663				// this via the continue above
5664 delay += 2;
5665 if (delay < 32) {
5666 upd_wl_rank(&wl_rank, b, delay);
5667 debug(" byte %d delay %2d New\n",
5668 b, delay);
5669 byte_delay[b] = delay;
5670 } else {
5671 // reached max delay, done with this
5672 // byte
5673 debug(" byte %d delay %2d Exhausted\n",
5674 b, delay);
5675 // test no longer, remove from byte
5676 // mask this pass
5677 bytemask &= ~(0xffULL << (8 * b));
5678 }
5679 } /* for (b = 0; b < 8; ++b) */
5680 debug("End of for-loop: bytemask 0x%llx\n", bytemask);
5681 } /* while (bytemask != 0) */
5682 }
5683
5684 for (b = 0; b < 8; ++b) {
5685 // any bytes left in bytes_todo did not pass
5686 if (bytes_todo & (1 << b)) {
5687 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank;
5688
5689 /*
5690 * Last resort. Use Rlevel settings to estimate
5691 * Wlevel if software write-leveling fails
5692 */
5693 debug("Using RLEVEL as WLEVEL estimate for byte %d\n",
5694 b);
5695 lmc_rlevel_rank.u64 =
5696 lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
5697 if_num));
5698 rlevel_to_wlevel(&lmc_rlevel_rank, &wl_rank, b);
5699 }
5700 } /* for (b = 0; b < 8; ++b) */
5701}
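
/*
 * Summary of the parallel block above: delays are swept upward from 0 in
 * steps of 2, and a byte lane is considered done once it passes
 * (1 + wl_offset) consecutive settings, i.e. two in a row on the first pass
 * (status WL_SOFTWARE) or a single setting on the wl_offset == 0 retry
 * (status WL_SOFTWARE1). Any lane that never satisfies either criterion
 * falls back to an estimate derived from its RLEVEL setting.
 */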
5702
5703static int lmc_sw_write_leveling(struct ddr_priv *priv)
5704{
5705 /* Try to determine/optimize write-level delays experimentally. */
5706 union cvmx_lmcx_wlevel_rankx wl_rank_hw_res;
5707 union cvmx_lmcx_config cfg;
5708 int rankx;
5709 int byte;
5710 char *s;
5711 int i;
5712
5713 int active_rank;
5714 int sw_wl_enable = 1; /* FIX... Should be customizable. */
5715 int interfaces;
5716
5717 static const char * const wl_status_strings[] = {
5718 "(e)",
5719 " ",
5720 " ",
5721 "(1)"
5722 };
5723
5724 // FIXME: make HW-assist the default now?
5725 int sw_wl_hw_default = SW_WLEVEL_HW_DEFAULT;
5726 int dram_connection = c_cfg->dram_connection;
5727
5728 s = lookup_env(priv, "ddr_sw_wlevel_hw");
5729 if (s)
5730 sw_wl_hw_default = !!strtoul(s, NULL, 0);
5731 if (!if_64b) // must use SW algo if 32-bit mode
5732 sw_wl_hw_default = 0;
5733
5734 // can never use hw-assist
5735 if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
5736 sw_wl_hw_default = 0;
5737
5738 s = lookup_env(priv, "ddr_software_wlevel");
5739 if (s)
5740 sw_wl_enable = strtoul(s, NULL, 0);
5741
5742 s = lookup_env(priv, "ddr%d_dram_connection", if_num);
5743 if (s)
5744 dram_connection = !!strtoul(s, NULL, 0);
5745
5746 cvmx_rng_enable();
5747
5748 /*
5749 * Get the measured_vref setting from the config, check for an
5750 * override...
5751 */
5752 /* NOTE: measured_vref=1 (ON) means force use of MEASURED vref... */
5753 // NOTE: measured VREF can only be done for DDR4
5754 if (ddr_type == DDR4_DRAM) {
5755 measured_vref_flag = c_cfg->measured_vref;
5756 s = lookup_env(priv, "ddr_measured_vref");
5757 if (s)
5758 measured_vref_flag = !!strtoul(s, NULL, 0);
5759 } else {
5760 measured_vref_flag = 0; // OFF for DDR3
5761 }
5762
5763 /*
5764	 * Ensure ECC is disabled for the DRAM tests when using the S/W
5765	 * algorithm, else leave it untouched
5766 */
5767 if (!sw_wl_hw_default) {
5768 cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
5769 cfg.cn78xx.ecc_ena = 0;
5770 lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
5771 }
5772
5773 /*
5774 * We need to track absolute rank number, as well as how many
5775	 * active ranks we have. Two single-rank DIMMs show up as
5776 * ranks 0 and 2, but only 2 ranks are active.
5777 */
5778 active_rank = 0;
5779
5780 interfaces = __builtin_popcount(if_mask);
5781
5782 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
5783 final_vref_range = 0;
5784 start_vref_val = 0;
5785 computed_final_vref_val = -1;
5786 sw_wl_rank_status = WL_HARDWARE;
5787 sw_wl_failed = 0;
5788 sw_wl_hw = sw_wl_hw_default;
5789
5790 if (!sw_wl_enable)
5791 break;
5792
5793 if (!(rank_mask & (1 << rankx)))
5794 continue;
5795
5796 debug("N%d.LMC%d.R%d: Performing Software Write-Leveling %s\n",
5797 node, if_num, rankx,
5798 (sw_wl_hw) ? "with H/W assist" :
5799 "with S/W algorithm");
5800
5801 if (ddr_type == DDR4_DRAM && num_ranks != 4) {
5802 // always compute when we can...
5803 computed_final_vref_val =
5804 compute_vref_val(priv, if_num, rankx, dimm_count,
5805 num_ranks, imp_val,
5806 is_stacked_die, dram_connection);
5807
5808 // but only use it if allowed
5809 if (!measured_vref_flag) {
5810				// skip all the measured vref processing and
5811				// go straight to the final setting
5812 start_vref_val = VREF_FINAL;
5813 }
5814 }
5815
5816 /* Save off the h/w wl results */
5817 wl_rank_hw_res.u64 = lmc_rd(priv,
5818 CVMX_LMCX_WLEVEL_RANKX(rankx,
5819 if_num));
5820
5821 vref_val_count = 0;
5822 vref_val_start = 0;
5823 best_vref_val_count = 0;
5824 best_vref_val_start = 0;
5825
5826 /* Loop one extra time using the Final vref value. */
5827 for (vref_val = start_vref_val; vref_val < VREF_LIMIT;
5828 ++vref_val) {
5829 if (ddr_type == DDR4_DRAM)
5830 ddr4_vref_loop(priv, rankx);
5831
5832 /* Restore the saved value */
5833 wl_rank.u64 = wl_rank_hw_res.u64;
5834
5835 for (byte = 0; byte < 9; ++byte)
5836 byte_test_status[byte] = WL_ESTIMATED;
5837
5838 if (wl_mask_err == 0) {
5839 /*
5840 * Determine address of DRAM to test for
5841 * pass 1 of software write leveling.
5842 */
5843 rank_addr = active_rank *
5844 (1ull << (pbank_lsb - bunk_enable +
5845 (interfaces / 2)));
5846
5847 /*
5848 * Adjust address for boot bus hole in memory
5849 * map.
5850 */
5851 if (rank_addr > 0x10000000)
5852 rank_addr += 0x10000000;
5853
5854 debug("N%d.LMC%d.R%d: Active Rank %d Address: 0x%llx\n",
5855 node, if_num, rankx, active_rank,
5856 rank_addr);
5857
5858 // start parallel write-leveling block for
5859 // delay high-order bits
5860 errors = 0;
5861 no_errors_count = 0;
5862 sum_dram_dclk = 0;
5863 sum_dram_ops = 0;
5864
5865 if (if_64b) {
5866 bytes_todo = (sw_wl_hw) ?
5867 if_bytemask : 0xFF;
5868 bytemask = ~0ULL;
5869 } else {
5870 // 32-bit, must be using SW algo,
5871 // only data bytes
5872 bytes_todo = 0x0f;
5873 bytemask = 0x00000000ffffffffULL;
5874 }
5875
5876 for (byte = 0; byte < 9; ++byte) {
5877 if (!(bytes_todo & (1 << byte))) {
5878 byte_delay[byte] = 0;
5879 } else {
5880 byte_delay[byte] =
5881 get_wl_rank(&wl_rank, byte);
5882 }
5883 } /* for (byte = 0; byte < 9; ++byte) */
5884
5885 do {
5886 lmc_sw_write_leveling_loop(priv, rankx);
5887 } while (no_errors_count <
5888 WL_MIN_NO_ERRORS_COUNT);
5889
5890 if (!sw_wl_hw) {
5891 u64 percent_x10;
5892
5893 if (sum_dram_dclk == 0)
5894 sum_dram_dclk = 1;
5895 percent_x10 = sum_dram_ops * 1000 /
5896 sum_dram_dclk;
5897 debug("N%d.LMC%d.R%d: ops %llu, cycles %llu, used %llu.%llu%%\n",
5898 node, if_num, rankx, sum_dram_ops,
5899 sum_dram_dclk, percent_x10 / 10,
5900 percent_x10 % 10);
5901 }
5902 if (errors) {
5903 debug("End WLEV_64 while loop: vref_val %d(0x%x), errors 0x%02x\n",
5904 vref_val, vref_val, errors);
5905 }
5906 // end parallel write-leveling block for
5907 // delay high-order bits
5908
5909 // if we used HW-assist, we did the ECC byte
5910				// when appropriate.
5911 if (sw_wl_hw) {
5912 if (wl_print) {
5913 debug("N%d.LMC%d.R%d: HW-assisted SWL - ECC estimate not needed.\n",
5914 node, if_num, rankx);
5915 }
5916 goto no_ecc_estimate;
5917 }
5918
5919 if ((if_bytemask & 0xff) == 0xff) {
5920 if (use_ecc) {
5921 sw_write_lvl_use_ecc(priv,
5922 rankx);
5923 } else {
5924 /* H/W delay value */
5925 byte_test_status[8] =
5926 WL_HARDWARE;
5927 /* ECC is not used */
5928 wl_rank.s.byte8 =
5929 wl_rank.s.byte0;
5930 }
5931 } else {
5932 if (use_ecc) {
5933 /* Estimate the ECC byte dly */
5934 // add hi-order to b4
5935 wl_rank.s.byte4 |=
5936 (wl_rank.s.byte3 &
5937 0x38);
5938 if ((wl_rank.s.byte4 & 0x06) <
5939 (wl_rank.s.byte3 & 0x06)) {
5940 // must be next clock
5941 wl_rank.s.byte4 += 8;
5942 }
5943 } else {
5944 /* ECC is not used */
5945 wl_rank.s.byte4 =
5946 wl_rank.s.byte0;
5947 }
5948
5949 /*
5950 * Change the status if s/w adjusted
5951 * the delay
5952 */
5953 /* Estimated delay */
5954 byte_test_status[4] = WL_SOFTWARE;
5955 } /* if ((if_bytemask & 0xff) == 0xff) */
5956 } /* if (wl_mask_err == 0) */
5957
5958no_ecc_estimate:
5959
5960 bytes_failed = 0;
5961 for (byte = 0; byte < 9; ++byte) {
5962 /* Don't accumulate errors for untested bytes */
5963 if (!(if_bytemask & (1 << byte)))
5964 continue;
5965 bytes_failed +=
5966 (byte_test_status[byte] == WL_ESTIMATED);
5967 }
5968
5969 /* vref training loop is only used for DDR4 */
5970 if (ddr_type != DDR4_DRAM)
5971 break;
5972
5973 if (bytes_failed == 0) {
5974 if (vref_val_count == 0)
5975 vref_val_start = vref_val;
5976
5977 ++vref_val_count;
5978 if (vref_val_count > best_vref_val_count) {
5979 best_vref_val_count = vref_val_count;
5980 best_vref_val_start = vref_val_start;
5981 debug("N%d.LMC%d.R%d: vref Training (%2d) : 0x%02x <----- ???? -----> 0x%02x\n",
5982 node, if_num, rankx, vref_val,
5983 best_vref_val_start,
5984 best_vref_val_start +
5985 best_vref_val_count - 1);
5986 }
5987 } else {
5988 vref_val_count = 0;
5989 debug("N%d.LMC%d.R%d: vref Training (%2d) : failed\n",
5990 node, if_num, rankx, vref_val);
5991 }
5992 }
5993
5994 /*
5995 * Determine address of DRAM to test for software write
5996 * leveling.
5997 */
5998 rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable +
5999 (interfaces / 2)));
6000 /* Adjust address for boot bus hole in memory map. */
6001 if (rank_addr > 0x10000000)
6002 rank_addr += 0x10000000;
6003
6004 debug("Rank Address: 0x%llx\n", rank_addr);
6005
6006 if (bytes_failed) {
6007 // FIXME? the big hammer, did not even try SW WL pass2,
6008 // assume only chip reset will help
6009 debug("N%d.LMC%d.R%d: S/W write-leveling pass 1 failed\n",
6010 node, if_num, rankx);
6011 sw_wl_failed = 1;
6012 } else { /* if (bytes_failed) */
6013 // SW WL pass 1 was OK, write the settings
6014 lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
6015 wl_rank.u64);
6016 wl_rank.u64 = lmc_rd(priv,
6017 CVMX_LMCX_WLEVEL_RANKX(rankx,
6018 if_num));
6019
6020 // do validity check on the delay values by running
6021 // the test 1 more time...
6022 // FIXME: we really need to check the ECC byte setting
6023 // here as well, so we need to enable ECC for this test!
6024 // if there are any errors, claim SW WL failure
6025 u64 datamask = (if_64b) ? 0xffffffffffffffffULL :
6026 0x00000000ffffffffULL;
6027 int errors;
6028
6029 // do the test
6030 if (sw_wl_hw) {
6031 errors = run_best_hw_patterns(priv, if_num,
6032 rank_addr,
6033 DBTRAIN_TEST,
6034 NULL) & 0xff;
6035 } else {
6036 errors = test_dram_byte64(priv, if_num,
6037 rank_addr, datamask,
6038 NULL);
6039 }
6040
6041 if (errors) {
6042 debug("N%d.LMC%d.R%d: Wlevel Rank Final Test errors 0x%03x\n",
6043 node, if_num, rankx, errors);
6044 sw_wl_failed = 1;
6045 }
6046 } /* if (bytes_failed) */
6047
6048 // FIXME? dump the WL settings, so we get more of a clue
6049 // as to what happened where
6050 debug("N%d.LMC%d.R%d: Wlevel Rank %#4x, 0x%016llX : %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %s\n",
6051 node, if_num, rankx, wl_rank.s.status, wl_rank.u64,
6052 wl_rank.s.byte8, wl_status_strings[byte_test_status[8]],
6053 wl_rank.s.byte7, wl_status_strings[byte_test_status[7]],
6054 wl_rank.s.byte6, wl_status_strings[byte_test_status[6]],
6055 wl_rank.s.byte5, wl_status_strings[byte_test_status[5]],
6056 wl_rank.s.byte4, wl_status_strings[byte_test_status[4]],
6057 wl_rank.s.byte3, wl_status_strings[byte_test_status[3]],
6058 wl_rank.s.byte2, wl_status_strings[byte_test_status[2]],
6059 wl_rank.s.byte1, wl_status_strings[byte_test_status[1]],
6060 wl_rank.s.byte0, wl_status_strings[byte_test_status[0]],
6061 (sw_wl_rank_status == WL_HARDWARE) ? "" : "(s)");
6062
6063 // finally, check for fatal conditions: either chip reset
6064 // right here, or return error flag
6065 if ((ddr_type == DDR4_DRAM && best_vref_val_count == 0) ||
6066 sw_wl_failed) {
6067 if (!ddr_disable_chip_reset) { // do chip RESET
6068 printf("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Resetting node...\n",
6069 node, if_num, rankx);
6070 mdelay(500);
6071 do_reset(NULL, 0, 0, NULL);
6072 } else {
6073 // return error flag so LMC init can be retried.
6074 debug("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Restarting LMC init...\n",
6075 node, if_num, rankx);
6076				return -EAGAIN; // tell caller a restart is possible
6077 }
6078 }
6079 active_rank++;
6080 }
6081
6082 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
6083 int parameter_set = 0;
6084 u64 value;
6085
6086 if (!(rank_mask & (1 << rankx)))
6087 continue;
6088
6089 wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx,
6090 if_num));
6091
6092 for (i = 0; i < 9; ++i) {
6093 s = lookup_env(priv, "ddr%d_wlevel_rank%d_byte%d",
6094 if_num, rankx, i);
6095 if (s) {
6096 parameter_set |= 1;
6097 value = strtoul(s, NULL, 0);
6098
6099 upd_wl_rank(&wl_rank, i, value);
6100 }
6101 }
6102
6103 s = lookup_env_ull(priv, "ddr%d_wlevel_rank%d", if_num, rankx);
6104 if (s) {
6105 parameter_set |= 1;
6106 value = strtoull(s, NULL, 0);
6107 wl_rank.u64 = value;
6108 }
6109
6110 if (parameter_set) {
6111 lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
6112 wl_rank.u64);
6113 wl_rank.u64 =
6114 lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num));
6115 display_wl(if_num, wl_rank, rankx);
6116 }
6117 // if there are unused entries to be filled
6118 if ((rank_mask & 0x0F) != 0x0F) {
6119 if (rankx < 3) {
6120 debug("N%d.LMC%d.R%d: checking for WLEVEL_RANK unused entries.\n",
6121 node, if_num, rankx);
6122
6123 // if rank 0, write ranks 1 and 2 here if empty
6124 if (rankx == 0) {
6125 // check that rank 1 is empty
6126 if (!(rank_mask & (1 << 1))) {
6127 debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
6128 node, if_num, rankx, 1);
6129 lmc_wr(priv,
6130 CVMX_LMCX_WLEVEL_RANKX(1,
6131 if_num),
6132 wl_rank.u64);
6133 }
6134
6135 // check that rank 2 is empty
6136 if (!(rank_mask & (1 << 2))) {
6137 debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
6138 node, if_num, rankx, 2);
6139 lmc_wr(priv,
6140 CVMX_LMCX_WLEVEL_RANKX(2,
6141 if_num),
6142 wl_rank.u64);
6143 }
6144 }
6145
6146 // if rank 0, 1 or 2, write rank 3 here if empty
6147 // check that rank 3 is empty
6148 if (!(rank_mask & (1 << 3))) {
6149 debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
6150 node, if_num, rankx, 3);
6151 lmc_wr(priv,
6152 CVMX_LMCX_WLEVEL_RANKX(3,
6153 if_num),
6154 wl_rank.u64);
6155 }
6156 }
6157 }
6158 }
6159
6160 /* Enable 32-bit mode if required. */
6161 cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
6162 cfg.cn78xx.mode32b = (!if_64b);
6163 debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
6164
6165 /* Restore the ECC configuration */
6166 if (!sw_wl_hw_default)
6167 cfg.cn78xx.ecc_ena = use_ecc;
6168
6169 lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
6170
6171 return 0;
6172}
6173
6174static void lmc_dll(struct ddr_priv *priv)
6175{
6176 union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
6177 int setting[9];
6178 int i;
6179
6180 ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
6181
6182 for (i = 0; i < 9; ++i) {
6183 SET_DDR_DLL_CTL3(dll90_byte_sel, ENCODE_DLL90_BYTE_SEL(i));
6184 lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
6185 lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
6186 ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
6187 setting[i] = GET_DDR_DLL_CTL3(dll90_setting);
6188 debug("%d. LMC%d_DLL_CTL3[%d] = %016llx %d\n", i, if_num,
6189 GET_DDR_DLL_CTL3(dll90_byte_sel), ddr_dll_ctl3.u64,
6190 setting[i]);
6191 }
6192
6193 debug("N%d.LMC%d: %-36s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
6194 node, if_num, "DLL90 Setting 8:0",
6195 setting[8], setting[7], setting[6], setting[5], setting[4],
6196 setting[3], setting[2], setting[1], setting[0]);
6197
6198 process_custom_dll_offsets(priv, if_num, "ddr_dll_write_offset",
6199 c_cfg->dll_write_offset,
6200 "ddr%d_dll_write_offset_byte%d", 1);
6201 process_custom_dll_offsets(priv, if_num, "ddr_dll_read_offset",
6202 c_cfg->dll_read_offset,
6203 "ddr%d_dll_read_offset_byte%d", 2);
6204}
6205
6206#define SLOT_CTL_INCR(csr, chip, field, incr) \
6207 csr.chip.field = (csr.chip.field < (64 - incr)) ? \
6208 (csr.chip.field + incr) : 63
6209
6210#define INCR(csr, chip, field, incr) \
6211 csr.chip.field = (csr.chip.field < (64 - incr)) ? \
6212 (csr.chip.field + incr) : 63
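
/*
 * Both macros above perform a saturating add on a 6-bit slot-control field:
 * the field is bumped by incr only while the result still fits, and is
 * pinned at 63 otherwise. For example, a field value of 62 incremented by 2
 * becomes 63 rather than wrapping to 0.
 */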
6213
6214static void lmc_workaround_2(struct ddr_priv *priv)
6215{
6216 /* Workaround Errata 21063 */
6217 if (octeon_is_cpuid(OCTEON_CN78XX) ||
6218 octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) {
6219 union cvmx_lmcx_slot_ctl0 slot_ctl0;
6220 union cvmx_lmcx_slot_ctl1 slot_ctl1;
6221 union cvmx_lmcx_slot_ctl2 slot_ctl2;
6222 union cvmx_lmcx_ext_config ext_cfg;
6223
6224 slot_ctl0.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL0(if_num));
6225 slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num));
6226 slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num));
6227
6228 ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
6229
6230 /* When ext_cfg.s.read_ena_bprch is set add 1 */
6231 if (ext_cfg.s.read_ena_bprch) {
6232 SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_init, 1);
6233 SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_l_init, 1);
6234 SLOT_CTL_INCR(slot_ctl1, cn78xx, r2w_xrank_init, 1);
6235 SLOT_CTL_INCR(slot_ctl2, cn78xx, r2w_xdimm_init, 1);
6236 }
6237
6238 /* Always add 2 */
6239 SLOT_CTL_INCR(slot_ctl1, cn78xx, w2r_xrank_init, 2);
6240 SLOT_CTL_INCR(slot_ctl2, cn78xx, w2r_xdimm_init, 2);
6241
6242 lmc_wr(priv, CVMX_LMCX_SLOT_CTL0(if_num), slot_ctl0.u64);
6243 lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64);
6244 lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64);
6245 }
6246
6247 /* Workaround Errata 21216 */
6248 if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) ||
6249 octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) {
6250 union cvmx_lmcx_slot_ctl1 slot_ctl1;
6251 union cvmx_lmcx_slot_ctl2 slot_ctl2;
6252
6253 slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num));
6254 slot_ctl1.cn78xx.w2w_xrank_init =
6255 max(10, (int)slot_ctl1.cn78xx.w2w_xrank_init);
6256 lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64);
6257
6258 slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num));
6259 slot_ctl2.cn78xx.w2w_xdimm_init =
6260 max(10, (int)slot_ctl2.cn78xx.w2w_xdimm_init);
6261 lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64);
6262 }
6263}
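
/*
 * Net effect of the two workarounds above: for errata 21063 the
 * read-to-write gaps grow by one cycle whenever the read-enable backporch
 * is in use and the write-to-read cross-rank/cross-DIMM gaps always grow
 * by two (saturating at 63); for errata 21216 the write-to-write cross-rank
 * and cross-DIMM initial gaps are forced up to at least 10.
 */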
6264
6265static void lmc_final(struct ddr_priv *priv)
6266{
6267 /*
6268 * 4.8.11 Final LMC Initialization
6269 *
6270 * Early LMC initialization, LMC write-leveling, and LMC read-leveling
6271 * must be completed prior to starting this final LMC initialization.
6272 *
6273 * LMC hardware updates the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1,
6274 * LMC(0)_SLOT_CTL2 CSRs with minimum values based on the selected
6275 * readleveling and write-leveling settings. Software should not write
6276 * the final LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and LMC(0)_SLOT_CTL2
6277 * values until after the final read-leveling and write-leveling
6278 * settings are written.
6279 *
6280 * Software must ensure the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and
6281 * LMC(0)_SLOT_CTL2 CSR values are appropriate for this step. These CSRs
6282 * select the minimum gaps between read operations and write operations
6283 * of various types.
6284 *
6285 * Software must not reduce the values in these CSR fields below the
6286 * values previously selected by the LMC hardware (during write-leveling
6287 * and read-leveling steps above).
6288 *
6289 * All sections in this chapter may be used to derive proper settings
6290 * for these registers.
6291 *
6292 * For minimal read latency, L2C_CTL[EF_ENA,EF_CNT] should be programmed
6293 * properly. This should be done prior to the first read.
6294 */
6295
6296 /* Clear any residual ECC errors */
6297 int num_tads = 1;
6298 int tad;
6299 int num_mcis = 1;
6300 int mci;
6301
6302 if (octeon_is_cpuid(OCTEON_CN78XX)) {
6303 num_tads = 8;
6304 num_mcis = 4;
6305 } else if (octeon_is_cpuid(OCTEON_CN70XX)) {
6306 num_tads = 1;
6307 num_mcis = 1;
6308 } else if (octeon_is_cpuid(OCTEON_CN73XX) ||
6309 octeon_is_cpuid(OCTEON_CNF75XX)) {
6310 num_tads = 4;
6311 num_mcis = 3;
6312 }
6313
6314 lmc_wr(priv, CVMX_LMCX_INT(if_num), -1ULL);
6315 lmc_rd(priv, CVMX_LMCX_INT(if_num));
6316
6317 for (tad = 0; tad < num_tads; tad++) {
6318 l2c_wr(priv, CVMX_L2C_TADX_INT(tad),
6319 l2c_rd(priv, CVMX_L2C_TADX_INT(tad)));
6320 debug("%-45s : (%d) 0x%08llx\n", "CVMX_L2C_TAD_INT", tad,
6321 l2c_rd(priv, CVMX_L2C_TADX_INT(tad)));
6322 }
6323
6324 for (mci = 0; mci < num_mcis; mci++) {
6325 l2c_wr(priv, CVMX_L2C_MCIX_INT(mci),
6326 l2c_rd(priv, CVMX_L2C_MCIX_INT(mci)));
6327 debug("%-45s : (%d) 0x%08llx\n", "L2C_MCI_INT", mci,
6328 l2c_rd(priv, CVMX_L2C_MCIX_INT(mci)));
6329 }
6330
6331 debug("%-45s : 0x%08llx\n", "LMC_INT",
6332 lmc_rd(priv, CVMX_LMCX_INT(if_num)));
6333}
6334
6335static void lmc_scrambling(struct ddr_priv *priv)
6336{
6337 // Make sure scrambling is disabled during init...
6338 union cvmx_lmcx_control ctrl;
6339 union cvmx_lmcx_scramble_cfg0 lmc_scramble_cfg0;
6340 union cvmx_lmcx_scramble_cfg1 lmc_scramble_cfg1;
6341 union cvmx_lmcx_scramble_cfg2 lmc_scramble_cfg2;
6342 union cvmx_lmcx_ns_ctl lmc_ns_ctl;
6343 int use_scramble = 0; // default OFF
6344 char *s;
6345
6346 ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
6347 lmc_scramble_cfg0.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num));
6348 lmc_scramble_cfg1.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num));
6349 lmc_scramble_cfg2.u64 = 0; // quiet compiler
6350 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
6351 lmc_scramble_cfg2.u64 =
6352 lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num));
6353 }
6354 lmc_ns_ctl.u64 = lmc_rd(priv, CVMX_LMCX_NS_CTL(if_num));
6355
6356 s = lookup_env_ull(priv, "ddr_use_scramble");
6357 if (s)
6358 use_scramble = simple_strtoull(s, NULL, 0);
6359
6360 /* Generate random values if scrambling is needed */
6361 if (use_scramble) {
6362 lmc_scramble_cfg0.u64 = cvmx_rng_get_random64();
6363 lmc_scramble_cfg1.u64 = cvmx_rng_get_random64();
6364 lmc_scramble_cfg2.u64 = cvmx_rng_get_random64();
6365 lmc_ns_ctl.s.ns_scramble_dis = 0;
6366 lmc_ns_ctl.s.adr_offset = 0;
6367 ctrl.s.scramble_ena = 1;
6368 }
6369
6370 s = lookup_env_ull(priv, "ddr_scramble_cfg0");
6371 if (s) {
6372 lmc_scramble_cfg0.u64 = simple_strtoull(s, NULL, 0);
6373 ctrl.s.scramble_ena = 1;
6374 }
6375 debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG0",
6376 lmc_scramble_cfg0.u64);
6377
6378 lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), lmc_scramble_cfg0.u64);
6379
6380 s = lookup_env_ull(priv, "ddr_scramble_cfg1");
6381 if (s) {
6382 lmc_scramble_cfg1.u64 = simple_strtoull(s, NULL, 0);
6383 ctrl.s.scramble_ena = 1;
6384 }
6385 debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG1",
6386 lmc_scramble_cfg1.u64);
6387 lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), lmc_scramble_cfg1.u64);
6388
6389 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
6390 s = lookup_env_ull(priv, "ddr_scramble_cfg2");
6391 if (s) {
6392 lmc_scramble_cfg2.u64 = simple_strtoull(s, NULL, 0);
6393 ctrl.s.scramble_ena = 1;
6394 }
6395 debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG2",
6396		      lmc_scramble_cfg2.u64);
6397 lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num),
6398 lmc_scramble_cfg2.u64);
6399 }
6400
6401 s = lookup_env_ull(priv, "ddr_ns_ctl");
6402 if (s)
6403 lmc_ns_ctl.u64 = simple_strtoull(s, NULL, 0);
6404 debug("%-45s : 0x%016llx\n", "LMC_NS_CTL", lmc_ns_ctl.u64);
6405 lmc_wr(priv, CVMX_LMCX_NS_CTL(if_num), lmc_ns_ctl.u64);
6406
6407 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
6408}
6409
6410struct rl_score {
6411 u64 setting;
6412 int score;
6413};
6414
6415static union cvmx_lmcx_rlevel_rankx rl_rank __section(".data");
6416static union cvmx_lmcx_rlevel_ctl rl_ctl __section(".data");
6417static unsigned char rodt_ctl __section(".data");
6418
6419static int rl_rodt_err __section(".data");
6420static unsigned char rtt_nom __section(".data");
6421static unsigned char rtt_idx __section(".data");
6422static char min_rtt_nom_idx __section(".data");
6423static char max_rtt_nom_idx __section(".data");
6424static char min_rodt_ctl __section(".data");
6425static char max_rodt_ctl __section(".data");
6426static int rl_dbg_loops __section(".data");
6427static unsigned char save_ddr2t __section(".data");
6428static int rl_samples __section(".data");
6429static char rl_compute __section(".data");
6430static char saved_ddr__ptune __section(".data");
6431static char saved_ddr__ntune __section(".data");
6432static char rl_comp_offs __section(".data");
6433static char saved_int_zqcs_dis __section(".data");
6434static int max_adj_rl_del_inc __section(".data");
6435static int print_nom_ohms __section(".data");
6436static int rl_print __section(".data");
6437
6438#ifdef ENABLE_HARDCODED_RLEVEL
6439static char part_number[21] __section(".data");
6440#endif /* ENABLE_HARDCODED_RLEVEL */
6441
6442struct perfect_counts {
6443	u16 count[9][32]; // 8+ECC byte lanes, 32 delay values each
6444 u32 mask[9]; // 8+ECC, bitmask of perfect delays
6445};
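
/*
 * perfect_counts records "perfect bitmask" (PBM) statistics for DDR4:
 * whenever a byte lane's read-leveling bitmask scores zero errors,
 * count[byte][delay] is incremented for the delay the hardware chose and
 * the matching bit is set in mask[byte]. Of the two instances declared
 * below, rodt_perfect_counts is cleared at the start of every rodt_loop()
 * pass, while rank_perf[] keeps accumulating across the whole RTT_NOM/RODT
 * sweep.
 */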
6446
6447static struct perfect_counts rank_perf[4] __section(".data");
6448static struct perfect_counts rodt_perfect_counts __section(".data");
6449static int pbm_lowsum_limit __section(".data");
6450// FIXME: PBM skip for RODT 240 and 34
6451static u32 pbm_rodt_skip __section(".data");
6452
6453// control rank majority processing
6454static int disable_rank_majority __section(".data");
6455
6456// default to mask 11b ODDs for DDR4 (except 73xx), else DISABLE
6457// for DDR3
6458static int enable_rldelay_bump __section(".data");
6459static int rldelay_bump_incr __section(".data");
6460static int disable_rlv_bump_this_byte __section(".data");
6461static u64 value_mask __section(".data");
6462
6463static struct rlevel_byte_data rl_byte[9] __section(".data");
6464static int sample_loops __section(".data");
6465static int max_samples __section(".data");
6466static int rl_rank_errors __section(".data");
6467static int rl_mask_err __section(".data");
6468static int rl_nonseq_err __section(".data");
6469static struct rlevel_bitmask rl_mask[9] __section(".data");
6470static int rl_best_rank_score __section(".data");
6471
6472static int rodt_row_skip_mask __section(".data");
6473
6474static void rodt_loop(struct ddr_priv *priv, int rankx, struct rl_score
6475 rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4])
6476{
6477 union cvmx_lmcx_comp_ctl2 cc2;
6478 const int rl_separate_ab = 1;
6479 int i;
6480
6481 rl_best_rank_score = DEFAULT_BEST_RANK_SCORE;
6482 rl_rodt_err = 0;
6483 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
6484 cc2.cn78xx.rodt_ctl = rodt_ctl;
6485 lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
6486 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
6487	udelay(1); /* Give it a little time to take effect */
6488 if (rl_print > 1) {
6489 debug("Read ODT_CTL : 0x%x (%d ohms)\n",
6490 cc2.cn78xx.rodt_ctl,
6491 imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
6492 }
6493
6494 memset(rl_byte, 0, sizeof(rl_byte));
6495 memset(&rodt_perfect_counts, 0, sizeof(rodt_perfect_counts));
6496
6497	// when the iterated RODT is the target RODT, take more samples...
6498 max_samples = rl_samples;
6499 if (rodt_ctl == default_rodt_ctl)
6500 max_samples += rl_samples + 1;
6501
6502 for (sample_loops = 0; sample_loops < max_samples; sample_loops++) {
6503 int redoing_nonseq_errs = 0;
6504
6505 rl_mask_err = 0;
6506
6507 if (!(rl_separate_ab && spd_rdimm &&
6508 ddr_type == DDR4_DRAM)) {
6509 /* Clear read-level delays */
6510 lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
6511
6512 /* read-leveling */
6513 oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
6514
6515 do {
6516 rl_rank.u64 =
6517 lmc_rd(priv,
6518 CVMX_LMCX_RLEVEL_RANKX(rankx,
6519 if_num));
6520 } while (rl_rank.cn78xx.status != 3);
6521 }
6522
6523 rl_rank.u64 =
6524 lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
6525
6526 // start bitmask interpretation block
6527
6528 memset(rl_mask, 0, sizeof(rl_mask));
6529
6530 if (rl_separate_ab && spd_rdimm && ddr_type == DDR4_DRAM) {
6531 union cvmx_lmcx_rlevel_rankx rl_rank_aside;
6532 union cvmx_lmcx_modereg_params0 mp0;
6533
6534 /* A-side */
6535 mp0.u64 =
6536 lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
6537 mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */
6538 lmc_wr(priv,
6539 CVMX_LMCX_MODEREG_PARAMS0(if_num),
6540 mp0.u64);
6541
6542 /* Clear read-level delays */
6543 lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
6544
6545 /* read-leveling */
6546 oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
6547
6548 do {
6549 rl_rank.u64 =
6550 lmc_rd(priv,
6551 CVMX_LMCX_RLEVEL_RANKX(rankx,
6552 if_num));
6553 } while (rl_rank.cn78xx.status != 3);
6554
6555 rl_rank.u64 =
6556 lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
6557 if_num));
6558
6559 rl_rank_aside.u64 = rl_rank.u64;
6560
6561 rl_mask[0].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 0);
6562 rl_mask[1].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 1);
6563 rl_mask[2].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 2);
6564 rl_mask[3].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 3);
6565 rl_mask[8].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 8);
6566 /* A-side complete */
6567
6568 /* B-side */
6569 mp0.u64 =
6570 lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
6571 mp0.s.mprloc = 3; /* MPR Page 0 Location 3 */
6572 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
6573 mp0.u64);
6574
6575 /* Clear read-level delays */
6576 lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
6577
6578 /* read-leveling */
6579 oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
6580
6581 do {
6582 rl_rank.u64 =
6583 lmc_rd(priv,
6584 CVMX_LMCX_RLEVEL_RANKX(rankx,
6585 if_num));
6586 } while (rl_rank.cn78xx.status != 3);
6587
6588 rl_rank.u64 =
6589 lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
6590 if_num));
6591
6592 rl_mask[4].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 4);
6593 rl_mask[5].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 5);
6594 rl_mask[6].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 6);
6595 rl_mask[7].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 7);
6596 /* B-side complete */
6597
6598 upd_rl_rank(&rl_rank, 0, rl_rank_aside.s.byte0);
6599 upd_rl_rank(&rl_rank, 1, rl_rank_aside.s.byte1);
6600 upd_rl_rank(&rl_rank, 2, rl_rank_aside.s.byte2);
6601 upd_rl_rank(&rl_rank, 3, rl_rank_aside.s.byte3);
6602 /* ECC A-side */
6603 upd_rl_rank(&rl_rank, 8, rl_rank_aside.s.byte8);
6604
6605 mp0.u64 =
6606 lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
6607 mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */
6608 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
6609 mp0.u64);
6610 }
6611
6612 /*
6613 * Evaluate the quality of the read-leveling delays from the
6614 * bitmasks. Also save off a software computed read-leveling
6615 * mask that may be used later to qualify the delay results
6616 * from Octeon.
6617 */
6618 for (i = 0; i < (8 + ecc_ena); ++i) {
6619 int bmerr;
6620
6621 if (!(if_bytemask & (1 << i)))
6622 continue;
6623 if (!(rl_separate_ab && spd_rdimm &&
6624 ddr_type == DDR4_DRAM)) {
6625 rl_mask[i].bm =
6626 lmc_ddr3_rl_dbg_read(priv, if_num, i);
6627 }
6628 bmerr = validate_ddr3_rlevel_bitmask(&rl_mask[i],
6629 ddr_type);
6630 rl_mask[i].errs = bmerr;
6631 rl_mask_err += bmerr;
6632 // count only the "perfect" bitmasks
6633 if (ddr_type == DDR4_DRAM && !bmerr) {
6634 int delay;
6635 // FIXME: for now, simple filtering:
6636 // do NOT count PBMs for RODTs in skip mask
6637 if ((1U << rodt_ctl) & pbm_rodt_skip)
6638 continue;
6639 // FIXME: could optimize this a bit?
6640 delay = get_rl_rank(&rl_rank, i);
6641 rank_perf[rankx].count[i][delay] += 1;
6642 rank_perf[rankx].mask[i] |=
6643 (1ULL << delay);
6644 rodt_perfect_counts.count[i][delay] += 1;
6645 rodt_perfect_counts.mask[i] |= (1ULL << delay);
6646 }
6647 }
6648
6649 /* Set delays for unused bytes to match byte 0. */
6650 for (i = 0; i < 9; ++i) {
6651 if (if_bytemask & (1 << i))
6652 continue;
6653 upd_rl_rank(&rl_rank, i, rl_rank.s.byte0);
6654 }
6655
6656 /*
6657 * Save a copy of the byte delays in physical
6658 * order for sequential evaluation.
6659 */
6660 unpack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, rl_rank);
6661
6662 redo_nonseq_errs:
6663
6664 rl_nonseq_err = 0;
6665 if (!disable_sequential_delay_check) {
6666 for (i = 0; i < 9; ++i)
6667 rl_byte[i].sqerrs = 0;
6668
6669 if ((if_bytemask & 0xff) == 0xff) {
6670 /*
6671 * Evaluate delay sequence across the whole
6672 * range of bytes for standard dimms.
6673 */
6674 /* 1=RDIMM, 5=Mini-RDIMM */
6675 if (spd_dimm_type == 1 || spd_dimm_type == 5) {
6676 int reg_adj_del = abs(rl_byte[4].delay -
6677 rl_byte[5].delay);
6678
6679 /*
6680 * Registered dimm topology routes
6681 * from the center.
6682 */
6683 rl_nonseq_err +=
6684 nonseq_del(rl_byte, 0,
6685 3 + ecc_ena,
6686 max_adj_rl_del_inc);
6687 rl_nonseq_err +=
6688 nonseq_del(rl_byte, 5,
6689 7 + ecc_ena,
6690 max_adj_rl_del_inc);
6691 // byte 5 sqerrs never gets cleared
6692 // for RDIMMs
6693 rl_byte[5].sqerrs = 0;
6694 if (reg_adj_del > 1) {
6695 /*
6696 * Assess proximity of bytes on
6697 * opposite sides of register
6698 */
6699 rl_nonseq_err += (reg_adj_del -
6700 1) *
6701 RLEVEL_ADJACENT_DELAY_ERROR;
6702 // update byte 5 error
6703 rl_byte[5].sqerrs +=
6704 (reg_adj_del - 1) *
6705 RLEVEL_ADJACENT_DELAY_ERROR;
6706 }
6707 }
6708
6709 /* 2=UDIMM, 6=Mini-UDIMM */
6710 if (spd_dimm_type == 2 || spd_dimm_type == 6) {
6711 /*
6712 * Unbuffered dimm topology routes
6713 * from end to end.
6714 */
6715 rl_nonseq_err += nonseq_del(rl_byte, 0,
6716 7 + ecc_ena,
6717 max_adj_rl_del_inc);
6718 }
6719 } else {
6720 rl_nonseq_err += nonseq_del(rl_byte, 0,
6721 3 + ecc_ena,
6722 max_adj_rl_del_inc);
6723 }
6724 } /* if (! disable_sequential_delay_check) */
6725
6726 rl_rank_errors = rl_mask_err + rl_nonseq_err;
6727
6728 // print original sample here only if we are not really
6729 // averaging or picking best
6730 // also do not print if we were redoing the NONSEQ score
6731 // for using COMPUTED
6732 if (!redoing_nonseq_errs && rl_samples < 2) {
6733 if (rl_print > 1) {
6734 display_rl_bm(if_num, rankx, rl_mask, ecc_ena);
6735 display_rl_bm_scores(if_num, rankx, rl_mask,
6736 ecc_ena);
6737 display_rl_seq_scores(if_num, rankx, rl_byte,
6738 ecc_ena);
6739 }
6740 display_rl_with_score(if_num, rl_rank, rankx,
6741 rl_rank_errors);
6742 }
6743
6744 if (rl_compute) {
6745 if (!redoing_nonseq_errs) {
6746 /* Recompute the delays based on the bitmask */
6747 for (i = 0; i < (8 + ecc_ena); ++i) {
6748 if (!(if_bytemask & (1 << i)))
6749 continue;
6750
6751 upd_rl_rank(&rl_rank, i,
6752 compute_ddr3_rlevel_delay(
6753 rl_mask[i].mstart,
6754 rl_mask[i].width,
6755 rl_ctl));
6756 }
6757
6758 /*
6759 * Override the copy of byte delays with the
6760 * computed results.
6761 */
6762 unpack_rlevel_settings(if_bytemask, ecc_ena,
6763 rl_byte, rl_rank);
6764
6765 redoing_nonseq_errs = 1;
6766 goto redo_nonseq_errs;
6767
6768 } else {
6769 /*
6770 * now print this if already printed the
6771 * original sample
6772 */
6773 if (rl_samples < 2 || rl_print) {
6774 display_rl_with_computed(if_num,
6775 rl_rank, rankx,
6776 rl_rank_errors);
6777 }
6778 }
6779 } /* if (rl_compute) */
6780
6781 // end bitmask interpretation block
6782
6783 // if it is a better (lower) score, then keep it
6784 if (rl_rank_errors < rl_best_rank_score) {
6785 rl_best_rank_score = rl_rank_errors;
6786
6787 // save the new best delays and best errors
6788 for (i = 0; i < (8 + ecc_ena); ++i) {
6789 rl_byte[i].best = rl_byte[i].delay;
6790 rl_byte[i].bestsq = rl_byte[i].sqerrs;
6791 // save bitmasks and their scores as well
6792 // xlate UNPACKED index to PACKED index to
6793 // get from rl_mask
6794 rl_byte[i].bm = rl_mask[XUP(i, !!ecc_ena)].bm;
6795 rl_byte[i].bmerrs =
6796 rl_mask[XUP(i, !!ecc_ena)].errs;
6797 }
6798 }
6799
6800 rl_rodt_err += rl_rank_errors;
6801 }
6802
6803 /* We recorded the best score across the averaging loops */
6804 rl_score[rtt_nom][rodt_ctl][rankx].score = rl_best_rank_score;
6805
6806 /*
6807 * Restore the delays from the best fields that go with the best
6808 * score
6809 */
6810 for (i = 0; i < 9; ++i) {
6811 rl_byte[i].delay = rl_byte[i].best;
6812 rl_byte[i].sqerrs = rl_byte[i].bestsq;
6813 }
6814
6815 rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
6816
6817 pack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, &rl_rank);
6818
6819 if (rl_samples > 1) {
6820 // restore the "best" bitmasks and their scores for printing
6821 for (i = 0; i < 9; ++i) {
6822 if ((if_bytemask & (1 << i)) == 0)
6823 continue;
6824 // xlate PACKED index to UNPACKED index to get from
6825 // rl_byte
6826 rl_mask[i].bm = rl_byte[XPU(i, !!ecc_ena)].bm;
6827 rl_mask[i].errs = rl_byte[XPU(i, !!ecc_ena)].bmerrs;
6828 }
6829
6830 // maybe print bitmasks/scores here
6831 if (rl_print > 1) {
6832 display_rl_bm(if_num, rankx, rl_mask, ecc_ena);
6833 display_rl_bm_scores(if_num, rankx, rl_mask, ecc_ena);
6834 display_rl_seq_scores(if_num, rankx, rl_byte, ecc_ena);
6835
6836 display_rl_with_rodt(if_num, rl_rank, rankx,
6837 rl_score[rtt_nom][rodt_ctl][rankx].score,
6838 print_nom_ohms,
6839 imp_val->rodt_ohms[rodt_ctl],
6840 WITH_RODT_BESTSCORE);
6841
6842 debug("-----------\n");
6843 }
6844 }
6845
6846 rl_score[rtt_nom][rodt_ctl][rankx].setting = rl_rank.u64;
6847
6848 // print out the PBMs for the current RODT
6849 if (ddr_type == DDR4_DRAM && rl_print > 1) { // verbosity?
6850 // FIXME: change verbosity level after debug complete...
6851
6852		for (i = 0; i < 9; i++) {
6853			u64 temp_mask;
6854			int num_values;
6855			int delay;
6856			// FIXME: PBM skip for RODTs in mask
6857			if ((1U << rodt_ctl) & pbm_rodt_skip)
6858				continue;
6859
6860			temp_mask = rodt_perfect_counts.mask[i];
6861			num_values = __builtin_popcountll(temp_mask);
6862			delay = __builtin_ffsll(temp_mask) - 1;
6863
6864			debug("N%d.LMC%d.R%d: PERFECT: RODT %3d: Byte %d: mask 0x%02llx (%d): ",
6865			      node, if_num, rankx,
6866			      imp_val->rodt_ohms[rodt_ctl],
6867			      i, temp_mask >> delay, num_values);
6868
6869			while (temp_mask != 0) {
6870				delay = __builtin_ffsll(temp_mask) - 1;
6871				debug("%2d(%2d) ", delay,
6872				      rodt_perfect_counts.count[i][delay]);
6873				temp_mask &= ~(1UL << delay);
6874			} /* while (temp_mask != 0) */
6875			debug("\n");
6876		}
6877 }
6878}
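
/*
 * Each rodt_loop() call therefore leaves one entry in
 * rl_score[rtt_nom][rodt_ctl][rankx]: the lowest rank score seen across the
 * sample loops (bitmask errors plus the sequential-delay penalty) together
 * with the RLEVEL_RANK setting that produced it. Extra samples are taken
 * when the swept RODT equals the configured default so that the default
 * setting gets the most averaging.
 */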
6879
6880static void rank_major_loop(struct ddr_priv *priv, int rankx, struct rl_score
6881 rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4])
6882{
6883 /* Start with an arbitrarily high score */
6884 int best_rank_score = DEFAULT_BEST_RANK_SCORE;
6885 int best_rank_rtt_nom = 0;
6886 int best_rank_ctl = 0;
6887 int best_rank_ohms = 0;
6888 int best_rankx = 0;
6889 int dimm_rank_mask;
6890 int max_rank_score;
6891 union cvmx_lmcx_rlevel_rankx saved_rl_rank;
6892 int next_ohms;
6893 int orankx;
6894 int next_score = 0;
6895 int best_byte, new_byte, temp_byte, orig_best_byte;
6896 int rank_best_bytes[9];
6897 int byte_sh;
6898 int avg_byte;
6899 int avg_diff;
6900 int i;
6901
6902 if (!(rank_mask & (1 << rankx)))
6903 return;
6904
6905 // some of the rank-related loops below need to operate only on
6906 // the ranks of a single DIMM,
6907 // so create a mask for their use here
6908 if (num_ranks == 4) {
6909 dimm_rank_mask = rank_mask; // should be 1111
6910 } else {
6911 dimm_rank_mask = rank_mask & 3; // should be 01 or 11
6912 if (rankx >= 2) {
6913 // doing a rank on the second DIMM, should be
6914 // 0100 or 1100
6915 dimm_rank_mask <<= 2;
6916 }
6917 }
6918 debug("DIMM rank mask: 0x%x, rank mask: 0x%x, rankx: %d\n",
6919 dimm_rank_mask, rank_mask, rankx);
6920
6921 // this is the start of the BEST ROW SCORE LOOP
6922
6923 for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
6924 rtt_nom = imp_val->rtt_nom_table[rtt_idx];
6925
6926 debug("N%d.LMC%d.R%d: starting RTT_NOM %d (%d)\n",
6927 node, if_num, rankx, rtt_nom,
6928 imp_val->rtt_nom_ohms[rtt_nom]);
6929
6930 for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
6931 --rodt_ctl) {
6932 next_ohms = imp_val->rodt_ohms[rodt_ctl];
6933
6934 // skip RODT rows in mask, but *NOT* rows with too
6935 // high a score;
6936 // we will not use the skipped ones for printing or
6937 // evaluating, but we need to allow all the
6938 // non-skipped ones to be candidates for "best"
6939 if (((1 << rodt_ctl) & rodt_row_skip_mask) != 0) {
6940 debug("N%d.LMC%d.R%d: SKIPPING rodt:%d (%d) with rank_score:%d\n",
6941 node, if_num, rankx, rodt_ctl,
6942 next_ohms, next_score);
6943 continue;
6944 }
6945
6946 // this is ROFFIX-0528
6947 for (orankx = 0; orankx < dimm_count * 4; orankx++) {
6948 // stay on the same DIMM
6949 if (!(dimm_rank_mask & (1 << orankx)))
6950 continue;
6951
6952 next_score = rl_score[rtt_nom][rodt_ctl][orankx].score;
6953
6954 // always skip a higher score
6955 if (next_score > best_rank_score)
6956 continue;
6957
6958 // if scores are equal
6959 if (next_score == best_rank_score) {
6960 // always skip lower ohms
6961 if (next_ohms < best_rank_ohms)
6962 continue;
6963
6964 // if same ohms
6965 if (next_ohms == best_rank_ohms) {
6966 // always skip the other rank(s)
6967 if (orankx != rankx)
6968 continue;
6969 }
6970 // else next_ohms are greater,
6971 // always choose it
6972 }
6973 // else next_score is less than current best,
6974 // so always choose it
6975 debug("N%d.LMC%d.R%d: new best score: rank %d, rodt %d(%3d), new best %d, previous best %d(%d)\n",
6976 node, if_num, rankx, orankx, rodt_ctl, next_ohms, next_score,
6977 best_rank_score, best_rank_ohms);
6978 best_rank_score = next_score;
6979 best_rank_rtt_nom = rtt_nom;
6980 //best_rank_nom_ohms = rtt_nom_ohms;
6981 best_rank_ctl = rodt_ctl;
6982 best_rank_ohms = next_ohms;
6983 best_rankx = orankx;
6984 rl_rank.u64 =
6985 rl_score[rtt_nom][rodt_ctl][orankx].setting;
6986 }
6987 }
6988 }
6989
6990 // this is the end of the BEST ROW SCORE LOOP
6991
6992 // DANGER, Will Robinson!! Abort now if we did not find a best
6993 // score at all...
6994 if (best_rank_score == DEFAULT_BEST_RANK_SCORE) {
6995 printf("N%d.LMC%d.R%d: WARNING: no best rank score found - resetting node...\n",
6996 node, if_num, rankx);
6997 mdelay(500);
6998 do_reset(NULL, 0, 0, NULL);
6999 }
7000
7001 // FIXME: relative now, but still arbitrary...
7002 max_rank_score = best_rank_score;
7003 if (ddr_type == DDR4_DRAM) {
7004 // halve the range if 2 DIMMs unless they are single rank...
7005 max_rank_score += (MAX_RANK_SCORE_LIMIT / ((num_ranks > 1) ?
7006 dimm_count : 1));
7007 } else {
7008 // Since DDR3 typically has a wider score range,
7009 // keep more of them always
7010 max_rank_score += MAX_RANK_SCORE_LIMIT;
7011 }
7012
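
	/*
	 * Example of the window above: with a best score of 5 on a DDR4
	 * config using two dual-rank DIMMs, rows scoring up to
	 * 5 + MAX_RANK_SCORE_LIMIT / 2 stay eligible, while a DDR3 config
	 * would keep everything up to 5 + MAX_RANK_SCORE_LIMIT. Rows outside
	 * the window (or in rodt_row_skip_mask) are only excluded from the
	 * printout and the byte averaging below; the best row itself was
	 * already chosen above.
	 */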
7013 if (!ecc_ena) {
7014 /* ECC is not used */
7015 rl_rank.s.byte8 = rl_rank.s.byte0;
7016 }
7017
7018 // at the end, write the best row settings to the current rank
7019 lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), rl_rank.u64);
7020 rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
7021
7022 saved_rl_rank.u64 = rl_rank.u64;
7023
7024 // this is the start of the PRINT LOOP
7025 int pass;
7026
7027 // for pass==0, print current rank, pass==1 print other rank(s)
7028	// this is done because we want to show each rank's RODT values
7029	// together, not interleaved
7030	// keep separate counters per rank - pass=0 target rank, pass=1 other
7031 // rank on DIMM
7032 int mask_skipped[2] = {0, 0};
7033 int score_skipped[2] = {0, 0};
7034 int selected_rows[2] = {0, 0};
7035 int zero_scores[2] = {0, 0};
7036 for (pass = 0; pass < 2; pass++) {
7037 for (orankx = 0; orankx < dimm_count * 4; orankx++) {
7038 // stay on the same DIMM
7039 if (!(dimm_rank_mask & (1 << orankx)))
7040 continue;
7041
7042 if ((pass == 0 && orankx != rankx) ||
7043 (pass != 0 && orankx == rankx))
7044 continue;
7045
7046 for (rtt_idx = min_rtt_nom_idx;
7047 rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
7048 rtt_nom = imp_val->rtt_nom_table[rtt_idx];
7049 if (dyn_rtt_nom_mask == 0) {
7050 print_nom_ohms = -1;
7051 } else {
7052 print_nom_ohms =
7053 imp_val->rtt_nom_ohms[rtt_nom];
7054 }
7055
7056 // cycle through all the RODT values...
7057 for (rodt_ctl = max_rodt_ctl;
7058 rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
7059 union cvmx_lmcx_rlevel_rankx
7060 temp_rl_rank;
7061 int temp_score =
7062 rl_score[rtt_nom][rodt_ctl][orankx].score;
7063 int skip_row;
7064
7065 temp_rl_rank.u64 =
7066 rl_score[rtt_nom][rodt_ctl][orankx].setting;
7067
7068 // skip RODT rows in mask, or rows
7069 // with too high a score;
7070 // we will not use them for printing
7071 // or evaluating...
7072 if ((1 << rodt_ctl) &
7073 rodt_row_skip_mask) {
7074 skip_row = WITH_RODT_SKIPPING;
7075 ++mask_skipped[pass];
7076 } else if (temp_score >
7077 max_rank_score) {
7078 skip_row = WITH_RODT_SKIPPING;
7079 ++score_skipped[pass];
7080 } else {
7081 skip_row = WITH_RODT_BLANK;
7082 ++selected_rows[pass];
7083 if (temp_score == 0)
7084 ++zero_scores[pass];
7085 }
7086
7087 // identify and print the BEST ROW
7088 // when it comes up
7089 if (skip_row == WITH_RODT_BLANK &&
7090 best_rankx == orankx &&
7091 best_rank_rtt_nom == rtt_nom &&
7092 best_rank_ctl == rodt_ctl)
7093 skip_row = WITH_RODT_BESTROW;
7094
7095 if (rl_print) {
7096 display_rl_with_rodt(if_num,
7097 temp_rl_rank, orankx, temp_score,
7098 print_nom_ohms,
7099 imp_val->rodt_ohms[rodt_ctl],
7100 skip_row);
7101 }
7102 }
7103 }
7104 }
7105 }
7106 debug("N%d.LMC%d.R%d: RLROWS: selected %d+%d, zero_scores %d+%d, mask_skipped %d+%d, score_skipped %d+%d\n",
7107 node, if_num, rankx, selected_rows[0], selected_rows[1],
7108 zero_scores[0], zero_scores[1], mask_skipped[0], mask_skipped[1],
7109 score_skipped[0], score_skipped[1]);
7110 // this is the end of the PRINT LOOP
7111
7112 // now evaluate which bytes need adjusting
7113 // collect the new byte values; first init with current best for
7114 // neighbor use
7115 for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) {
7116 rank_best_bytes[i] = (int)(rl_rank.u64 >> byte_sh) &
7117 RLEVEL_BYTE_MSK;
7118 }
7119
7120 // this is the start of the BEST BYTE LOOP
7121
7122 for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) {
7123 int sum = 0, count = 0;
7124 int count_less = 0, count_same = 0, count_more = 0;
7125 int count_byte; // save the value we counted around
7126 // for rank majority use
7127 int rank_less = 0, rank_same = 0, rank_more = 0;
7128 int neighbor;
7129 int neigh_byte;
7130
7131 best_byte = rank_best_bytes[i];
7132 orig_best_byte = rank_best_bytes[i];
7133
7134 // this is the start of the BEST BYTE AVERAGING LOOP
7135
7136 // validate the initial "best" byte by looking at the
7137 // average of the unskipped byte-column entries
7138 // we want to do this before we go further, so we can
7139 // try to start with a better initial value
7140 // this is the so-called "BESTBUY" patch set
7141
7142 for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
7143 ++rtt_idx) {
7144 rtt_nom = imp_val->rtt_nom_table[rtt_idx];
7145
7146 for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
7147 --rodt_ctl) {
7148 union cvmx_lmcx_rlevel_rankx temp_rl_rank;
7149 int temp_score;
7150
7151 // average over all the ranks
7152 for (orankx = 0; orankx < dimm_count * 4;
7153 orankx++) {
7154 // stay on the same DIMM
7155 if (!(dimm_rank_mask & (1 << orankx)))
7156 continue;
7157
7158 temp_score =
7159 rl_score[rtt_nom][rodt_ctl][orankx].score;
7160 // skip RODT rows in mask, or rows with
7161 // too high a score;
7162 // we will not use them for printing or
7163 // evaluating...
7164
7165 if (!((1 << rodt_ctl) &
7166 rodt_row_skip_mask) &&
7167 temp_score <= max_rank_score) {
7168 temp_rl_rank.u64 =
7169 rl_score[rtt_nom][rodt_ctl][orankx].setting;
7170 temp_byte =
7171 (int)(temp_rl_rank.u64 >> byte_sh) &
7172 RLEVEL_BYTE_MSK;
7173 sum += temp_byte;
7174 count++;
7175 }
7176 }
7177 }
7178 }
7179
7180 // this is the end of the BEST BYTE AVERAGING LOOP
7181
7182 // FIXME: validate count and sum??
7183 avg_byte = (int)divide_nint(sum, count);
7184 avg_diff = best_byte - avg_byte;
7185 new_byte = best_byte;
7186 if (avg_diff != 0) {
7187 // bump best up/dn by 1, not necessarily all the
7188 // way to avg
7189 new_byte = best_byte + ((avg_diff > 0) ? -1 : 1);
7190 }
7191
7192 if (rl_print) {
7193 debug("N%d.LMC%d.R%d: START: Byte %d: best %d is different by %d from average %d, using %d.\n",
7194 node, if_num, rankx,
7195 i, best_byte, avg_diff, avg_byte, new_byte);
7196 }
7197 best_byte = new_byte;
7198 count_byte = new_byte; // save the value we will count around
7199
7200 // At this point best_byte is either:
7201 // 1. the original byte-column value from the best scoring
7202 // RODT row, OR
7203 // 2. that value bumped toward the average of all the
7204 // byte-column values
7205 //
7206 // best_byte will not change from here on...
7207
7208 // this is the start of the BEST BYTE COUNTING LOOP
7209
7210 // NOTE: we do this next loop separately from above, because
7211 // we count relative to "best_byte"
7212 // which may have been modified by the above averaging
7213 // operation...
7214
7215 for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
7216 ++rtt_idx) {
7217 rtt_nom = imp_val->rtt_nom_table[rtt_idx];
7218
7219 for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
7220 --rodt_ctl) {
7221 union cvmx_lmcx_rlevel_rankx temp_rl_rank;
7222 int temp_score;
7223
7224 for (orankx = 0; orankx < dimm_count * 4;
7225 orankx++) { // count over all the ranks
7226 // stay on the same DIMM
7227 if (!(dimm_rank_mask & (1 << orankx)))
7228 continue;
7229
7230 temp_score =
7231 rl_score[rtt_nom][rodt_ctl][orankx].score;
7232 // skip RODT rows in mask, or rows
7233 // with too high a score;
7234 // we will not use them for printing
7235 // or evaluating...
7236 if (((1 << rodt_ctl) &
7237 rodt_row_skip_mask) ||
7238 temp_score > max_rank_score)
7239 continue;
7240
7241 temp_rl_rank.u64 =
7242 rl_score[rtt_nom][rodt_ctl][orankx].setting;
7243 temp_byte = (temp_rl_rank.u64 >>
7244 byte_sh) & RLEVEL_BYTE_MSK;
7245
7246 if (temp_byte == 0)
7247 ; // do not count it if illegal
7248 else if (temp_byte == best_byte)
7249 count_same++;
7250 else if (temp_byte == best_byte - 1)
7251 count_less++;
7252 else if (temp_byte == best_byte + 1)
7253 count_more++;
7254 // else do not count anything more
7255 // than 1 away from the best
7256
7257 // no rank counting if disabled
7258 if (disable_rank_majority)
7259 continue;
7260
7261 // FIXME? count is relative to
7262 // best_byte; should it be rank-based?
7263 // rank counts only on main rank
7264 if (orankx != rankx)
7265 continue;
7266 else if (temp_byte == best_byte)
7267 rank_same++;
7268 else if (temp_byte == best_byte - 1)
7269 rank_less++;
7270 else if (temp_byte == best_byte + 1)
7271 rank_more++;
7272 }
7273 }
7274 }
7275
7276 if (rl_print) {
7277 debug("N%d.LMC%d.R%d: COUNT: Byte %d: orig %d now %d, more %d same %d less %d (%d/%d/%d)\n",
7278 node, if_num, rankx,
7279 i, orig_best_byte, best_byte,
7280 count_more, count_same, count_less,
7281 rank_more, rank_same, rank_less);
7282 }
7283
7284 // this is the end of the BEST BYTE COUNTING LOOP
7285
7286 // choose the new byte value
7287 // we need to check that there is no gap greater than 2
7288 // between adjacent bytes (adjacency depends on DIMM type)
7289 // use the neighbor value to help decide
7290 // initially, the rank_best_bytes[] will contain values from
7291 // the chosen lowest score rank
7292 new_byte = 0;
7293
7294 // neighbor is index-1 unless we are index 0 or index 8 (ECC)
7295 neighbor = (i == 8) ? 3 : ((i == 0) ? 1 : i - 1);
7296 neigh_byte = rank_best_bytes[neighbor];
7297
7298 // can go up or down or stay the same, so look at a numeric
7299 // average to help
7300 new_byte = (int)divide_nint(((count_more * (best_byte + 1)) +
7301 (count_same * (best_byte + 0)) +
7302 (count_less * (best_byte - 1))),
7303 max(1, (count_more + count_same +
7304 count_less)));
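/*
 * Worked example of the average above, assuming divide_nint() rounds
 * to the nearest integer as its name suggests: with best_byte = 40,
 * count_more = 2, count_same = 3 and count_less = 1, new_byte =
 * divide_nint(2*41 + 3*40 + 1*39, 6) = divide_nint(241, 6) = 40.
 */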
7305
7306 // use neighbor to help choose with average
7307 if (i > 0 && (abs(neigh_byte - new_byte) > 2) &&
7308 !disable_sequential_delay_check) {
7309 // but not for byte 0
7310 int avg_pick = new_byte;
7311
7312 if ((new_byte - best_byte) != 0) {
7313 // back to best, average did not get better
7314 new_byte = best_byte;
7315 } else {
7316 // avg was the same, still too far, now move
7317 // it towards the neighbor
7318 new_byte += (neigh_byte > new_byte) ? 1 : -1;
7319 }
7320
7321 if (rl_print) {
7322 debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: neighbor %d too different %d from average %d, picking %d.\n",
7323 node, if_num, rankx,
7324 i, neighbor, neigh_byte, avg_pick,
7325 new_byte);
7326 }
7327 } else {
7328 // NOTE:
7329 // For now, we let the neighbor processing above trump
7330 // the new simple majority processing here.
7331 // This is mostly because we have seen no smoking gun
7332 // for a neighbor bad choice (yet?).
7333 // Also note that we will ALWAYS be using byte 0
7334 // majority, because of the if clause above.
7335
7336 // majority is dependent on the counts, which are
7337 // relative to best_byte, so start there
7338 int maj_byte = best_byte;
7339 int rank_maj;
7340 int rank_sum;
7341
7342 if (count_more > count_same &&
7343 count_more > count_less) {
7344 maj_byte++;
7345 } else if (count_less > count_same &&
7346 count_less > count_more) {
7347 maj_byte--;
7348 }
7349
7350 if (maj_byte != new_byte) {
7351 // print only when majority choice is
7352 // different from average
7353 if (rl_print) {
7354 debug("N%d.LMC%d.R%d: MAJORTY: Byte %d: picking majority of %d over average %d.\n",
7355 node, if_num, rankx, i, maj_byte,
7356 new_byte);
7357 }
7358 new_byte = maj_byte;
7359 } else {
7360 if (rl_print) {
7361 debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n",
7362 node, if_num, rankx, i, new_byte);
7363 }
7364 }
7365
7366 if (!disable_rank_majority) {
7367 // rank majority is dependent on the rank
7368 // counts, which are relative to best_byte,
7369 // so start there, and adjust according to the
7370 // rank counts majority
7371 rank_maj = best_byte;
7372 if (rank_more > rank_same &&
7373 rank_more > rank_less) {
7374 rank_maj++;
7375 } else if (rank_less > rank_same &&
7376 rank_less > rank_more) {
7377 rank_maj--;
7378 }
7379 rank_sum = rank_more + rank_same + rank_less;
7380
7381 // now, let rank majority possibly rule over
7382 // the current new_byte however we got it
7383 if (rank_maj != new_byte) { // only if different
7384 // Here is where we decide whether to
7385 // completely apply RANK_MAJORITY or not
7386 // ignore if less than
7387 if (rank_maj < new_byte) {
7388 if (rl_print) {
7389 debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: LESS: NOT using %d over %d.\n",
7390 node, if_num,
7391 rankx, i,
7392 rank_maj,
7393 new_byte);
7394 }
7395 } else {
7396 // For the moment, we do it
7397 // ONLY when running 2-slot
7398 // configs
7399 // OR when rank_sum is big
7400 // enough
7401 if (dimm_count > 1 ||
7402 rank_sum > 2) {
7403 // print only when rank
7404 // majority choice is
7405 // selected
7406 if (rl_print) {
7407 debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: picking %d over %d.\n",
7408 node,
7409 if_num,
7410 rankx,
7411 i,
7412 rank_maj,
7413 new_byte);
7414 }
7415 new_byte = rank_maj;
7416 } else {
7417 // FIXME: print some
7418 // info when we could
7419 // have chosen RANKMAJ
7420 // but did not
7421 if (rl_print) {
7422 debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: NOT using %d over %d (best=%d,sum=%d).\n",
7423 node,
7424 if_num,
7425 rankx,
7426 i,
7427 rank_maj,
7428 new_byte,
7429 best_byte,
7430 rank_sum);
7431 }
7432 }
7433 }
7434 }
7435 } /* if (!disable_rank_majority) */
7436 }
7437 // one last check:
7438 // if new_byte is still count_byte, BUT there was no count
7439 // for that value, DO SOMETHING!!!
7440 // FIXME: go back to original best byte from the best row
7441 if (new_byte == count_byte && count_same == 0) {
7442 new_byte = orig_best_byte;
7443 if (rl_print) {
7444 debug("N%d.LMC%d.R%d: FAILSAF: Byte %d: going back to original %d.\n",
7445 node, if_num, rankx, i, new_byte);
7446 }
7447 }
7448 // Look at counts for "perfect" bitmasks (PBMs) if we had
7449 // any for this byte-lane.
7450 // Remember, we only counted for DDR4, so zero means none
7451 // or DDR3, and we bypass this...
7452 value_mask = rank_perf[rankx].mask[i];
7453 disable_rlv_bump_this_byte = 0;
7454
7455 if (value_mask != 0 && rl_ctl.cn78xx.offset == 1) {
7456 int i, delay_count, delay_max = 0, del_val = 0;
7457 int num_values = __builtin_popcountll(value_mask);
7458 int sum_counts = 0;
7459 u64 temp_mask = value_mask;
7460
7461 disable_rlv_bump_this_byte = 1;
7462 i = __builtin_ffsll(temp_mask) - 1;
7463 if (rl_print)
7464 debug("N%d.LMC%d.R%d: PERFECT: Byte %d: OFF1: mask 0x%02llx (%d): ",
7465 node, if_num, rankx, i, value_mask >> i,
7466 num_values);
7467
7468 while (temp_mask != 0) {
7469 i = __builtin_ffsll(temp_mask) - 1;
7470 delay_count = rank_perf[rankx].count[i][i];
7471 sum_counts += delay_count;
7472 if (rl_print)
7473 debug("%2d(%2d) ", i, delay_count);
7474 if (delay_count >= delay_max) {
7475 delay_max = delay_count;
7476 del_val = i;
7477 }
7478 temp_mask &= ~(1UL << i);
7479 } /* while (temp_mask != 0) */
7480
7481 // if sum_counts is small, just use NEW_BYTE
7482 if (sum_counts < pbm_lowsum_limit) {
7483 if (rl_print)
7484 debug(": LOWSUM (%2d), choose ORIG ",
7485 sum_counts);
7486 del_val = new_byte;
7487 delay_max = rank_perf[rankx].count[i][del_val];
7488 }
7489
7490 // finish printing here...
7491 if (rl_print) {
7492 debug(": USING %2d (%2d) D%d\n", del_val,
7493 delay_max, disable_rlv_bump_this_byte);
7494 }
7495
7496 new_byte = del_val; // override with best PBM choice
7497
7498 } else if ((value_mask != 0) && (rl_ctl.cn78xx.offset == 2)) {
7499 // if (value_mask != 0) {
7500 int i, delay_count, del_val;
7501 int num_values = __builtin_popcountll(value_mask);
7502 int sum_counts = 0;
7503 u64 temp_mask = value_mask;
7504
7505 i = __builtin_ffsll(temp_mask) - 1;
7506 if (rl_print)
7507 debug("N%d.LMC%d.R%d: PERFECT: Byte %d: mask 0x%02llx (%d): ",
7508 node, if_num, rankx, i, value_mask >> i,
7509 num_values);
7510 while (temp_mask != 0) {
7511 i = __builtin_ffsll(temp_mask) - 1;
7512 delay_count = rank_perf[rankx].count[i][i];
7513 sum_counts += delay_count;
7514 if (rl_print)
7515 debug("%2d(%2d) ", i, delay_count);
7516 temp_mask &= ~(1UL << i);
7517 } /* while (temp_mask != 0) */
7518
7519 del_val = __builtin_ffsll(value_mask) - 1;
7520 delay_count =
7521 rank_perf[rankx].count[i][del_val];
7522
7523 // overkill, normally only 1-4 bits
7524 i = (value_mask >> del_val) & 0x1F;
7525
7526 // if sum_counts is small, treat as special and use
7527 // NEW_BYTE
7528 if (sum_counts < pbm_lowsum_limit) {
7529 if (rl_print)
7530 debug(": LOWSUM (%2d), choose ORIG",
7531 sum_counts);
7532 i = 99; // SPECIAL case...
7533 }
7534
7535 switch (i) {
7536 case 0x01 /* 00001b */:
7537 // allow BUMP
7538 break;
7539
7540 case 0x13 /* 10011b */:
7541 case 0x0B /* 01011b */:
7542 case 0x03 /* 00011b */:
7543 del_val += 1; // take the second
7544 disable_rlv_bump_this_byte = 1; // allow no BUMP
7545 break;
7546
7547 case 0x0D /* 01101b */:
7548 case 0x05 /* 00101b */:
7549 // test count of lowest and all
7550 if (delay_count >= 5 || sum_counts <= 5)
7551 del_val += 1; // take the hole
7552 else
7553 del_val += 2; // take the next set
7554 disable_rlv_bump_this_byte = 1; // allow no BUMP
7555 break;
7556
7557 case 0x0F /* 01111b */:
7558 case 0x17 /* 10111b */:
7559 case 0x07 /* 00111b */:
7560 del_val += 1; // take the second
7561 if (delay_count < 5) { // lowest count is small
7562 int second =
7563 rank_perf[rankx].count[i][del_val];
7564 int third =
7565 rank_perf[rankx].count[i][del_val + 1];
7566 // test if middle is more than 1 OR
7567 // top is more than 1;
7568 // this means if they are BOTH 1,
7569 // then we keep the second...
7570 if (second > 1 || third > 1) {
7571 // if middle is small OR top
7572 // is large
7573 if (second < 5 ||
7574 third > 1) {
7575 // take the top
7576 del_val += 1;
7577 if (rl_print)
7578 debug(": TOP7 ");
7579 }
7580 }
7581 }
7582 disable_rlv_bump_this_byte = 1; // allow no BUMP
7583 break;
7584
7585 default: // all others...
7586 if (rl_print)
7587 debug(": ABNORMAL, choose ORIG");
7588
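/* fall through to the special ORIG handling below */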
7589 case 99: // special
7590 // FIXME: choose original choice?
7591 del_val = new_byte;
7592 disable_rlv_bump_this_byte = 1; // allow no BUMP
7593 break;
7594 }
7595 delay_count =
7596 rank_perf[rankx].count[i][del_val];
7597
7598 // finish printing here...
7599 if (rl_print)
7600 debug(": USING %2d (%2d) D%d\n", del_val,
7601 delay_count, disable_rlv_bump_this_byte);
7602 new_byte = del_val; // override with best PBM choice
7603 } else {
7604 if (ddr_type == DDR4_DRAM) { // only report when DDR4
7605 // FIXME: remove or increase VBL for this
7606 // output...
7607 if (rl_print)
7608 debug("N%d.LMC%d.R%d: PERFECT: Byte %d: ZERO PBMs, USING %d\n",
7609 node, if_num, rankx, i,
7610 new_byte);
7611 // prevent ODD bump, rely on original
7612 disable_rlv_bump_this_byte = 1;
7613 }
7614 } /* if (value_mask != 0) */
7615
7616 // optionally bump the delay value
7617 if (enable_rldelay_bump && !disable_rlv_bump_this_byte) {
7618 if ((new_byte & enable_rldelay_bump) ==
7619 enable_rldelay_bump) {
7620 int bump_value = new_byte + rldelay_bump_incr;
7621
7622 if (rl_print) {
7623 debug("N%d.LMC%d.R%d: RLVBUMP: Byte %d: CHANGING %d to %d (%s)\n",
7624 node, if_num, rankx, i,
7625 new_byte, bump_value,
7626 (value_mask &
7627 (1 << bump_value)) ?
7628 "PBM" : "NOPBM");
7629 }
7630 new_byte = bump_value;
7631 }
7632 }
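/*
 * Example of the bump test above: enable_rldelay_bump is used as a bit
 * mask that new_byte must fully match in its low bits. With the DDR4
 * default of 3, only delays ending in binary ...11 get bumped by
 * rldelay_bump_incr (+1 or -1 depending on RLEVEL_CTL[OFFSET]); with
 * the 73xx default of 1, every odd delay qualifies.
 */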
7633
7634 // last checks for count-related purposes
7635 if (new_byte == best_byte && count_more > 0 &&
7636 count_less == 0) {
7637 // we really should take best_byte + 1
7638 if (rl_print) {
7639 debug("N%d.LMC%d.R%d: CADJMOR: Byte %d: CHANGING %d to %d\n",
7640 node, if_num, rankx, i,
7641 new_byte, best_byte + 1);
7642 }
7643 new_byte = best_byte + 1;
7644 } else if ((new_byte < best_byte) && (count_same > 0)) {
7645 // we really should take best_byte
7646 if (rl_print) {
7647 debug("N%d.LMC%d.R%d: CADJSAM: Byte %d: CHANGING %d to %d\n",
7648 node, if_num, rankx, i,
7649 new_byte, best_byte);
7650 }
7651 new_byte = best_byte;
7652 } else if (new_byte > best_byte) {
7653 if ((new_byte == (best_byte + 1)) &&
7654 count_more == 0 && count_less > 0) {
7655 // we really should take best_byte
7656 if (rl_print) {
7657 debug("N%d.LMC%d.R%d: CADJLE1: Byte %d: CHANGING %d to %d\n",
7658 node, if_num, rankx, i,
7659 new_byte, best_byte);
7660 }
7661 new_byte = best_byte;
7662 } else if ((new_byte >= (best_byte + 2)) &&
7663 ((count_more > 0) || (count_same > 0))) {
7664 if (rl_print) {
7665 debug("N%d.LMC%d.R%d: CADJLE2: Byte %d: CHANGING %d to %d\n",
7666 node, if_num, rankx, i,
7667 new_byte, best_byte + 1);
7668 }
7669 new_byte = best_byte + 1;
7670 }
7671 }
7672
7673 if (rl_print) {
7674 debug("N%d.LMC%d.R%d: SUMMARY: Byte %d: orig %d now %d, more %d same %d less %d, using %d\n",
7675 node, if_num, rankx, i, orig_best_byte,
7676 best_byte, count_more, count_same, count_less,
7677 new_byte);
7678 }
7679
7680 // update the byte with the new value (NOTE: orig value in
7681 // the CSR may not be current "best")
7682 upd_rl_rank(&rl_rank, i, new_byte);
7683
7684 // save new best for neighbor use
7685 rank_best_bytes[i] = new_byte;
7686 } /* for (i = 0; i < 8+ecc_ena; i++) */
7687
7688 ////////////////// this is the end of the BEST BYTE LOOP
7689
7690 if (saved_rl_rank.u64 != rl_rank.u64) {
7691 lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num),
7692 rl_rank.u64);
7693 rl_rank.u64 = lmc_rd(priv,
7694 CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
7695 debug("Adjusting Read-Leveling per-RANK settings.\n");
7696 } else {
7697 debug("Not Adjusting Read-Leveling per-RANK settings.\n");
7698 }
7699 display_rl_with_final(if_num, rl_rank, rankx);
7700
7701 // FIXME: does this help make the output a little easier to focus?
7702 if (rl_print > 0)
7703 debug("-----------\n");
7704
7705#define RLEVEL_RANKX_EXTRAS_INCR 0
7706 // if there are unused entries to be filled
7707 if ((rank_mask & 0x0f) != 0x0f) {
7708 // copy the current rank
7709 union cvmx_lmcx_rlevel_rankx temp_rl_rank = rl_rank;
7710
7711 if (rankx < 3) {
7712#if RLEVEL_RANKX_EXTRAS_INCR > 0
7713 int byte, delay;
7714
7715 // modify the copy in prep for writing to empty slot(s)
7716 for (byte = 0; byte < 9; byte++) {
7717 delay = get_rl_rank(&temp_rl_rank, byte) +
7718 RLEVEL_RANKX_EXTRAS_INCR;
7719 if (delay > RLEVEL_BYTE_MSK)
7720 delay = RLEVEL_BYTE_MSK;
7721 upd_rl_rank(&temp_rl_rank, byte, delay);
7722 }
7723#endif
7724
7725 // if rank 0, write rank 1 and rank 2 here if empty
7726 if (rankx == 0) {
7727 // check that rank 1 is empty
7728 if (!(rank_mask & (1 << 1))) {
7729 debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7730 node, if_num, rankx, 1);
7731 lmc_wr(priv,
7732 CVMX_LMCX_RLEVEL_RANKX(1,
7733 if_num),
7734 temp_rl_rank.u64);
7735 }
7736
7737 // check that rank 2 is empty
7738 if (!(rank_mask & (1 << 2))) {
7739 debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7740 node, if_num, rankx, 2);
7741 lmc_wr(priv,
7742 CVMX_LMCX_RLEVEL_RANKX(2,
7743 if_num),
7744 temp_rl_rank.u64);
7745 }
7746 }
7747
7748 // if ranks 0, 1 or 2, write rank 3 here if empty
7749 // check that rank 3 is empty
7750 if (!(rank_mask & (1 << 3))) {
7751 debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7752 node, if_num, rankx, 3);
7753 lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(3, if_num),
7754 temp_rl_rank.u64);
7755 }
7756 }
7757 }
7758}
7759
7760static void lmc_read_leveling(struct ddr_priv *priv)
7761{
7762 struct rl_score rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4];
7763 union cvmx_lmcx_control ctl;
7764 union cvmx_lmcx_config cfg;
7765 int rankx;
7766 char *s;
7767 int i;
7768
7769 /*
7770 * 4.8.10 LMC Read Leveling
7771 *
7772 * LMC supports an automatic read-leveling separately per byte-lane
7773 * using the DDR3 multipurpose register predefined pattern for system
7774 * calibration defined in the JEDEC DDR3 specifications.
7775 *
7776 * All of DDR PLL, LMC CK, and LMC DRESET, and early LMC initializations
7777 * must be completed prior to starting this LMC read-leveling sequence.
7778 *
7779 * Software could simply write the desired read-leveling values into
7780 * LMC(0)_RLEVEL_RANK(0..3). This section describes a sequence that uses
7781 * LMC's autoread-leveling capabilities.
7782 * LMC's auto read-leveling capabilities.
7783 * When LMC does the read-leveling sequence for a rank, it first enables
7784 * the DDR3 multipurpose register predefined pattern for system
7785 * calibration on the selected DRAM rank via a DDR3 MR3 write, then
7786 * executes 64 RD operations at different internal delay settings, then
7787 * disables the predefined pattern via another DDR3 MR3 write
7788 * operation. LMC determines the pass or fail of each of the 64 settings
7789 * independently for each byte lane, then writes appropriate
7790 * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] values for the rank.
7791 *
7792 * After read-leveling for a rank, software can read the 64 pass/fail
7793 * indications for one byte lane via LMC(0)_RLEVEL_DBG[BITMASK].
7794 * Software can observe all pass/fail results for all byte lanes in a
7795 * rank via separate read-leveling sequences on the rank with different
7796 * LMC(0)_RLEVEL_CTL[BYTE] values.
7797 *
7798 * The 64 pass/fail results will typically have failures for the low
7799 * delays, followed by a run of some passing settings, followed by more
7800 * failures in the remaining high delays. LMC sets
7801 * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] to one of the passing settings.
7802 * First, LMC selects the longest run of successes in the 64 results.
7803 * (In the unlikely event that there is more than one longest run, LMC
7804 * selects the first one.) Then if LMC(0)_RLEVEL_CTL[OFFSET_EN] = 1 and
7805 * the selected run has more than LMC(0)_RLEVEL_CTL[OFFSET] successes,
7806 * LMC selects the last passing setting in the run minus
7807 * LMC(0)_RLEVEL_CTL[OFFSET]. Otherwise LMC selects the middle setting
7808 * in the run (rounding earlier when necessary). We expect the
7809 * read-leveling sequence to produce good results with the reset values
7810 * LMC(0)_RLEVEL_CTL [OFFSET_EN]=1, LMC(0)_RLEVEL_CTL[OFFSET] = 2.
7811 *
7812 * The read-leveling sequence has the following steps:
7813 *
7814 * 1. Select desired LMC(0)_RLEVEL_CTL[OFFSET_EN,OFFSET,BYTE] settings.
7815 * Do the remaining substeps 2-4 separately for each rank i with
7816 * attached DRAM.
7817 *
7818 * 2. Without changing any other fields in LMC(0)_CONFIG,
7819 *
7820 * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select read-leveling
7821 *
7822 * o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
7823 *
7824 * o write LMC(0)_SEQ_CTL[INIT_START] = 1
7825 *
7826 * This initiates the previously-described read-leveling.
7827 *
7828 * 3. Wait until LMC(0)_RLEVEL_RANKi[STATUS] != 2
7829 *
7830 * LMC will have updated LMC(0)_RLEVEL_RANKi[BYTE*] for all byte
7831 * lanes at this point.
7832 *
7833 * If ECC DRAM is not present (i.e. when DRAM is not attached to the
7834 * DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and
7835 * DDR_DQ<35:32> chip signals), write LMC(0)_RLEVEL_RANK*[BYTE8] =
7836 * LMC(0)_RLEVEL_RANK*[BYTE0]. Write LMC(0)_RLEVEL_RANK*[BYTE4] =
7837 * LMC(0)_RLEVEL_RANK*[BYTE0].
7838 *
7839 * 4. If desired, consult LMC(0)_RLEVEL_DBG[BITMASK] and compare to
7840 * LMC(0)_RLEVEL_RANKi[BYTE*] for the lane selected by
7841 * LMC(0)_RLEVEL_CTL[BYTE]. If desired, modify
7842 * LMC(0)_RLEVEL_CTL[BYTE] to a new value and repeat so that all
7843 * BITMASKs can be observed.
7844 *
7845 * 5. Initialize LMC(0)_RLEVEL_RANK* values for all unused ranks.
7846 *
7847 * Let rank i be a rank with attached DRAM.
7848 *
7849 * For all ranks j that do not have attached DRAM, set
7850 * LMC(0)_RLEVEL_RANKj = LMC(0)_RLEVEL_RANKi.
7851 *
7852 * This read-leveling sequence can help select the proper CN70XX ODT
7853 * resistance value (LMC(0)_COMP_CTL2[RODT_CTL]). A hardware-generated
7854 * LMC(0)_RLEVEL_RANKi[BYTEj] value (for a used byte lane j) that is
7855 * drastically different from a neighboring LMC(0)_RLEVEL_RANKi[BYTEk]
7856 * (for a used byte lane k) can indicate that the CN70XX ODT value is
7857 * bad. It is possible to simultaneously optimize both
7858 * LMC(0)_COMP_CTL2[RODT_CTL] and LMC(0)_RLEVEL_RANKn[BYTE*] values by
7859 * performing this read-leveling sequence for several
7860 * LMC(0)_COMP_CTL2[RODT_CTL] values and selecting the one with the
7861 * best LMC(0)_RLEVEL_RANKn[BYTE*] profile for the ranks.
7862 */
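
/*
 * Worked example of the selection rule described above: if
 * LMC(0)_RLEVEL_DBG[BITMASK] = 0x000001fffff00000, the longest run of
 * passing settings spans 20..40. With OFFSET_EN = 1 and OFFSET = 2,
 * LMC picks the last passing setting minus the offset, 40 - 2 = 38;
 * with OFFSET_EN = 0 it would pick the middle of the run, setting 30.
 * A stand-alone model of this selection is sketched just after this
 * function.
 */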
7863
7864 rl_rodt_err = 0;
7865 rl_dbg_loops = 1;
7866 saved_int_zqcs_dis = 0;
7867 max_adj_rl_del_inc = 0;
7868 rl_print = RLEVEL_PRINTALL_DEFAULT;
7869
7870#ifdef ENABLE_HARDCODED_RLEVEL
7871 char part_number[21] = {0};
7872#endif /* ENABLE_HARDCODED_RLEVEL */
7873
7874 pbm_lowsum_limit = 5; // FIXME: is this a good default?
7875 // FIXME: PBM skip for RODT 240 and 34
7876 pbm_rodt_skip = (1U << ddr4_rodt_ctl_240_ohm) |
7877 (1U << ddr4_rodt_ctl_34_ohm);
7878
7879 disable_rank_majority = 0; // control rank majority processing
7880
7881 // default to mask 11b for DDR4 (just ODDs, 01b, on 73xx), else
7882 // DISABLE for DDR3
7883 rldelay_bump_incr = 0;
7884 disable_rlv_bump_this_byte = 0;
7885
7886 enable_rldelay_bump = (ddr_type == DDR4_DRAM) ?
7887 ((octeon_is_cpuid(OCTEON_CN73XX)) ? 1 : 3) : 0;
7888
7889 s = lookup_env(priv, "ddr_disable_rank_majority");
7890 if (s)
7891 disable_rank_majority = !!simple_strtoul(s, NULL, 0);
7892
7893 s = lookup_env(priv, "ddr_pbm_lowsum_limit");
7894 if (s)
7895 pbm_lowsum_limit = simple_strtoul(s, NULL, 0);
7896
7897 s = lookup_env(priv, "ddr_pbm_rodt_skip");
7898 if (s)
7899 pbm_rodt_skip = simple_strtoul(s, NULL, 0);
7900 memset(rank_perf, 0, sizeof(rank_perf));
7901
7902 ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
7903 save_ddr2t = ctl.cn78xx.ddr2t;
7904
7905 cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
7906 ecc_ena = cfg.cn78xx.ecc_ena;
7907
7908 s = lookup_env(priv, "ddr_rlevel_2t");
7909 if (s)
7910 ctl.cn78xx.ddr2t = simple_strtoul(s, NULL, 0);
7911
7912 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
7913
7914 debug("LMC%d: Performing Read-Leveling\n", if_num);
7915
7916 rl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num));
7917
7918 rl_samples = c_cfg->rlevel_average_loops;
7919 if (rl_samples == 0) {
7920 rl_samples = RLEVEL_SAMPLES_DEFAULT;
7921 // up the samples for these cases
7922 if (dimm_count == 1 || num_ranks == 1)
7923 rl_samples = rl_samples * 2 + 1;
7924 }
7925
7926 rl_compute = c_cfg->rlevel_compute;
7927 rl_ctl.cn78xx.offset_en = c_cfg->offset_en;
7928 rl_ctl.cn78xx.offset = spd_rdimm
7929 ? c_cfg->offset_rdimm
7930 : c_cfg->offset_udimm;
7931
7932 int value = 1; // should ALWAYS be set
7933
7934 s = lookup_env(priv, "ddr_rlevel_delay_unload");
7935 if (s)
7936 value = !!simple_strtoul(s, NULL, 0);
7937 rl_ctl.cn78xx.delay_unload_0 = value;
7938 rl_ctl.cn78xx.delay_unload_1 = value;
7939 rl_ctl.cn78xx.delay_unload_2 = value;
7940 rl_ctl.cn78xx.delay_unload_3 = value;
7941
7942 // use OR_DIS=1 to try for better results
7943 rl_ctl.cn78xx.or_dis = 1;
7944
7945 /*
7946 * If we will be switching to 32-bit mode, level based on only
7947 * four bits because there are only 4 ECC bits.
7948 */
7949 rl_ctl.cn78xx.bitmask = (if_64b) ? 0xFF : 0x0F;
7950
7951 // allow overrides
7952 s = lookup_env(priv, "ddr_rlevel_ctl_or_dis");
7953 if (s)
7954 rl_ctl.cn78xx.or_dis = simple_strtoul(s, NULL, 0);
7955
7956 s = lookup_env(priv, "ddr_rlevel_ctl_bitmask");
7957 if (s)
7958 rl_ctl.cn78xx.bitmask = simple_strtoul(s, NULL, 0);
7959
7960 rl_comp_offs = spd_rdimm
7961 ? c_cfg->rlevel_comp_offset_rdimm
7962 : c_cfg->rlevel_comp_offset_udimm;
7963 s = lookup_env(priv, "ddr_rlevel_comp_offset");
7964 if (s)
7965 rl_comp_offs = strtoul(s, NULL, 0);
7966
7967 s = lookup_env(priv, "ddr_rlevel_offset");
7968 if (s)
7969 rl_ctl.cn78xx.offset = simple_strtoul(s, NULL, 0);
7970
7971 s = lookup_env(priv, "ddr_rlevel_offset_en");
7972 if (s)
7973 rl_ctl.cn78xx.offset_en = simple_strtoul(s, NULL, 0);
7974
7975 s = lookup_env(priv, "ddr_rlevel_ctl");
7976 if (s)
7977 rl_ctl.u64 = simple_strtoul(s, NULL, 0);
7978
7979 lmc_wr(priv,
7980 CVMX_LMCX_RLEVEL_CTL(if_num),
7981 rl_ctl.u64);
7982
7983 // do this here so we can look at final RLEVEL_CTL[offset] setting...
7984 s = lookup_env(priv, "ddr_enable_rldelay_bump");
7985 if (s) {
7986 // also use as mask bits
7987 enable_rldelay_bump = strtoul(s, NULL, 0);
7988 }
7989
7990 if (enable_rldelay_bump != 0)
7991 rldelay_bump_incr = (rl_ctl.cn78xx.offset == 1) ? -1 : 1;
7992
7993 s = lookup_env(priv, "ddr%d_rlevel_debug_loops", if_num);
7994 if (s)
7995 rl_dbg_loops = simple_strtoul(s, NULL, 0);
7996
7997 s = lookup_env(priv, "ddr_rtt_nom_auto");
7998 if (s)
7999 ddr_rtt_nom_auto = !!simple_strtoul(s, NULL, 0);
8000
8001 s = lookup_env(priv, "ddr_rlevel_average");
8002 if (s)
8003 rl_samples = simple_strtoul(s, NULL, 0);
8004
8005 s = lookup_env(priv, "ddr_rlevel_compute");
8006 if (s)
8007 rl_compute = simple_strtoul(s, NULL, 0);
8008
8009 s = lookup_env(priv, "ddr_rlevel_printall");
8010 if (s)
8011 rl_print = simple_strtoul(s, NULL, 0);
8012
8013 debug("RLEVEL_CTL : 0x%016llx\n",
8014 rl_ctl.u64);
8015 debug("RLEVEL_OFFSET : %6d\n",
8016 rl_ctl.cn78xx.offset);
8017 debug("RLEVEL_OFFSET_EN : %6d\n",
8018 rl_ctl.cn78xx.offset_en);
8019
8020 /*
8021 * The purpose for the indexed table is to sort the settings
8022 * by the ohm value to simplify the testing when incrementing
8023 * through the settings. (index => ohms) 1=120, 2=60, 3=40,
8024 * 4=30, 5=20
8025 */
8026 min_rtt_nom_idx = (c_cfg->min_rtt_nom_idx == 0) ?
8027 1 : c_cfg->min_rtt_nom_idx;
8028 max_rtt_nom_idx = (c_cfg->max_rtt_nom_idx == 0) ?
8029 5 : c_cfg->max_rtt_nom_idx;
8030
8031 min_rodt_ctl = (c_cfg->min_rodt_ctl == 0) ? 1 : c_cfg->min_rodt_ctl;
8032 max_rodt_ctl = (c_cfg->max_rodt_ctl == 0) ? 5 : c_cfg->max_rodt_ctl;
8033
8034 s = lookup_env(priv, "ddr_min_rodt_ctl");
8035 if (s)
8036 min_rodt_ctl = simple_strtoul(s, NULL, 0);
8037
8038 s = lookup_env(priv, "ddr_max_rodt_ctl");
8039 if (s)
8040 max_rodt_ctl = simple_strtoul(s, NULL, 0);
8041
8042 s = lookup_env(priv, "ddr_min_rtt_nom_idx");
8043 if (s)
8044 min_rtt_nom_idx = simple_strtoul(s, NULL, 0);
8045
8046 s = lookup_env(priv, "ddr_max_rtt_nom_idx");
8047 if (s)
8048 max_rtt_nom_idx = simple_strtoul(s, NULL, 0);
8049
8050#ifdef ENABLE_HARDCODED_RLEVEL
8051 if (c_cfg->rl_tbl) {
8052 /* Check for hard-coded read-leveling settings */
8053 get_dimm_part_number(part_number, &dimm_config_table[0],
8054 0, ddr_type);
8055 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
8056 if (!(rank_mask & (1 << rankx)))
8057 continue;
8058
8059 rl_rank.u64 = lmc_rd(priv,
8060 CVMX_LMCX_RLEVEL_RANKX(rankx,
8061 if_num));
8062
8063 i = 0;
8064 while (c_cfg->rl_tbl[i].part) {
8065 debug("DIMM part number:\"%s\", SPD: \"%s\"\n",
8066 c_cfg->rl_tbl[i].part, part_number);
8067 if ((strcmp(part_number,
8068 c_cfg->rl_tbl[i].part) == 0) &&
8069 (abs(c_cfg->rl_tbl[i].speed -
8070 2 * ddr_hertz / (1000 * 1000)) < 10)) {
8071 debug("Using hard-coded read leveling for DIMM part number: \"%s\"\n",
8072 part_number);
8073 rl_rank.u64 =
8074 c_cfg->rl_tbl[i].rl_rank[if_num][rankx];
8075 lmc_wr(priv,
8076 CVMX_LMCX_RLEVEL_RANKX(rankx,
8077 if_num),
8078 rl_rank.u64);
8079 rl_rank.u64 =
8080 lmc_rd(priv,
8081 CVMX_LMCX_RLEVEL_RANKX(rankx,
8082 if_num));
8083 display_rl(if_num, rl_rank, rankx);
8084 /* Disable h/w read-leveling */
8085 rl_dbg_loops = 0;
8086 break;
8087 }
8088 ++i;
8089 }
8090 }
8091 }
8092#endif /* ENABLE_HARDCODED_RLEVEL */
8093
8094 max_adj_rl_del_inc = c_cfg->maximum_adjacent_rlevel_delay_increment;
8095 s = lookup_env(priv, "ddr_maximum_adjacent_rlevel_delay_increment");
8096 if (s)
8097 max_adj_rl_del_inc = strtoul(s, NULL, 0);
8098
8099 while (rl_dbg_loops--) {
8100 union cvmx_lmcx_modereg_params1 mp1;
8101 union cvmx_lmcx_comp_ctl2 cc2;
8102
8103 /* Initialize the error scoreboard */
8104 memset(rl_score, 0, sizeof(rl_score));
8105
8106 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8107 saved_ddr__ptune = cc2.cn78xx.ddr__ptune;
8108 saved_ddr__ntune = cc2.cn78xx.ddr__ntune;
8109
8110 /* Disable dynamic compensation settings */
8111 if (rl_comp_offs != 0) {
8112 cc2.cn78xx.ptune = saved_ddr__ptune;
8113 cc2.cn78xx.ntune = saved_ddr__ntune;
8114
8115 /*
8116 * Round up the ptune calculation to bias the odd
8117 * cases toward ptune
8118 */
8119 cc2.cn78xx.ptune += divide_roundup(rl_comp_offs, 2);
8120 cc2.cn78xx.ntune -= rl_comp_offs / 2;
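/*
 * e.g. rl_comp_offs = 5 gives ptune += divide_roundup(5, 2) = 3 and
 * ntune -= 5 / 2 = 2, so the odd half of the offset lands on ptune.
 */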
8121
8122 ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
8123 saved_int_zqcs_dis = ctl.s.int_zqcs_dis;
8124 /* Disable ZQCS while in bypass. */
8125 ctl.s.int_zqcs_dis = 1;
8126 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
8127
8128 cc2.cn78xx.byp = 1; /* Enable bypass mode */
8129 lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
8130 lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8131 /* Read again */
8132 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8133 debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
8134 cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune);
8135 }
8136
8137 mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
8138
8139 for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
8140 ++rtt_idx) {
8141 rtt_nom = imp_val->rtt_nom_table[rtt_idx];
8142
8143 /*
8144 * When the read ODT mask is zero, the dyn_rtt_nom_mask is
8145 * zero and RTT_NOM will not be changing during
8146 * read-leveling. Since the value is fixed we only need
8147 * to test it once.
8148 */
8149 if (dyn_rtt_nom_mask == 0) {
8150 // flag not to print NOM ohms
8151 print_nom_ohms = -1;
8152 } else {
8153 if (dyn_rtt_nom_mask & 1)
8154 mp1.s.rtt_nom_00 = rtt_nom;
8155 if (dyn_rtt_nom_mask & 2)
8156 mp1.s.rtt_nom_01 = rtt_nom;
8157 if (dyn_rtt_nom_mask & 4)
8158 mp1.s.rtt_nom_10 = rtt_nom;
8159 if (dyn_rtt_nom_mask & 8)
8160 mp1.s.rtt_nom_11 = rtt_nom;
8161 // FIXME? rank 0 ohms always?
8162 print_nom_ohms =
8163 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00];
8164 }
8165
8166 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num),
8167 mp1.u64);
8168
8169 if (print_nom_ohms >= 0 && rl_print > 1) {
8170 debug("\n");
8171 debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
8172 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
8173 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
8174 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
8175 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
8176 mp1.s.rtt_nom_11,
8177 mp1.s.rtt_nom_10,
8178 mp1.s.rtt_nom_01,
8179 mp1.s.rtt_nom_00);
8180 }
8181
8182 ddr_init_seq(priv, rank_mask, if_num);
8183
8184 // Try RANK outside RODT to rearrange the output...
8185 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
8186 if (!(rank_mask & (1 << rankx)))
8187 continue;
8188
8189 for (rodt_ctl = max_rodt_ctl;
8190 rodt_ctl >= min_rodt_ctl; --rodt_ctl)
8191 rodt_loop(priv, rankx, rl_score);
8192 }
8193 }
8194
8195 /* Re-enable dynamic compensation settings. */
8196 if (rl_comp_offs != 0) {
8197 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8198
8199 cc2.cn78xx.ptune = 0;
8200 cc2.cn78xx.ntune = 0;
8201 cc2.cn78xx.byp = 0; /* Disable bypass mode */
8202 lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
8203 /* Read once */
8204 lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8205
8206 /* Read again */
8207 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8208 debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
8209 cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune);
8210
8211 ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
8212 /* Restore original setting */
8213 ctl.s.int_zqcs_dis = saved_int_zqcs_dis;
8214 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
8215 }
8216
8217 int override_compensation = 0;
8218
8219 s = lookup_env(priv, "ddr__ptune");
8220 if (s)
8221 saved_ddr__ptune = strtoul(s, NULL, 0);
8222
8223 s = lookup_env(priv, "ddr__ntune");
8224 if (s) {
8225 saved_ddr__ntune = strtoul(s, NULL, 0);
8226 override_compensation = 1;
8227 }
8228
8229 if (override_compensation) {
8230 cc2.cn78xx.ptune = saved_ddr__ptune;
8231 cc2.cn78xx.ntune = saved_ddr__ntune;
8232
8233 ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
8234 saved_int_zqcs_dis = ctl.s.int_zqcs_dis;
8235 /* Disable ZQCS while in bypass. */
8236 ctl.s.int_zqcs_dis = 1;
8237 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
8238
8239 cc2.cn78xx.byp = 1; /* Enable bypass mode */
8240 lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
8241 /* Read again */
8242 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8243
8244 debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
8245 cc2.cn78xx.ptune, cc2.cn78xx.ntune);
8246 }
8247
8248 /* Evaluation block */
8249 /* Still at initial value? */
8250 int best_rodt_score = DEFAULT_BEST_RANK_SCORE;
8251 int auto_rodt_ctl = 0;
8252 int auto_rtt_nom = 0;
8253 int rodt_score;
8254
8255 rodt_row_skip_mask = 0;
8256
8257 // just add specific RODT rows to the skip mask for DDR4
8258 // at this time...
8259 if (ddr_type == DDR4_DRAM) {
8260 // skip RODT row 34 ohms for all DDR4 types
8261 rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_34_ohm);
8262 // skip RODT row 40 ohms for all DDR4 types
8263 rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_40_ohm);
8264 // For now, do not skip RODT row 40 or 48 ohm when
8265 // ddr_hertz is above 1075 MHz
8266 if (ddr_hertz > 1075000000) {
8267 // noskip RODT row 40 ohms
8268 rodt_row_skip_mask &=
8269 ~(1 << ddr4_rodt_ctl_40_ohm);
8270 // noskip RODT row 48 ohms
8271 rodt_row_skip_mask &=
8272 ~(1 << ddr4_rodt_ctl_48_ohm);
8273 }
8274 // For now, do not skip RODT row 48 ohm for 2Rx4
8275 // stacked die DIMMs
8276 if (is_stacked_die && num_ranks == 2 &&
8277 dram_width == 4) {
8278 // noskip RODT row 48 ohms
8279 rodt_row_skip_mask &=
8280 ~(1 << ddr4_rodt_ctl_48_ohm);
8281 }
8282 // for now, leave all rows eligible when we have
8283 // mini-DIMMs...
8284 if (spd_dimm_type == 5 || spd_dimm_type == 6)
8285 rodt_row_skip_mask = 0;
8286 // for now, leave all rows eligible when we have
8287 // a 2-slot 1-rank config
8288 if (dimm_count == 2 && num_ranks == 1)
8289 rodt_row_skip_mask = 0;
8290
8291 debug("Evaluating Read-Leveling Scoreboard for AUTO settings.\n");
8292 for (rtt_idx = min_rtt_nom_idx;
8293 rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
8294 rtt_nom = imp_val->rtt_nom_table[rtt_idx];
8295
8296 for (rodt_ctl = max_rodt_ctl;
8297 rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
8298 rodt_score = 0;
8299 for (rankx = 0; rankx < dimm_count * 4;
8300 rankx++) {
8301 if (!(rank_mask & (1 << rankx)))
8302 continue;
8303
8304 debug("rl_score[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score:%d\n",
8305 rtt_nom, rodt_ctl, rankx,
8306 rl_score[rtt_nom][rodt_ctl][rankx].score);
8307 rodt_score +=
8308 rl_score[rtt_nom][rodt_ctl][rankx].score;
8309 }
8310 // FIXME: do we need to skip RODT rows
8311 // here, like we do below in the
8312 // by-RANK settings?
8313
8314 /*
8315 * When using automatic ODT settings use
8316 * the ODT settings associated with the
8317 * best score for all of the tested ODT
8318 * combinations.
8319 */
8320
8321 if (rodt_score < best_rodt_score ||
8322 (rodt_score == best_rodt_score &&
8323 (imp_val->rodt_ohms[rodt_ctl] >
8324 imp_val->rodt_ohms[auto_rodt_ctl]))) {
8325 debug("AUTO: new best score for rodt:%d (%d), new score:%d, previous score:%d\n",
8326 rodt_ctl,
8327 imp_val->rodt_ohms[rodt_ctl],
8328 rodt_score,
8329 best_rodt_score);
8330 best_rodt_score = rodt_score;
8331 auto_rodt_ctl = rodt_ctl;
8332 auto_rtt_nom = rtt_nom;
8333 }
8334 }
8335 }
8336
8337 mp1.u64 = lmc_rd(priv,
8338 CVMX_LMCX_MODEREG_PARAMS1(if_num));
8339
8340 if (ddr_rtt_nom_auto) {
8341 /* Store the automatically set RTT_NOM value */
8342 if (dyn_rtt_nom_mask & 1)
8343 mp1.s.rtt_nom_00 = auto_rtt_nom;
8344 if (dyn_rtt_nom_mask & 2)
8345 mp1.s.rtt_nom_01 = auto_rtt_nom;
8346 if (dyn_rtt_nom_mask & 4)
8347 mp1.s.rtt_nom_10 = auto_rtt_nom;
8348 if (dyn_rtt_nom_mask & 8)
8349 mp1.s.rtt_nom_11 = auto_rtt_nom;
8350 } else {
8351 /*
8352 * restore the manual settings to the register
8353 */
8354 mp1.s.rtt_nom_00 = default_rtt_nom[0];
8355 mp1.s.rtt_nom_01 = default_rtt_nom[1];
8356 mp1.s.rtt_nom_10 = default_rtt_nom[2];
8357 mp1.s.rtt_nom_11 = default_rtt_nom[3];
8358 }
8359
8360 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num),
8361 mp1.u64);
8362 debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
8363 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
8364 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
8365 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
8366 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
8367 mp1.s.rtt_nom_11,
8368 mp1.s.rtt_nom_10,
8369 mp1.s.rtt_nom_01,
8370 mp1.s.rtt_nom_00);
8371
8372 debug("RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
8373 imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)],
8374 imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)],
8375 imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)],
8376 imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)],
8377 extr_wr(mp1.u64, 3),
8378 extr_wr(mp1.u64, 2),
8379 extr_wr(mp1.u64, 1),
8380 extr_wr(mp1.u64, 0));
8381
8382 debug("DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
8383 imp_val->dic_ohms[mp1.s.dic_11],
8384 imp_val->dic_ohms[mp1.s.dic_10],
8385 imp_val->dic_ohms[mp1.s.dic_01],
8386 imp_val->dic_ohms[mp1.s.dic_00],
8387 mp1.s.dic_11,
8388 mp1.s.dic_10,
8389 mp1.s.dic_01,
8390 mp1.s.dic_00);
8391
8392 if (ddr_type == DDR4_DRAM) {
8393 union cvmx_lmcx_modereg_params2 mp2;
8394 /*
8395 * We must read the CSR, and not depend on
8396 * odt_config[odt_idx].odt_mask2, since we could
8397 * have overridden values with envvars.
8398 * NOTE: this corrects the printout, since the
8399 * CSR is not written with the old values...
8400 */
8401 mp2.u64 = lmc_rd(priv,
8402 CVMX_LMCX_MODEREG_PARAMS2(if_num));
8403
8404 debug("RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
8405 imp_val->rtt_nom_ohms[mp2.s.rtt_park_11],
8406 imp_val->rtt_nom_ohms[mp2.s.rtt_park_10],
8407 imp_val->rtt_nom_ohms[mp2.s.rtt_park_01],
8408 imp_val->rtt_nom_ohms[mp2.s.rtt_park_00],
8409 mp2.s.rtt_park_11,
8410 mp2.s.rtt_park_10,
8411 mp2.s.rtt_park_01,
8412 mp2.s.rtt_park_00);
8413
8414 debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n",
8415 "VREF_RANGE",
8416 mp2.s.vref_range_11,
8417 mp2.s.vref_range_10,
8418 mp2.s.vref_range_01,
8419 mp2.s.vref_range_00);
8420
8421 debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n",
8422 "VREF_VALUE",
8423 mp2.s.vref_value_11,
8424 mp2.s.vref_value_10,
8425 mp2.s.vref_value_01,
8426 mp2.s.vref_value_00);
8427 }
8428
8429 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8430 if (ddr_rodt_ctl_auto) {
8431 cc2.cn78xx.rodt_ctl = auto_rodt_ctl;
8432 } else {
8433 // back to the original setting
8434 cc2.cn78xx.rodt_ctl = default_rodt_ctl;
8435 }
8436 lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
8437 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8438 debug("Read ODT_CTL : 0x%x (%d ohms)\n",
8439 cc2.cn78xx.rodt_ctl,
8440 imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
8441
8442 /*
8443 * Use the delays associated with the best score for
8444 * each individual rank
8445 */
8446 debug("Evaluating Read-Leveling Scoreboard for per-RANK settings.\n");
8447
8448 // this is the RANK MAJOR LOOP
8449 for (rankx = 0; rankx < dimm_count * 4; rankx++)
8450 rank_major_loop(priv, rankx, rl_score);
8451 } /* Evaluation block */
8452 } /* while(rl_dbg_loops--) */
8453
8454 ctl.cn78xx.ddr2t = save_ddr2t;
8455 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
8456 ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
8457 /* Display final 2T value */
8458 debug("DDR2T : %6d\n",
8459 ctl.cn78xx.ddr2t);
8460
8461 ddr_init_seq(priv, rank_mask, if_num);
8462
8463 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
8464 u64 value;
8465 int parameter_set = 0;
8466
8467 if (!(rank_mask & (1 << rankx)))
8468 continue;
8469
8470 rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
8471 if_num));
8472
8473 for (i = 0; i < 9; ++i) {
8474 s = lookup_env(priv, "ddr%d_rlevel_rank%d_byte%d",
8475 if_num, rankx, i);
8476 if (s) {
8477 parameter_set |= 1;
8478 value = simple_strtoul(s, NULL, 0);
8479
8480 upd_rl_rank(&rl_rank, i, value);
8481 }
8482 }
8483
8484 s = lookup_env_ull(priv, "ddr%d_rlevel_rank%d", if_num, rankx);
8485 if (s) {
8486 parameter_set |= 1;
8487 value = simple_strtoull(s, NULL, 0);
8488 rl_rank.u64 = value;
8489 }
8490
8491 if (parameter_set) {
8492 lmc_wr(priv,
8493 CVMX_LMCX_RLEVEL_RANKX(rankx, if_num),
8494 rl_rank.u64);
8495 rl_rank.u64 = lmc_rd(priv,
8496 CVMX_LMCX_RLEVEL_RANKX(rankx,
8497 if_num));
8498 display_rl(if_num, rl_rank, rankx);
8499 }
8500 }
8501}
8502
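/*
 * Illustrative sketch only, not called anywhere in the init flow: a
 * software model of the hardware selection rule documented at the top
 * of lmc_read_leveling() above. It scans a 64-bit pass/fail bitmask
 * for the longest (first, on ties) run of passing settings, then
 * returns either the last passing setting minus the offset (when
 * offset_en is set and the run is longer than the offset) or the
 * middle of the run, rounding earlier. Names and parameters here are
 * local to the sketch and do not come from the CSR definitions.
 */
static inline int model_rlevel_setting_select(u64 bitmask, int offset_en,
					      int offset)
{
	int best_start = 0, best_len = 0;
	int start = -1;
	int i;

	/* find the first longest run of consecutive passing settings */
	for (i = 0; i <= 64; i++) {
		if (i < 64 && (bitmask & (1ull << i))) {
			if (start < 0)
				start = i;
		} else if (start >= 0) {
			if (i - start > best_len) {
				best_len = i - start;
				best_start = start;
			}
			start = -1;
		}
	}

	if (best_len == 0)
		return 0;	/* no passing settings at all */

	/* last passing setting in the run minus the offset, if allowed */
	if (offset_en && best_len > offset)
		return best_start + best_len - 1 - offset;

	/* otherwise the middle of the run, rounding earlier */
	return best_start + (best_len - 1) / 2;
}
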
8503int init_octeon3_ddr3_interface(struct ddr_priv *priv,
8504 struct ddr_conf *_ddr_conf, u32 _ddr_hertz,
8505 u32 cpu_hertz, u32 ddr_ref_hertz, int _if_num,
8506 u32 _if_mask)
8507{
8508 union cvmx_lmcx_control ctrl;
8509 int ret;
8510 char *s;
8511 int i;
8512
8513 if_num = _if_num;
8514 ddr_hertz = _ddr_hertz;
8515 ddr_conf = _ddr_conf;
8516 if_mask = _if_mask;
8517 odt_1rank_config = ddr_conf->odt_1rank_config;
8518 odt_2rank_config = ddr_conf->odt_2rank_config;
8519 odt_4rank_config = ddr_conf->odt_4rank_config;
8520 dimm_config_table = ddr_conf->dimm_config_table;
8521 c_cfg = &ddr_conf->custom_lmc_config;
8522
8523 /*
8524 * Compute clock rates to the nearest picosecond.
8525 */
8526 tclk_psecs = hertz_to_psecs(ddr_hertz); /* Clock in psecs */
8527 eclk_psecs = hertz_to_psecs(cpu_hertz); /* Clock in psecs */
8528
8529 dimm_count = 0;
8530 /* Accumulate and report all the errors before giving up */
8531 fatal_error = 0;
8532
8533 /* Flag that indicates safe DDR settings should be used */
8534 safe_ddr_flag = 0;
8535 if_64b = 1; /* Octeon II Default: 64bit interface width */
8536 mem_size_mbytes = 0;
8537 bank_bits = 0;
8538 column_bits_start = 1;
8539 use_ecc = 1;
8540 min_cas_latency = 0, max_cas_latency = 0, override_cas_latency = 0;
8541 spd_package = 0;
8542 spd_rawcard = 0;
8543 spd_rawcard_aorb = 0;
8544 spd_rdimm_registers = 0;
8545 is_stacked_die = 0;
8546 is_3ds_dimm = 0; // 3DS
8547 lranks_per_prank = 1; // 3DS: logical ranks per package rank
8548 lranks_bits = 0; // 3DS: logical ranks bits
8549 die_capacity = 0; // in Mbits; only used for 3DS
8550
8551 wl_mask_err = 0;
8552 dyn_rtt_nom_mask = 0;
8553 ddr_disable_chip_reset = 1;
8554 match_wl_rtt_nom = 0;
8555
8556 internal_retries = 0;
8557
8558 disable_deskew_training = 0;
8559 restart_if_dsk_incomplete = 0;
8560 last_lane = ((if_64b) ? 8 : 4) + use_ecc;
8561
8562 disable_sequential_delay_check = 0;
8563 wl_print = WLEVEL_PRINTALL_DEFAULT;
8564
8565 enable_by_rank_init = 1; // FIXME: default by-rank ON
8566 saved_rank_mask = 0;
8567
8568 node = 0;
8569
8570 memset(hwl_alts, 0, sizeof(hwl_alts));
8571
8572 /*
8573 * Initialize these to shut up the compiler. They are configured
8574 * and used only for DDR4
8575 */
8576 ddr4_trrd_lmin = 6000;
8577 ddr4_tccd_lmin = 6000;
8578
8579 debug("\nInitializing node %d DDR interface %d, DDR Clock %d, DDR Reference Clock %d, CPUID 0x%08x\n",
8580 node, if_num, ddr_hertz, ddr_ref_hertz, read_c0_prid());
8581
8582 if (dimm_config_table[0].spd_addrs[0] == 0 &&
8583 !dimm_config_table[0].spd_ptrs[0]) {
8584 printf("ERROR: No dimms specified in the dimm_config_table.\n");
8585 return -1;
8586 }
8587
8588 // allow some overrides to be done
8589
8590 // this one controls several things related to DIMM geometry: HWL and RL
8591 disable_sequential_delay_check = c_cfg->disable_sequential_delay_check;
8592 s = lookup_env(priv, "ddr_disable_sequential_delay_check");
8593 if (s)
8594 disable_sequential_delay_check = strtoul(s, NULL, 0);
8595
8596 // this one controls whether chip RESET is done, or LMC init restarted
8597 // from step 6.9.6
8598 s = lookup_env(priv, "ddr_disable_chip_reset");
8599 if (s)
8600 ddr_disable_chip_reset = !!strtoul(s, NULL, 0);
8601
8602 // this one controls whether Deskew Training is performed
8603 s = lookup_env(priv, "ddr_disable_deskew_training");
8604 if (s)
8605 disable_deskew_training = !!strtoul(s, NULL, 0);
8606
8607 if (ddr_verbose(priv)) {
8608 printf("DDR SPD Table:");
8609 for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) {
8610 if (dimm_config_table[didx].spd_addrs[0] == 0)
8611 break;
8612
8613 printf(" --ddr%dspd=0x%02x", if_num,
8614 dimm_config_table[didx].spd_addrs[0]);
8615 if (dimm_config_table[didx].spd_addrs[1] != 0)
8616 printf(",0x%02x",
8617 dimm_config_table[didx].spd_addrs[1]);
8618 }
8619 printf("\n");
8620 }
8621
8622 /*
8623 * Walk the DRAM Socket Configuration Table to see what is installed.
8624 */
8625 for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) {
8626 /* Check for lower DIMM socket populated */
8627 if (validate_dimm(priv, &dimm_config_table[didx], 0)) {
8628 if (ddr_verbose(priv))
8629 report_dimm(&dimm_config_table[didx], 0,
8630 dimm_count, if_num);
8631 ++dimm_count;
8632 } else {
8633 break;
8634 } /* Finished when there is no lower DIMM */
8635 }
8636
8637 initialize_ddr_clock(priv, ddr_conf, cpu_hertz, ddr_hertz,
8638 ddr_ref_hertz, if_num, if_mask);
8639
8640 if (!odt_1rank_config)
8641 odt_1rank_config = disable_odt_config;
8642 if (!odt_2rank_config)
8643 odt_2rank_config = disable_odt_config;
8644 if (!odt_4rank_config)
8645 odt_4rank_config = disable_odt_config;
8646
8647 s = env_get("ddr_safe");
8648 if (s) {
8649 safe_ddr_flag = !!simple_strtoul(s, NULL, 0);
8650 printf("Parameter found in environment. ddr_safe = %d\n",
8651 safe_ddr_flag);
8652 }
8653
8654 if (dimm_count == 0) {
8655 printf("ERROR: DIMM 0 not detected.\n");
8656 return (-1);
8657 }
8658
8659 if (c_cfg->mode32b)
8660 if_64b = 0;
8661
8662 s = lookup_env(priv, "if_64b");
8663 if (s)
8664 if_64b = !!simple_strtoul(s, NULL, 0);
8665
8666 if (if_64b == 1) {
8667 if (octeon_is_cpuid(OCTEON_CN70XX)) {
8668 printf("64-bit interface width is not supported for this Octeon model\n");
8669 ++fatal_error;
8670 }
8671 }
8672
8673 /* ddr_type only indicates DDR4 or DDR3 */
8674 ddr_type = (read_spd(&dimm_config_table[0], 0,
8675 DDR4_SPD_KEY_BYTE_DEVICE_TYPE) == 0x0C) ? 4 : 3;
8676 debug("DRAM Device Type: DDR%d\n", ddr_type);
8677
8678 if (ddr_type == DDR4_DRAM) {
8679 int spd_module_type;
8680 int asymmetric;
8681 const char *signal_load[4] = { "", "MLS", "3DS", "RSV" };
8682
8683 imp_val = &ddr4_impedence_val;
8684
8685 spd_addr =
8686 read_spd(&dimm_config_table[0], 0,
8687 DDR4_SPD_ADDRESSING_ROW_COL_BITS);
8688 spd_org =
8689 read_spd(&dimm_config_table[0], 0,
8690 DDR4_SPD_MODULE_ORGANIZATION);
8691 spd_banks =
8692 0xFF & read_spd(&dimm_config_table[0], 0,
8693 DDR4_SPD_DENSITY_BANKS);
8694
8695 bank_bits =
8696 (2 + ((spd_banks >> 4) & 0x3)) + ((spd_banks >> 6) & 0x3);
8697 /* The controller can only address 4 bank bits. */
8698 bank_bits = min((int)bank_bits, 4);
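/*
 * e.g. spd_banks = 0x85: the DDR4 SPD density/banks byte then gives
 * ((0x85 >> 4) & 3) = 0 -> 2 bank-address bits and ((0x85 >> 6) & 3)
 * = 2 -> 2 bank-group bits, so bank_bits = 4.
 */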
8699
8700 spd_package =
8701 0XFF & read_spd(&dimm_config_table[0], 0,
8702 DDR4_SPD_PACKAGE_TYPE);
8703 if (spd_package & 0x80) { // non-monolithic device
8704 is_stacked_die = ((spd_package & 0x73) == 0x11);
8705 debug("DDR4: Package Type 0x%02x (%s), %d die\n",
8706 spd_package, signal_load[(spd_package & 3)],
8707 ((spd_package >> 4) & 7) + 1);
8708 is_3ds_dimm = ((spd_package & 3) == 2); // is it 3DS?
8709 if (is_3ds_dimm) { // is it 3DS?
8710 lranks_per_prank = ((spd_package >> 4) & 7) + 1;
8711 // FIXME: should make sure it is only 2H or 4H
8712 // or 8H?
8713 lranks_bits = lranks_per_prank >> 1;
8714 if (lranks_bits == 4)
8715 lranks_bits = 3;
8716 }
8717 } else if (spd_package != 0) {
8718 // FIXME: print non-zero monolithic device definition
8719 debug("DDR4: Package Type MONOLITHIC: %d die, signal load %d\n",
8720 ((spd_package >> 4) & 7) + 1, (spd_package & 3));
8721 }
8722
8723 asymmetric = (spd_org >> 6) & 1;
8724 if (asymmetric) {
8725 int spd_secondary_pkg =
8726 read_spd(&dimm_config_table[0], 0,
8727 DDR4_SPD_SECONDARY_PACKAGE_TYPE);
8728 debug("DDR4: Module Organization: ASYMMETRICAL: Secondary Package Type 0x%02x\n",
8729 spd_secondary_pkg);
8730 } else {
8731 u64 bus_width =
8732 8 << (0x07 &
8733 read_spd(&dimm_config_table[0], 0,
8734 DDR4_SPD_MODULE_MEMORY_BUS_WIDTH));
8735 u64 ddr_width = 4 << ((spd_org >> 0) & 0x7);
8736 u64 module_cap;
8737 int shift = (spd_banks & 0x0F);
8738
8739 die_capacity = (shift < 8) ? (256UL << shift) :
8740 ((12UL << (shift & 1)) << 10);
8741 debug("DDR4: Module Organization: SYMMETRICAL: capacity per die %d %cbit\n",
8742 (die_capacity > 512) ? (die_capacity >> 10) :
8743 die_capacity, (die_capacity > 512) ? 'G' : 'M');
8744 module_cap = ((u64)die_capacity << 20) / 8UL *
8745 bus_width / ddr_width *
8746 (1UL + ((spd_org >> 3) & 0x7));
8747
8748 // is it 3DS?
8749 if (is_3ds_dimm) {
8750 module_cap *= (u64)(((spd_package >> 4) & 7) +
8751 1);
8752 }
8753 debug("DDR4: Module Organization: SYMMETRICAL: capacity per module %lld GB\n",
8754 module_cap >> 30);
8755 }
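/*
 * Worked example of the capacity math above: an 8 Gbit die (shift = 5,
 * die_capacity = 8192 Mbit) on a 64-bit bus built from x8 devices with
 * 2 package ranks gives module_cap = (8192 Mbit / 8) * (64 / 8) * 2 =
 * 16 GiB.
 */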
8756
8757 spd_rawcard =
8758 0xFF & read_spd(&dimm_config_table[0], 0,
8759 DDR4_SPD_REFERENCE_RAW_CARD);
8760 debug("DDR4: Reference Raw Card 0x%02x\n", spd_rawcard);
8761
8762 spd_module_type =
8763 read_spd(&dimm_config_table[0], 0,
8764 DDR4_SPD_KEY_BYTE_MODULE_TYPE);
8765 if (spd_module_type & 0x80) { // HYBRID module
8766 debug("DDR4: HYBRID module, type %s\n",
8767 ((spd_module_type & 0x70) ==
8768 0x10) ? "NVDIMM" : "UNKNOWN");
8769 }
8770 spd_thermal_sensor =
8771 read_spd(&dimm_config_table[0], 0,
8772 DDR4_SPD_MODULE_THERMAL_SENSOR);
8773 spd_dimm_type = spd_module_type & 0x0F;
8774 spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) ||
8775 (spd_dimm_type == 8);
8776 if (spd_rdimm) {
8777 u16 spd_mfgr_id, spd_register_rev, spd_mod_attr;
8778 static const u16 manu_ids[4] = {
8779 0xb380, 0x3286, 0x9780, 0xb304
8780 };
8781 static const char *manu_names[4] = {
8782 "XXX", "XXXXXXX", "XX", "XXXXX"
8783 };
8784 int mc;
8785
8786 spd_mfgr_id =
8787 (0xFFU &
8788 read_spd(&dimm_config_table[0], 0,
8789 DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB)) |
8790 ((0xFFU &
8791 read_spd(&dimm_config_table[0], 0,
8792 DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB))
8793 << 8);
8794 spd_register_rev =
8795 0xFFU & read_spd(&dimm_config_table[0], 0,
8796 DDR4_SPD_REGISTER_REVISION_NUMBER);
8797 for (mc = 0; mc < 4; mc++)
8798 if (manu_ids[mc] == spd_mfgr_id)
8799 break;
8800
8801 debug("DDR4: RDIMM Register Manufacturer ID: %s, Revision: 0x%02x\n",
8802 (mc >= 4) ? "UNKNOWN" : manu_names[mc],
8803 spd_register_rev);
8804
8805 // RAWCARD A or B must be bit 7=0 and bits 4-0
8806 // either 00000(A) or 00001(B)
8807 spd_rawcard_aorb = ((spd_rawcard & 0x9fUL) <= 1);
8808 // RDIMM Module Attributes
8809 spd_mod_attr =
8810 0xFFU & read_spd(&dimm_config_table[0], 0,
8811 DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE);
8812 spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1);
8813 debug("DDR4: RDIMM Module Attributes (0x%02x): Register Type DDR4RCD%02d, DRAM rows %d, Registers %d\n",
8814 spd_mod_attr, (spd_mod_attr >> 4) + 1,
8815 ((1 << ((spd_mod_attr >> 2) & 3)) >> 1),
8816 spd_rdimm_registers);
8817 }
8818 dimm_type_name = ddr4_dimm_types[spd_dimm_type];
8819 } else { /* if (ddr_type == DDR4_DRAM) */
8820 const char *signal_load[4] = { "UNK", "MLS", "SLS", "RSV" };
8821
8822 imp_val = &ddr3_impedence_val;
8823
8824 spd_addr =
8825 read_spd(&dimm_config_table[0], 0,
8826 DDR3_SPD_ADDRESSING_ROW_COL_BITS);
8827 spd_org =
8828 read_spd(&dimm_config_table[0], 0,
8829 DDR3_SPD_MODULE_ORGANIZATION);
8830 spd_banks =
8831 read_spd(&dimm_config_table[0], 0,
8832 DDR3_SPD_DENSITY_BANKS) & 0xff;
8833
8834 bank_bits = 3 + ((spd_banks >> 4) & 0x7);
8835 /* The controller can only address 3 bank bits. */
8836 bank_bits = min((int)bank_bits, 3);
8837 spd_dimm_type =
8838 0x0f & read_spd(&dimm_config_table[0], 0,
8839 DDR3_SPD_KEY_BYTE_MODULE_TYPE);
8840 spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) ||
8841 (spd_dimm_type == 9);
8842
8843 spd_package =
8844 0xFF & read_spd(&dimm_config_table[0], 0,
8845 DDR3_SPD_SDRAM_DEVICE_TYPE);
8846 if (spd_package & 0x80) { // non-standard device
8847 debug("DDR3: Device Type 0x%02x (%s), %d die\n",
8848 spd_package, signal_load[(spd_package & 3)],
8849 ((1 << ((spd_package >> 4) & 7)) >> 1));
8850 } else if (spd_package != 0) {
8851 // FIXME: print non-zero monolithic device definition
8852 debug("DDR3: Device Type MONOLITHIC: %d die, signal load %d\n",
8853 ((1 << ((spd_package >> 4) & 7)) >> 1),
8854 (spd_package & 3));
8855 }
8856
8857 spd_rawcard =
8858 0xFF & read_spd(&dimm_config_table[0], 0,
8859 DDR3_SPD_REFERENCE_RAW_CARD);
8860 debug("DDR3: Reference Raw Card 0x%02x\n", spd_rawcard);
8861 spd_thermal_sensor =
8862 read_spd(&dimm_config_table[0], 0,
8863 DDR3_SPD_MODULE_THERMAL_SENSOR);
8864
8865 if (spd_rdimm) {
8866 int spd_mfgr_id, spd_register_rev, spd_mod_attr;
8867
8868 spd_mfgr_id =
8869 (0xFFU &
8870 read_spd(&dimm_config_table[0], 0,
8871 DDR3_SPD_REGISTER_MANUFACTURER_ID_LSB)) |
8872 ((0xFFU &
8873 read_spd(&dimm_config_table[0], 0,
8874 DDR3_SPD_REGISTER_MANUFACTURER_ID_MSB))
8875 << 8);
8876 spd_register_rev =
8877 0xFFU & read_spd(&dimm_config_table[0], 0,
8878 DDR3_SPD_REGISTER_REVISION_NUMBER);
8879 debug("DDR3: RDIMM Register Manufacturer ID 0x%x Revision 0x%02x\n",
8880 spd_mfgr_id, spd_register_rev);
8881 // Module Attributes
8882 spd_mod_attr =
8883 0xFFU & read_spd(&dimm_config_table[0], 0,
8884 DDR3_SPD_ADDRESS_MAPPING);
8885 spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1);
8886 debug("DDR3: RDIMM Module Attributes (0x%02x): DRAM rows %d, Registers %d\n",
8887 spd_mod_attr,
8888 ((1 << ((spd_mod_attr >> 2) & 3)) >> 1),
8889 spd_rdimm_registers);
8890 }
8891 dimm_type_name = ddr3_dimm_types[spd_dimm_type];
8892 }
8893
8894 if (spd_thermal_sensor & 0x80) {
8895 debug("DDR%d: SPD: Thermal Sensor PRESENT\n",
8896 (ddr_type == DDR4_DRAM) ? 4 : 3);
8897 }
8898
8899 debug("spd_addr : %#06x\n", spd_addr);
8900 debug("spd_org : %#06x\n", spd_org);
8901 debug("spd_banks : %#06x\n", spd_banks);
8902
8903 row_bits = 12 + ((spd_addr >> 3) & 0x7);
8904 col_bits = 9 + ((spd_addr >> 0) & 0x7);
8905
8906 num_ranks = 1 + ((spd_org >> 3) & 0x7);
8907 dram_width = 4 << ((spd_org >> 0) & 0x7);
8908 num_banks = 1 << bank_bits;
8909
8910 s = lookup_env(priv, "ddr_num_ranks");
8911 if (s)
8912 num_ranks = simple_strtoul(s, NULL, 0);
8913
8914 s = lookup_env(priv, "ddr_enable_by_rank_init");
8915 if (s)
8916 enable_by_rank_init = !!simple_strtoul(s, NULL, 0);
8917
8918 // FIXME: for now, we can only handle a DDR4 2rank-1slot config
8919 // FIXME: also, by-rank init does not work correctly if 32-bit mode...
8920 if (enable_by_rank_init && (ddr_type != DDR4_DRAM ||
8921 dimm_count != 1 || if_64b != 1 ||
8922 num_ranks != 2))
8923 enable_by_rank_init = 0;
8924
8925 if (enable_by_rank_init) {
8926 struct dimm_odt_config *odt_config;
8927 union cvmx_lmcx_modereg_params1 mp1;
8928 union cvmx_lmcx_modereg_params2 modereg_params2;
8929 int by_rank_rodt, by_rank_wr, by_rank_park;
8930
8931 // Do ODT settings changes which work best for 2R-1S configs
8932 debug("DDR4: 2R-1S special BY-RANK init ODT settings updated\n");
8933
8934 // setup for modifying config table values - 2 ranks and 1 DIMM
8935 odt_config =
8936 (struct dimm_odt_config *)&ddr_conf->odt_2rank_config[0];
8937
8938 // original was 80, first try was 60
8939 by_rank_rodt = ddr4_rodt_ctl_48_ohm;
8940 s = lookup_env(priv, "ddr_by_rank_rodt");
8941 if (s)
8942 by_rank_rodt = strtoul(s, NULL, 0);
8943
8944 odt_config->qs_dic = /*RODT_CTL */ by_rank_rodt;
8945
8946 // this is for MODEREG_PARAMS1 fields
8947 // fetch the original settings
8948 mp1.u64 = odt_config->modereg_params1.u64;
8949
8950 by_rank_wr = ddr4_rttwr_80ohm; // originals were 240
8951 s = lookup_env(priv, "ddr_by_rank_wr");
8952 if (s)
8953 by_rank_wr = simple_strtoul(s, NULL, 0);
8954
8955 // change specific settings here...
8956 insrt_wr(&mp1.u64, /*rank */ 00, by_rank_wr);
8957 insrt_wr(&mp1.u64, /*rank */ 01, by_rank_wr);
8958
8959 // save final settings
8960 odt_config->modereg_params1.u64 = mp1.u64;
8961
8962 // this is for MODEREG_PARAMS2 fields
8963 // fetch the original settings
8964 modereg_params2.u64 = odt_config->modereg_params2.u64;
8965
8966 by_rank_park = ddr4_rttpark_none; // originals were 120
8967 s = lookup_env(priv, "ddr_by_rank_park");
8968 if (s)
8969 by_rank_park = simple_strtoul(s, NULL, 0);
8970
8971 // change specific settings here...
8972 modereg_params2.s.rtt_park_00 = by_rank_park;
8973 modereg_params2.s.rtt_park_01 = by_rank_park;
8974
8975 // save final settings
8976 odt_config->modereg_params2.u64 = modereg_params2.u64;
8977 }
8978
8979 /*
8980 * FIX
8981 * Check that values are within some theoretical limits.
8982 * col_bits(min) = row_lsb(min) - bank_bits(max) - bus_bits(max) =
8983 * 14 - 3 - 4 = 7
8984 * col_bits(max) = row_lsb(max) - bank_bits(min) - bus_bits(min) =
8985 * 18 - 2 - 3 = 13
8986 */
8987 if (col_bits > 13 || col_bits < 7) {
8988 printf("Unsupported number of Col Bits: %d\n", col_bits);
8989 ++fatal_error;
8990 }
8991
8992 /*
8993 * FIX
8994 * Check that values are within some theoretical limits.
8995 * row_bits(min) = pbank_lsb(min) - row_lsb(max) - rank_bits =
8996 * 26 - 18 - 1 = 7
8997 * row_bits(max) = pbank_lsb(max) - row_lsb(min) - rank_bits =
8998 * 33 - 14 - 1 = 18
8999 */
9000 if (row_bits > 18 || row_bits < 7) {
9001 printf("Unsupported number of Row Bits: %d\n", row_bits);
9002 ++fatal_error;
9003 }
9004
9005 s = lookup_env(priv, "ddr_rdimm_ena");
9006 if (s)
9007 spd_rdimm = !!simple_strtoul(s, NULL, 0);
9008
9009 wl_loops = WLEVEL_LOOPS_DEFAULT;
9010 // accept generic or interface-specific override
9011 s = lookup_env(priv, "ddr_wlevel_loops");
9012 if (!s)
9013 s = lookup_env(priv, "ddr%d_wlevel_loops", if_num);
9014
9015 if (s)
9016 wl_loops = strtoul(s, NULL, 0);
9017
9018 s = lookup_env(priv, "ddr_ranks");
9019 if (s)
9020 num_ranks = simple_strtoul(s, NULL, 0);
9021
9022 bunk_enable = (num_ranks > 1);
9023
9024 if (octeon_is_cpuid(OCTEON_CN7XXX))
9025 column_bits_start = 3;
9026 else
9027 printf("ERROR: Unsupported Octeon model: 0x%x\n",
9028 read_c0_prid());
9029
9030 row_lsb = column_bits_start + col_bits + bank_bits - (!if_64b);
9031 debug("row_lsb = column_bits_start + col_bits + bank_bits = %d\n",
9032 row_lsb);
9033
9034 pbank_lsb = row_lsb + row_bits + bunk_enable;
9035 debug("pbank_lsb = row_lsb + row_bits + bunk_enable = %d\n", pbank_lsb);
9036
9037 if (lranks_per_prank > 1) {
9038 pbank_lsb = row_lsb + row_bits + lranks_bits + bunk_enable;
9039 debug("DDR4: 3DS: pbank_lsb = (%d row_lsb) + (%d row_bits) + (%d lranks_bits) + (%d bunk_enable) = %d\n",
9040 row_lsb, row_bits, lranks_bits, bunk_enable, pbank_lsb);
9041 }
9042
9043 mem_size_mbytes = dimm_count * ((1ull << pbank_lsb) >> 20);
9044 if (num_ranks == 4) {
9045 /*
9046 * Quad rank dimm capacity is equivalent to two dual-rank
9047 * dimms.
9048 */
9049 mem_size_mbytes *= 2;
9050 }
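	/*
	 * Worked example of the geometry math above (hypothetical DIMM,
	 * not taken from any SPD actually read): a 64-bit interface with
	 * col_bits = 10, bank_bits = 4, row_bits = 16, dual-rank
	 * (bunk_enable = 1) and dimm_count = 1 gives
	 *   row_lsb   = 3 + 10 + 4 - 0 = 17
	 *   pbank_lsb = 17 + 16 + 1    = 34
	 *   mem_size  = 1 * ((1ull << 34) >> 20) = 16384 MiB (16 GiB)
	 * i.e. two 8 GiB ranks behind one 64-bit LMC.
	 */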
9051
9052 /*
9053	 * Mask with one bit set for each active rank, allowing 2 bits
9054	 * per dimm. This makes later calculations simpler, as a variety
9055	 * of CSRs use this layout. This init needs to be updated for dual
9056	 * configs (i.e. non-identical DIMMs).
9057 *
9058 * Bit 0 = dimm0, rank 0
9059 * Bit 1 = dimm0, rank 1
9060 * Bit 2 = dimm1, rank 0
9061 * Bit 3 = dimm1, rank 1
9062 * ...
9063 */
9064 rank_mask = 0x1;
9065 if (num_ranks > 1)
9066 rank_mask = 0x3;
9067 if (num_ranks > 2)
9068 rank_mask = 0xf;
9069
9070 for (i = 1; i < dimm_count; i++)
9071 rank_mask |= ((rank_mask & 0x3) << (2 * i));
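	/*
	 * Example values (illustrative): one single-rank DIMM leaves
	 * rank_mask = 0x1; one dual-rank DIMM gives 0x3; two dual-rank
	 * DIMMs give 0x3 | (0x3 << 2) = 0xf, i.e. DIMM0 ranks in bits
	 * [1:0] and DIMM1 ranks in bits [3:2], matching the layout
	 * described above.
	 */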
9072
9073 /*
9074 * If we are booting from RAM, the DRAM controller is
9075 * already set up. Just return the memory size
9076 */
9077 if (priv->flags & FLAG_RAM_RESIDENT) {
9078 debug("Ram Boot: Skipping LMC config\n");
9079 return mem_size_mbytes;
9080 }
9081
9082 if (ddr_type == DDR4_DRAM) {
9083 spd_ecc =
9084 !!(read_spd
9085 (&dimm_config_table[0], 0,
9086 DDR4_SPD_MODULE_MEMORY_BUS_WIDTH) & 8);
9087 } else {
9088 spd_ecc =
9089 !!(read_spd
9090 (&dimm_config_table[0], 0,
9091 DDR3_SPD_MEMORY_BUS_WIDTH) & 8);
9092 }
9093
9094 char rank_spec[8];
9095
9096 printable_rank_spec(rank_spec, num_ranks, dram_width, spd_package);
9097 debug("Summary: %d %s%s %s %s, row bits=%d, col bits=%d, bank bits=%d\n",
9098 dimm_count, dimm_type_name, (dimm_count > 1) ? "s" : "",
9099 rank_spec,
9100 (spd_ecc) ? "ECC" : "non-ECC", row_bits, col_bits, bank_bits);
9101
9102 if (ddr_type == DDR4_DRAM) {
9103 spd_cas_latency =
9104 ((0xff &
9105 read_spd(&dimm_config_table[0], 0,
9106 DDR4_SPD_CAS_LATENCIES_BYTE0)) << 0);
9107 spd_cas_latency |=
9108 ((0xff &
9109 read_spd(&dimm_config_table[0], 0,
9110 DDR4_SPD_CAS_LATENCIES_BYTE1)) << 8);
9111 spd_cas_latency |=
9112 ((0xff &
9113 read_spd(&dimm_config_table[0], 0,
9114 DDR4_SPD_CAS_LATENCIES_BYTE2)) << 16);
9115 spd_cas_latency |=
9116 ((0xff &
9117 read_spd(&dimm_config_table[0], 0,
9118 DDR4_SPD_CAS_LATENCIES_BYTE3)) << 24);
9119 } else {
9120 spd_cas_latency =
9121 0xff & read_spd(&dimm_config_table[0], 0,
9122 DDR3_SPD_CAS_LATENCIES_LSB);
9123 spd_cas_latency |=
9124 ((0xff &
9125 read_spd(&dimm_config_table[0], 0,
9126 DDR3_SPD_CAS_LATENCIES_MSB)) << 8);
9127 }
9128 debug("spd_cas_latency : %#06x\n", spd_cas_latency);
9129
9130 if (ddr_type == DDR4_DRAM) {
9131 /*
9132 * No other values for DDR4 MTB and FTB are specified at the
9133 * current time so don't bother reading them. Can't speculate
9134 * how new values will be represented.
9135 */
9136 int spdmtb = 125;
9137 int spdftb = 1;
9138
9139 taamin = spdmtb * read_spd(&dimm_config_table[0], 0,
9140 DDR4_SPD_MIN_CAS_LATENCY_TAAMIN) +
9141 spdftb * (signed char)read_spd(&dimm_config_table[0],
9142 0, DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN);
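		/*
		 * Example of the MTB/FTB combination above, using
		 * hypothetical SPD bytes (not from a real module): a
		 * tAAmin byte of 110 and a fine-correction byte of 0xce
		 * (-50 as a signed char) give
		 *   taamin = 125 * 110 + 1 * (-50) = 13700 ps
		 * The signed-char cast lets the fine timebase subtract
		 * from, as well as add to, the medium-timebase value.
		 */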
9143
9144 ddr4_tckavgmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9145 DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN) +
9146 spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9147 DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN);
9148
9149 ddr4_tckavgmax = spdmtb * read_spd(&dimm_config_table[0], 0,
9150 DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX) +
9151 spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9152 DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX);
9153
9154 ddr4_trdcmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9155 DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN) +
9156 spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9157 DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN);
9158
9159 ddr4_trpmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9160 DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN) +
9161 spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9162 DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN);
9163
9164 ddr4_trasmin = spdmtb *
9165 (((read_spd
9166 (&dimm_config_table[0], 0,
9167 DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8) +
9168 (read_spd
9169 (&dimm_config_table[0], 0,
9170 DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN) & 0xff));
9171
9172 ddr4_trcmin = spdmtb *
9173 ((((read_spd
9174 (&dimm_config_table[0], 0,
9175 DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) >> 4) & 0xf) <<
9176 8) + (read_spd
9177 (&dimm_config_table[0], 0,
9178 DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN) &
9179 0xff))
9180 + spdftb * (signed char)read_spd(&dimm_config_table[0],
9181 0,
9182 DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN);
9183
9184 ddr4_trfc1min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
9185 DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN) & 0xff) <<
9186 8) + (read_spd(&dimm_config_table[0], 0,
9187 DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN) & 0xff));
9188
9189 ddr4_trfc2min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
9190 DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN) & 0xff) <<
9191 8) + (read_spd(&dimm_config_table[0], 0,
9192 DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN) & 0xff));
9193
9194 ddr4_trfc4min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
9195 DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN) & 0xff) <<
9196 8) + (read_spd(&dimm_config_table[0], 0,
9197 DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN) & 0xff));
9198
9199 ddr4_tfawmin = spdmtb * (((read_spd(&dimm_config_table[0], 0,
9200 DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN) & 0xf) <<
9201 8) + (read_spd(&dimm_config_table[0], 0,
9202 DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN) & 0xff));
9203
9204 ddr4_trrd_smin = spdmtb * read_spd(&dimm_config_table[0], 0,
9205 DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN) +
9206 spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9207 DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN);
9208
9209 ddr4_trrd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9210 DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN) +
9211 spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9212 DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN);
9213
9214 ddr4_tccd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9215 DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN) +
9216 spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9217 DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN);
9218
9219 debug("%-45s : %6d ps\n", "Medium Timebase (MTB)", spdmtb);
9220 debug("%-45s : %6d ps\n", "Fine Timebase (FTB)", spdftb);
9221
9222 debug("%-45s : %6d ps (%ld MT/s)\n",
9223 "SDRAM Minimum Cycle Time (tCKAVGmin)", ddr4_tckavgmin,
9224 pretty_psecs_to_mts(ddr4_tckavgmin));
9225 debug("%-45s : %6d ps\n",
9226 "SDRAM Maximum Cycle Time (tCKAVGmax)", ddr4_tckavgmax);
9227 debug("%-45s : %6d ps\n", "Minimum CAS Latency Time (taamin)",
9228 taamin);
9229 debug("%-45s : %6d ps\n",
9230 "Minimum RAS to CAS Delay Time (tRCDmin)", ddr4_trdcmin);
9231 debug("%-45s : %6d ps\n",
9232 "Minimum Row Precharge Delay Time (tRPmin)", ddr4_trpmin);
9233 debug("%-45s : %6d ps\n",
9234 "Minimum Active to Precharge Delay (tRASmin)",
9235 ddr4_trasmin);
9236 debug("%-45s : %6d ps\n",
9237 "Minimum Active to Active/Refr. Delay (tRCmin)",
9238 ddr4_trcmin);
9239 debug("%-45s : %6d ps\n",
9240 "Minimum Refresh Recovery Delay (tRFC1min)",
9241 ddr4_trfc1min);
9242 debug("%-45s : %6d ps\n",
9243 "Minimum Refresh Recovery Delay (tRFC2min)",
9244 ddr4_trfc2min);
9245 debug("%-45s : %6d ps\n",
9246 "Minimum Refresh Recovery Delay (tRFC4min)",
9247 ddr4_trfc4min);
9248 debug("%-45s : %6d ps\n",
9249 "Minimum Four Activate Window Time (tFAWmin)",
9250 ddr4_tfawmin);
9251 debug("%-45s : %6d ps\n",
9252 "Minimum Act. to Act. Delay (tRRD_Smin)", ddr4_trrd_smin);
9253 debug("%-45s : %6d ps\n",
9254 "Minimum Act. to Act. Delay (tRRD_Lmin)", ddr4_trrd_lmin);
9255 debug("%-45s : %6d ps\n",
9256 "Minimum CAS to CAS Delay Time (tCCD_Lmin)",
9257 ddr4_tccd_lmin);
9258
9259#define DDR4_TWR 15000
9260#define DDR4_TWTR_S 2500
9261
9262 tckmin = ddr4_tckavgmin;
9263 twr = DDR4_TWR;
9264 trcd = ddr4_trdcmin;
9265 trrd = ddr4_trrd_smin;
9266 trp = ddr4_trpmin;
9267 tras = ddr4_trasmin;
9268 trc = ddr4_trcmin;
9269 trfc = ddr4_trfc1min;
9270 twtr = DDR4_TWTR_S;
9271 tfaw = ddr4_tfawmin;
9272
9273 if (spd_rdimm) {
9274 spd_addr_mirror = read_spd(&dimm_config_table[0], 0,
9275 DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM) &
9276 0x1;
9277 } else {
9278 spd_addr_mirror = read_spd(&dimm_config_table[0], 0,
9279 DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE) & 0x1;
9280 }
9281 debug("spd_addr_mirror : %#06x\n", spd_addr_mirror);
9282 } else {
9283 spd_mtb_dividend =
9284 0xff & read_spd(&dimm_config_table[0], 0,
9285 DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND);
9286 spd_mtb_divisor =
9287 0xff & read_spd(&dimm_config_table[0], 0,
9288 DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR);
9289 spd_tck_min =
9290 0xff & read_spd(&dimm_config_table[0], 0,
9291 DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN);
9292 spd_taa_min =
9293 0xff & read_spd(&dimm_config_table[0], 0,
9294 DDR3_SPD_MIN_CAS_LATENCY_TAAMIN);
9295
9296 spd_twr =
9297 0xff & read_spd(&dimm_config_table[0], 0,
9298 DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN);
9299 spd_trcd =
9300 0xff & read_spd(&dimm_config_table[0], 0,
9301 DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN);
9302 spd_trrd =
9303 0xff & read_spd(&dimm_config_table[0], 0,
9304 DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN);
9305 spd_trp =
9306 0xff & read_spd(&dimm_config_table[0], 0,
9307 DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN);
9308 spd_tras =
9309 0xff & read_spd(&dimm_config_table[0], 0,
9310 DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN);
9311 spd_tras |=
9312 ((0xff &
9313 read_spd(&dimm_config_table[0], 0,
9314 DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8);
9315 spd_trc =
9316 0xff & read_spd(&dimm_config_table[0], 0,
9317 DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN);
9318 spd_trc |=
9319 ((0xff &
9320 read_spd(&dimm_config_table[0], 0,
9321 DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf0) << 4);
9322 spd_trfc =
9323 0xff & read_spd(&dimm_config_table[0], 0,
9324 DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN);
9325 spd_trfc |=
9326 ((0xff &
9327 read_spd(&dimm_config_table[0], 0,
9328 DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN)) <<
9329 8);
9330 spd_twtr =
9331 0xff & read_spd(&dimm_config_table[0], 0,
9332 DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN);
9333 spd_trtp =
9334 0xff & read_spd(&dimm_config_table[0], 0,
9335 DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN);
9336 spd_tfaw =
9337 0xff & read_spd(&dimm_config_table[0], 0,
9338 DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN);
9339 spd_tfaw |=
9340 ((0xff &
9341 read_spd(&dimm_config_table[0], 0,
9342 DDR3_SPD_UPPER_NIBBLE_TFAW) & 0xf) << 8);
9343 spd_addr_mirror =
9344 0xff & read_spd(&dimm_config_table[0], 0,
9345 DDR3_SPD_ADDRESS_MAPPING) & 0x1;
9346 /* Only address mirror unbuffered dimms. */
9347 spd_addr_mirror = spd_addr_mirror && !spd_rdimm;
9348 ftb_dividend =
9349 read_spd(&dimm_config_table[0], 0,
9350 DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4;
9351 ftb_divisor =
9352 read_spd(&dimm_config_table[0], 0,
9353 DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf;
9354 /* Make sure that it is not 0 */
9355 ftb_divisor = (ftb_divisor == 0) ? 1 : ftb_divisor;
9356
9357 debug("spd_twr : %#06x\n", spd_twr);
9358 debug("spd_trcd : %#06x\n", spd_trcd);
9359 debug("spd_trrd : %#06x\n", spd_trrd);
9360 debug("spd_trp : %#06x\n", spd_trp);
9361 debug("spd_tras : %#06x\n", spd_tras);
9362 debug("spd_trc : %#06x\n", spd_trc);
9363 debug("spd_trfc : %#06x\n", spd_trfc);
9364 debug("spd_twtr : %#06x\n", spd_twtr);
9365 debug("spd_trtp : %#06x\n", spd_trtp);
9366 debug("spd_tfaw : %#06x\n", spd_tfaw);
9367 debug("spd_addr_mirror : %#06x\n", spd_addr_mirror);
9368
9369 mtb_psec = spd_mtb_dividend * 1000 / spd_mtb_divisor;
9370 taamin = mtb_psec * spd_taa_min;
9371 taamin += ftb_dividend *
9372 (signed char)read_spd(&dimm_config_table[0],
9373 0, DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN) /
9374 ftb_divisor;
9375 tckmin = mtb_psec * spd_tck_min;
9376 tckmin += ftb_dividend *
9377 (signed char)read_spd(&dimm_config_table[0],
9378 0, DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN) /
9379 ftb_divisor;
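		/*
		 * Example with typical DDR3 SPD contents (hypothetical
		 * module): an MTB dividend/divisor of 1/8 gives
		 * mtb_psec = 1 * 1000 / 8 = 125 ps. With spd_tck_min = 12
		 * and spd_taa_min = 108 (and zero fine corrections) this
		 * yields tckmin = 1500 ps (DDR3-1333) and
		 * taamin = 13500 ps.
		 */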
9380
9381 twr = spd_twr * mtb_psec;
9382 trcd = spd_trcd * mtb_psec;
9383 trrd = spd_trrd * mtb_psec;
9384 trp = spd_trp * mtb_psec;
9385 tras = spd_tras * mtb_psec;
9386 trc = spd_trc * mtb_psec;
9387 trfc = spd_trfc * mtb_psec;
9388 if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) && trfc < 260000) {
9389 // default to this - because it works...
9390 int new_trfc = 260000;
9391
9392 s = env_get("ddr_trfc");
9393 if (s) {
9394 new_trfc = simple_strtoul(s, NULL, 0);
9395 printf("Parameter found in environment. ddr_trfc = %d\n",
9396 new_trfc);
9397 if (new_trfc < 160000 || new_trfc > 260000) {
9398 // back to default if out of range
9399 new_trfc = 260000;
9400 }
9401 }
9402 debug("N%d.LMC%d: Adjusting tRFC from %d to %d, for CN78XX Pass 2.x\n",
9403 node, if_num, trfc, new_trfc);
9404 trfc = new_trfc;
9405 }
9406
9407 twtr = spd_twtr * mtb_psec;
9408 trtp = spd_trtp * mtb_psec;
9409 tfaw = spd_tfaw * mtb_psec;
9410
9411 debug("Medium Timebase (MTB) : %6d ps\n",
9412 mtb_psec);
9413 debug("Minimum Cycle Time (tckmin) : %6d ps (%ld MT/s)\n",
9414 tckmin, pretty_psecs_to_mts(tckmin));
9415 debug("Minimum CAS Latency Time (taamin) : %6d ps\n",
9416 taamin);
9417 debug("Write Recovery Time (tWR) : %6d ps\n",
9418 twr);
9419 debug("Minimum RAS to CAS delay (tRCD) : %6d ps\n",
9420 trcd);
9421 debug("Minimum Row Active to Row Active delay (tRRD) : %6d ps\n",
9422 trrd);
9423 debug("Minimum Row Precharge Delay (tRP) : %6d ps\n",
9424 trp);
9425 debug("Minimum Active to Precharge (tRAS) : %6d ps\n",
9426 tras);
9427 debug("Minimum Active to Active/Refresh Delay (tRC) : %6d ps\n",
9428 trc);
9429 debug("Minimum Refresh Recovery Delay (tRFC) : %6d ps\n",
9430 trfc);
9431 debug("Internal write to read command delay (tWTR) : %6d ps\n",
9432 twtr);
9433 debug("Min Internal Rd to Precharge Cmd Delay (tRTP) : %6d ps\n",
9434 trtp);
9435 debug("Minimum Four Activate Window Delay (tFAW) : %6d ps\n",
9436 tfaw);
9437 }
9438
9439 /*
9440 * When the cycle time is within 1 psec of the minimum accept it
9441 * as a slight rounding error and adjust it to exactly the minimum
9442 * cycle time. This avoids an unnecessary warning.
9443 */
9444 if (abs(tclk_psecs - tckmin) < 2)
9445 tclk_psecs = tckmin;
9446
9447 if (tclk_psecs < (u64)tckmin) {
9448 printf("WARNING!!!!: DDR Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin: %ld)!!!!\n",
9449 tclk_psecs, (ulong)tckmin);
9450 }
9451
9452 debug("DDR Clock Rate (tCLK) : %6ld ps\n",
9453 tclk_psecs);
9454 debug("Core Clock Rate (eCLK) : %6ld ps\n",
9455 eclk_psecs);
9456
9457 s = env_get("ddr_use_ecc");
9458 if (s) {
9459 use_ecc = !!simple_strtoul(s, NULL, 0);
9460 printf("Parameter found in environment. ddr_use_ecc = %d\n",
9461 use_ecc);
9462 }
9463 use_ecc = use_ecc && spd_ecc;
9464
9465 if_bytemask = if_64b ? (use_ecc ? 0x1ff : 0xff)
9466 : (use_ecc ? 0x01f : 0x0f);
9467
9468 debug("DRAM Interface width: %d bits %s bytemask 0x%03x\n",
9469 if_64b ? 64 : 32, use_ecc ? "+ECC" : "", if_bytemask);
9470
9471 debug("\n------ Board Custom Configuration Settings ------\n");
9472 debug("%-45s : %d\n", "MIN_RTT_NOM_IDX ", c_cfg->min_rtt_nom_idx);
9473 debug("%-45s : %d\n", "MAX_RTT_NOM_IDX ", c_cfg->max_rtt_nom_idx);
9474 debug("%-45s : %d\n", "MIN_RODT_CTL ", c_cfg->min_rodt_ctl);
9475 debug("%-45s : %d\n", "MAX_RODT_CTL ", c_cfg->max_rodt_ctl);
9476 debug("%-45s : %d\n", "MIN_CAS_LATENCY ", c_cfg->min_cas_latency);
9477 debug("%-45s : %d\n", "OFFSET_EN ", c_cfg->offset_en);
9478 debug("%-45s : %d\n", "OFFSET_UDIMM ", c_cfg->offset_udimm);
9479 debug("%-45s : %d\n", "OFFSET_RDIMM ", c_cfg->offset_rdimm);
9480 debug("%-45s : %d\n", "DDR_RTT_NOM_AUTO ", c_cfg->ddr_rtt_nom_auto);
9481 debug("%-45s : %d\n", "DDR_RODT_CTL_AUTO ", c_cfg->ddr_rodt_ctl_auto);
9482 if (spd_rdimm)
9483 debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET",
9484 c_cfg->rlevel_comp_offset_rdimm);
9485 else
9486 debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET",
9487 c_cfg->rlevel_comp_offset_udimm);
9488 debug("%-45s : %d\n", "RLEVEL_COMPUTE ", c_cfg->rlevel_compute);
9489 debug("%-45s : %d\n", "DDR2T_UDIMM ", c_cfg->ddr2t_udimm);
9490 debug("%-45s : %d\n", "DDR2T_RDIMM ", c_cfg->ddr2t_rdimm);
9491 debug("%-45s : %d\n", "FPRCH2 ", c_cfg->fprch2);
9492 debug("%-45s : %d\n", "PTUNE_OFFSET ", c_cfg->ptune_offset);
9493 debug("%-45s : %d\n", "NTUNE_OFFSET ", c_cfg->ntune_offset);
9494 debug("-------------------------------------------------\n");
9495
9496 cl = divide_roundup(taamin, tclk_psecs);
9497
9498 debug("Desired CAS Latency : %6d\n", cl);
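	/*
	 * divide_roundup() is a ceiling division, so e.g. a hypothetical
	 * taamin of 13500 ps at tclk_psecs = 1250 ps (DDR3-1600) gives
	 * cl = ceil(13500 / 1250) = ceil(10.8) = 11.
	 */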
9499
9500 min_cas_latency = c_cfg->min_cas_latency;
9501
9502 s = lookup_env(priv, "ddr_min_cas_latency");
9503 if (s)
9504 min_cas_latency = simple_strtoul(s, NULL, 0);
9505
9506 debug("CAS Latencies supported in DIMM :");
9507 base_cl = (ddr_type == DDR4_DRAM) ? 7 : 4;
9508 for (i = 0; i < 32; ++i) {
9509 if ((spd_cas_latency >> i) & 1) {
9510 debug(" %d", i + base_cl);
9511 max_cas_latency = i + base_cl;
9512 if (min_cas_latency == 0)
9513 min_cas_latency = i + base_cl;
9514 }
9515 }
9516 debug("\n");
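	/*
	 * Example of the bitmap decode above (hypothetical SPD value):
	 * for DDR4 (base_cl = 7), spd_cas_latency = 0x0001fe00 has bits
	 * 9..16 set, so the DIMM advertises CL 16..23; max_cas_latency
	 * ends up 23 and, if no board minimum was given, min_cas_latency
	 * ends up 16.
	 */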
9517
9518 /*
9519 * Use relaxed timing when running slower than the minimum
9520 * supported speed. Adjust timing to match the smallest supported
9521 * CAS Latency.
9522 */
9523 if (min_cas_latency > cl) {
9524 ulong adjusted_tclk = taamin / min_cas_latency;
9525
9526 cl = min_cas_latency;
9527 debug("Slow clock speed. Adjusting timing: tClk = %ld, Adjusted tClk = %ld\n",
9528 tclk_psecs, adjusted_tclk);
9529 tclk_psecs = adjusted_tclk;
9530 }
9531
9532 s = env_get("ddr_cas_latency");
9533 if (s) {
9534 override_cas_latency = simple_strtoul(s, NULL, 0);
9535 printf("Parameter found in environment. ddr_cas_latency = %d\n",
9536 override_cas_latency);
9537 }
9538
9539 /* Make sure that the selected cas latency is legal */
9540 for (i = (cl - base_cl); i < 32; ++i) {
9541 if ((spd_cas_latency >> i) & 1) {
9542 cl = i + base_cl;
9543 break;
9544 }
9545 }
9546
9547 if (max_cas_latency < cl)
9548 cl = max_cas_latency;
9549
9550 if (override_cas_latency != 0)
9551 cl = override_cas_latency;
9552
9553 debug("CAS Latency : %6d\n", cl);
9554
9555 if ((cl * tckmin) > 20000) {
9556 debug("(CLactual * tckmin) = %d exceeds 20 ns\n",
9557 (cl * tckmin));
9558 }
9559
9560 if (tclk_psecs < (ulong)tckmin) {
9561		printf("WARNING!!!!!!: DDR Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin: %ld)!!!!!!!!\n",
9562 tclk_psecs, (ulong)tckmin);
9563 }
9564
9565 if (num_banks != 4 && num_banks != 8 && num_banks != 16) {
9566		printf("Unsupported number of banks %d. Must be 4, 8 or 16.\n",
9567		       num_banks);
9568 ++fatal_error;
9569 }
9570
9571 if (num_ranks != 1 && num_ranks != 2 && num_ranks != 4) {
9572 printf("Unsupported number of ranks: %d\n", num_ranks);
9573 ++fatal_error;
9574 }
9575
9576 if (octeon_is_cpuid(OCTEON_CN78XX) ||
9577 octeon_is_cpuid(OCTEON_CN73XX) ||
9578 octeon_is_cpuid(OCTEON_CNF75XX)) {
9579 if (dram_width != 8 && dram_width != 16 && dram_width != 4) {
9580 printf("Unsupported SDRAM Width, %d. Must be 4, 8 or 16.\n",
9581 dram_width);
9582 ++fatal_error;
9583 }
9584 } else if (dram_width != 8 && dram_width != 16) {
9585 printf("Unsupported SDRAM Width, %d. Must be 8 or 16.\n",
9586 dram_width);
9587 ++fatal_error;
9588 }
9589
9590 /*
9591 ** Bail out here if things are not copasetic.
9592 */
9593 if (fatal_error)
9594 return (-1);
9595
9596 /*
9597 * 4.8.4 LMC RESET Initialization
9598 *
9599 * The purpose of this step is to assert/deassert the RESET# pin at the
9600 * DDR3/DDR4 parts.
9601 *
9602 * This LMC RESET step is done for all enabled LMCs.
9603 */
9604 perform_lmc_reset(priv, node, if_num);
9605
9606 // Make sure scrambling is disabled during init...
9607 ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
9608 ctrl.s.scramble_ena = 0;
9609 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
9610
9611 lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), 0);
9612 lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), 0);
9613 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
9614 lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num), 0);
9615
9616 odt_idx = min(dimm_count - 1, 3);
9617
9618 switch (num_ranks) {
9619 case 1:
9620 odt_config = odt_1rank_config;
9621 break;
9622 case 2:
9623 odt_config = odt_2rank_config;
9624 break;
9625 case 4:
9626 odt_config = odt_4rank_config;
9627 break;
9628 default:
9629 odt_config = disable_odt_config;
9630 printf("Unsupported number of ranks: %d\n", num_ranks);
9631 ++fatal_error;
9632 }
9633
9634 /*
9635 * 4.8.5 Early LMC Initialization
9636 *
9637 * All of DDR PLL, LMC CK, and LMC DRESET initializations must be
9638 * completed prior to starting this LMC initialization sequence.
9639 *
9640 * Perform the following five substeps for early LMC initialization:
9641 *
9642 * 1. Software must ensure there are no pending DRAM transactions.
9643 *
9644 * 2. Write LMC(0)_CONFIG, LMC(0)_CONTROL, LMC(0)_TIMING_PARAMS0,
9645 * LMC(0)_TIMING_PARAMS1, LMC(0)_MODEREG_PARAMS0,
9646 * LMC(0)_MODEREG_PARAMS1, LMC(0)_DUAL_MEMCFG, LMC(0)_NXM,
9647 * LMC(0)_WODT_MASK, LMC(0)_RODT_MASK, LMC(0)_COMP_CTL2,
9648 * LMC(0)_PHY_CTL, LMC(0)_DIMM0/1_PARAMS, and LMC(0)_DIMM_CTL with
9649 * appropriate values. All sections in this chapter can be used to
9650 * derive proper register settings.
9651 */
9652
9653 /* LMC(0)_CONFIG */
9654 lmc_config(priv);
9655
9656 /* LMC(0)_CONTROL */
9657 lmc_control(priv);
9658
9659 /* LMC(0)_TIMING_PARAMS0 */
9660 lmc_timing_params0(priv);
9661
9662 /* LMC(0)_TIMING_PARAMS1 */
9663 lmc_timing_params1(priv);
9664
9665 /* LMC(0)_TIMING_PARAMS2 */
9666 lmc_timing_params2(priv);
9667
9668 /* LMC(0)_MODEREG_PARAMS0 */
9669 lmc_modereg_params0(priv);
9670
9671 /* LMC(0)_MODEREG_PARAMS1 */
9672 lmc_modereg_params1(priv);
9673
9674 /* LMC(0)_MODEREG_PARAMS2 */
9675 lmc_modereg_params2(priv);
9676
9677 /* LMC(0)_MODEREG_PARAMS3 */
9678 lmc_modereg_params3(priv);
9679
9680 /* LMC(0)_NXM */
9681 lmc_nxm(priv);
9682
9683 /* LMC(0)_WODT_MASK */
9684 lmc_wodt_mask(priv);
9685
9686 /* LMC(0)_RODT_MASK */
9687 lmc_rodt_mask(priv);
9688
9689 /* LMC(0)_COMP_CTL2 */
9690 lmc_comp_ctl2(priv);
9691
9692 /* LMC(0)_PHY_CTL */
9693 lmc_phy_ctl(priv);
9694
9695 /* LMC(0)_EXT_CONFIG */
9696 lmc_ext_config(priv);
9697
9698 /* LMC(0)_EXT_CONFIG2 */
9699 lmc_ext_config2(priv);
9700
9701 /* LMC(0)_DIMM0/1_PARAMS */
9702 lmc_dimm01_params(priv);
9703
9704 ret = lmc_rank_init(priv);
9705 if (ret < 0)
9706 return 0; /* 0 indicates problem */
9707
9708 lmc_config_2(priv);
9709
9710 lmc_write_leveling(priv);
9711
9712 lmc_read_leveling(priv);
9713
9714 lmc_workaround(priv);
9715
9716 ret = lmc_sw_write_leveling(priv);
9717 if (ret < 0)
9718 return 0; /* 0 indicates problem */
9719
9720	// NOTE: displaying MPR pages sometimes caused stack overflow
9721	// crashes, so display only for DDR4 RDIMMs.
9722 if (ddr_type == DDR4_DRAM && spd_rdimm) {
9723 int i;
9724
9725 for (i = 0; i < 3; i += 2) // just pages 0 and 2 for now..
9726 display_mpr_page(priv, rank_mask, if_num, i);
9727 }
9728
9729 lmc_dll(priv);
9730
9731 lmc_workaround_2(priv);
9732
9733 lmc_final(priv);
9734
9735 lmc_scrambling(priv);
9736
9737 return mem_size_mbytes;
9738}
9739
9740///// HW-assist byte DLL offset tuning //////
9741
9742static int cvmx_dram_get_num_lmc(struct ddr_priv *priv)
9743{
9744 union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2;
9745
9746 if (octeon_is_cpuid(OCTEON_CN70XX))
9747 return 1;
9748
9749 if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX)) {
9750 // sample LMC1
9751 lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(1));
9752 if (lmcx_dll_ctl2.cn78xx.intf_en)
9753 return 2;
9754 else
9755 return 1;
9756 }
9757
9758 // for CN78XX, LMCs are always active in pairs, and always LMC0/1
9759 // so, we sample LMC2 to see if 2 and 3 are active
9760 lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(2));
9761 if (lmcx_dll_ctl2.cn78xx.intf_en)
9762 return 4;
9763 else
9764 return 2;
9765}
9766
9767// got to do these here, even though already defined in BDK
9768
9769// all DDR3, and DDR4 x16 today, use only 3 bank bits;
9770// DDR4 x4 and x8 always have 4 bank bits
9771// NOTE: this will change in the future, when DDR4 x16 devices can
9772// come with 16 banks!! FIXME!!
9773static int cvmx_dram_get_num_bank_bits(struct ddr_priv *priv, int lmc)
9774{
9775 union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2;
9776 union cvmx_lmcx_config lmcx_config;
9777 union cvmx_lmcx_ddr_pll_ctl lmcx_ddr_pll_ctl;
9778 int bank_width;
9779
9780 // can always read this
9781 lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(lmc));
9782
9783 if (lmcx_dll_ctl2.cn78xx.dreset) // check LMCn
9784 return 0;
9785
9786	lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
9787 lmcx_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(lmc));
9788
9789 bank_width = ((lmcx_ddr_pll_ctl.s.ddr4_mode != 0) &&
9790 (lmcx_config.s.bg2_enable)) ? 4 : 3;
9791
9792 return bank_width;
9793}
9794
9795#define EXTRACT(v, lsb, width) (((v) >> (lsb)) & ((1ull << (width)) - 1))
9796#define ADDRESS_HOLE 0x10000000ULL
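/*
 * EXTRACT() pulls a 'width'-bit field starting at bit 'lsb', e.g.
 * EXTRACT(0x1b3, 4, 4) == 0xb (bits [7:4] of 0b1_1011_0011).
 */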
9797
9798static void cvmx_dram_address_extract_info(struct ddr_priv *priv, u64 address,
9799 int *node, int *lmc, int *dimm,
9800 int *prank, int *lrank, int *bank,
9801 int *row, int *col)
9802{
9803 int bank_lsb, xbits;
9804 union cvmx_l2c_ctl l2c_ctl;
9805 union cvmx_lmcx_config lmcx_config;
9806 union cvmx_lmcx_control lmcx_control;
9807 union cvmx_lmcx_ext_config ext_config;
9808 int bitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18;
9809 int bank_width;
9810 int dimm_lsb;
9811 int dimm_width;
9812 int prank_lsb, lrank_lsb;
9813 int prank_width, lrank_width;
9814 int row_lsb;
9815 int row_width;
9816 int col_hi_lsb;
9817 int col_hi_width;
9818 int col_hi;
9819
9820 if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX))
9821 bitno = 18;
9822
9823 *node = EXTRACT(address, 40, 2); /* Address bits [41:40] */
9824
9825 address &= (1ULL << 40) - 1; // lop off any node bits or above
9826 if (address >= ADDRESS_HOLE) // adjust down if at HOLE or above
9827 address -= ADDRESS_HOLE;
9828
9829 /* Determine the LMC controllers */
9830 l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL);
9831
9832 /* xbits depends on number of LMCs */
9833 xbits = cvmx_dram_get_num_lmc(priv) >> 1; // 4->2, 2->1, 1->0
9834 bank_lsb = 7 + xbits;
9835
9836 /* LMC number is probably aliased */
9837 if (l2c_ctl.s.disidxalias) {
9838 *lmc = EXTRACT(address, 7, xbits);
9839 } else {
9840 *lmc = EXTRACT(address, 7, xbits) ^
9841 EXTRACT(address, bitno, xbits) ^
9842 EXTRACT(address, 12, xbits);
9843 }
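	/*
	 * Illustrative decode (arbitrary example address, 4-LMC part with
	 * index aliasing enabled and bitno = 20): xbits = 2, so
	 *   lmc = bits[8:7] ^ bits[21:20] ^ bits[13:12]
	 * For address 0x302080 that is 1 ^ 3 ^ 2 = 0, i.e. LMC0.
	 */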
9844
9845 /* Figure out the bank field width */
9846 lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(*lmc));
9847 ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(*lmc));
9848 bank_width = cvmx_dram_get_num_bank_bits(priv, *lmc);
9849
9850 /* Extract additional info from the LMC_CONFIG CSR */
9851 dimm_lsb = 28 + lmcx_config.s.pbank_lsb + xbits;
9852 dimm_width = 40 - dimm_lsb;
9853 prank_lsb = dimm_lsb - lmcx_config.s.rank_ena;
9854 prank_width = dimm_lsb - prank_lsb;
9855 lrank_lsb = prank_lsb - ext_config.s.dimm0_cid;
9856 lrank_width = prank_lsb - lrank_lsb;
9857 row_lsb = 14 + lmcx_config.s.row_lsb + xbits;
9858 row_width = lrank_lsb - row_lsb;
9859 col_hi_lsb = bank_lsb + bank_width;
9860 col_hi_width = row_lsb - col_hi_lsb;
9861
9862 /* Extract the parts of the address */
9863 *dimm = EXTRACT(address, dimm_lsb, dimm_width);
9864 *prank = EXTRACT(address, prank_lsb, prank_width);
9865 *lrank = EXTRACT(address, lrank_lsb, lrank_width);
9866 *row = EXTRACT(address, row_lsb, row_width);
9867
9868 /* bank calculation may be aliased... */
9869 lmcx_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(*lmc));
9870 if (lmcx_control.s.xor_bank) {
9871 *bank = EXTRACT(address, bank_lsb, bank_width) ^
9872 EXTRACT(address, 12 + xbits, bank_width);
9873 } else {
9874 *bank = EXTRACT(address, bank_lsb, bank_width);
9875 }
9876
9877 /* LMC number already extracted */
9878 col_hi = EXTRACT(address, col_hi_lsb, col_hi_width);
9879 *col = EXTRACT(address, 3, 4) | (col_hi << 4);
9880 /* Bus byte is address bits [2:0]. Unused here */
9881}
9882
9883// end of added workarounds
9884
9885// NOTE: "mode" argument:
9886// DBTRAIN_TEST: for testing using GP patterns, includes ECC
9887// DBTRAIN_DBI: for DBI deskew training behavior (uses GP patterns)
9888// DBTRAIN_LFSR: for testing using LFSR patterns, includes ECC
9889// NOTE: trust the caller to specify the correct/supported mode
9890//
9891static int test_dram_byte_hw(struct ddr_priv *priv, int if_num, u64 p,
9892 int mode, u64 *xor_data)
9893{
9894 u64 p1;
9895 u64 k;
9896 int errors = 0;
9897
9898 u64 mpr_data0, mpr_data1;
9899 u64 bad_bits[2] = { 0, 0 };
9900
9901 int node_address, lmc, dimm;
9902 int prank, lrank;
9903 int bank, row, col;
9904 int save_or_dis;
9905 int byte;
9906 int ba_loop, ba_bits;
9907
9908 union cvmx_lmcx_rlevel_ctl rlevel_ctl;
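	/*
	 * Example decode (hypothetical SPD bytes): spd_addr = 0x19 gives
	 * row_bits = 12 + 3 = 15 and col_bits = 9 + 1 = 10; spd_org =
	 * 0x09 gives num_ranks = 2 and dram_width = 4 << 1 = 8, i.e. a
	 * dual-rank x8 module.
	 */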
9909 union cvmx_lmcx_dbtrain_ctl dbtrain_ctl;
9910 union cvmx_lmcx_phy_ctl phy_ctl;
9911
9912 int biter_errs;
9913
9914 // FIXME: K iterations set to 4 for now.
9915	// FIXME: decrement to increase iterations.
9916 // FIXME: must be no less than 22 to stay above an LMC hash field.
9917 int kshift = 27;
9918
9919 const char *s;
9920 int node = 0;
9921
9922 // allow override default setting for kshift
9923 s = env_get("ddr_tune_set_kshift");
9924 if (s) {
9925 int temp = simple_strtoul(s, NULL, 0);
9926
9927 if (temp < 22 || temp > 28) {
9928 debug("N%d.LMC%d: ILLEGAL override of kshift to %d, using default %d\n",
9929 node, if_num, temp, kshift);
9930 } else {
9931 debug("N%d.LMC%d: overriding kshift (%d) to %d\n",
9932 node, if_num, kshift, temp);
9933 kshift = temp;
9934 }
9935 }
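	/*
	 * With the default kshift of 27 the main test loop below covers
	 * the 512 MiB window in 2^(29 - 27) = 4 strides: k = 0x0,
	 * 0x08000000, 0x10000000, 0x18000000. Lowering kshift (down to
	 * the legal minimum of 22) tests more addresses at the cost of
	 * run time.
	 */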
9936
9937 /*
9938 * 1) Make sure that RLEVEL_CTL[OR_DIS] = 0.
9939 */
9940 rlevel_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num));
9941 save_or_dis = rlevel_ctl.s.or_dis;
9942 /* or_dis must be disabled for this sequence */
9943 rlevel_ctl.s.or_dis = 0;
9944 lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64);
9945
9946 /*
9947 * NOTE: this step done in the calling routine(s)...
9948 * 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern
9949 * of choice.
9950 * a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower
9951 * (rising edge) 64 bits of data.
9952 * b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper
9953 * (falling edge) 64 bits of data.
9954 * c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower
9955 * (rising edge <7:0>) and upper (falling edge <15:8>) ECC data.
9956 */
9957
9958 // final address must include LMC and node
9959 p |= (if_num << 7); /* Map address into proper interface */
9960 p |= (u64)node << CVMX_NODE_MEM_SHIFT; // map to node
9961
9962 /*
9963 * Add base offset to both test regions to not clobber u-boot stuff
9964 * when running from L2 for NAND boot.
9965 */
9966 p += 0x20000000; // offset to 512MB, ie above THE HOLE!!!
9967 p |= 1ull << 63; // needed for OCTEON
9968
9969 errors = 0;
9970
9971 cvmx_dram_address_extract_info(priv, p, &node_address, &lmc, &dimm,
9972 &prank, &lrank, &bank, &row, &col);
9973 debug("%s: START at A:0x%012llx, N%d L%d D%d/%d R%d B%1x Row:%05x Col:%05x\n",
9974 __func__, p, node_address, lmc, dimm, prank, lrank, bank,
9975 row, col);
9976
9977 // only check once per call, and ignore if no match...
9978 if ((int)node != node_address) {
9979 printf("ERROR: Node address mismatch\n");
9980 return 0;
9981 }
9982 if (lmc != if_num) {
9983 printf("ERROR: LMC address mismatch\n");
9984 return 0;
9985 }
9986
9987 /*
9988 * 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as
9989 * it’s a one-shot operation). This is to get into the habit of
9990 * resetting PHY’s SILO to the original 0 location.
9991 */
9992 phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
9993 phy_ctl.s.phy_reset = 1;
9994 lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
9995
9996 /*
9997 * Walk through a range of addresses avoiding bits that alias
9998 * interfaces on the CN88XX.
9999 */
10000
10001	// FIXME: we want to keep the K increment from affecting the
10002	// LMC via hash, so keep it above bit 21; we also want to keep
10003	// k less than the base offset of bit 29 (512MB)
10004
10005 for (k = 0; k < (1UL << 29); k += (1UL << kshift)) {
10006		// FIXME: the sequence will iterate over 1/2 cacheline
10007		// FIXME: for each unit specified in "read_cmd_count",
10008		// FIXME: so we set up each sequence to do the max
10009		// FIXME: cachelines it can
10010
10011 p1 = p + k;
10012
10013 cvmx_dram_address_extract_info(priv, p1, &node_address, &lmc,
10014 &dimm, &prank, &lrank, &bank,
10015 &row, &col);
10016
10017 /*
10018 * 2) Setup the fields of the CSR DBTRAIN_CTL as follows:
10019 * a. COL, ROW, BA, BG, PRANK points to the starting point
10020 * of the address.
10021 * You can just set them to all 0.
10022 * b. RW_TRAIN – set this to 1.
10023 * c. TCCD_L – set this to 0.
10024 * d. READ_CMD_COUNT – instruct the sequence to the how many
10025 * writes/reads.
10026 * It is 5 bits field, so set to 31 of maximum # of r/w.
10027 */
10028 dbtrain_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DBTRAIN_CTL(if_num));
10029 dbtrain_ctl.s.column_a = col;
10030 dbtrain_ctl.s.row_a = row;
10031 dbtrain_ctl.s.bg = (bank >> 2) & 3;
10032 dbtrain_ctl.s.prank = (dimm * 2) + prank; // FIXME?
10033 dbtrain_ctl.s.lrank = lrank; // FIXME?
10034 dbtrain_ctl.s.activate = (mode == DBTRAIN_DBI);
10035 dbtrain_ctl.s.write_ena = 1;
10036 dbtrain_ctl.s.read_cmd_count = 31; // max count pass 1.x
10037 if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
10038 octeon_is_cpuid(OCTEON_CNF75XX)) {
10039 // max count on chips that support it
10040 dbtrain_ctl.s.cmd_count_ext = 3;
10041 } else {
10042 // max count pass 1.x
10043 dbtrain_ctl.s.cmd_count_ext = 0;
10044 }
10045
10046 dbtrain_ctl.s.rw_train = 1;
10047 dbtrain_ctl.s.tccd_sel = (mode == DBTRAIN_DBI);
10048 // LFSR should only be on when chip supports it...
10049 dbtrain_ctl.s.lfsr_pattern_sel = (mode == DBTRAIN_LFSR) ? 1 : 0;
10050
10051 biter_errs = 0;
10052
10053 // for each address, iterate over the 4 "banks" in the BA
10054 for (ba_loop = 0, ba_bits = bank & 3;
10055 ba_loop < 4; ba_loop++, ba_bits = (ba_bits + 1) & 3) {
10056 dbtrain_ctl.s.ba = ba_bits;
10057 lmc_wr(priv, CVMX_LMCX_DBTRAIN_CTL(if_num),
10058 dbtrain_ctl.u64);
10059
10060 /*
10061 * We will use the RW_TRAINING sequence (14) for
10062 * this task.
10063 *
10064 * 4) Kick off the sequence (SEQ_CTL[SEQ_SEL] = 14,
10065 * SEQ_CTL[INIT_START] = 1).
10066 * 5) Poll on SEQ_CTL[SEQ_COMPLETE] for completion.
10067 */
10068 oct3_ddr3_seq(priv, prank, if_num, 14);
10069
10070 /*
10071 * 6) Read MPR_DATA0 and MPR_DATA1 for results.
10072 * a. MPR_DATA0[MPR_DATA<63:0>] – comparison results
10073 * for DQ63:DQ0. (1 means MATCH, 0 means FAIL).
10074 * b. MPR_DATA1[MPR_DATA<7:0>] – comparison results
10075 * for ECC bit7:0.
10076 */
10077 mpr_data0 = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num));
10078 mpr_data1 = lmc_rd(priv, CVMX_LMCX_MPR_DATA1(if_num));
10079
10080 /*
10081 * 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically
10082 * clears this as it’s a one-shot operation).
10083 * This is to get into the habit of resetting PHY’s
10084 * SILO to the original 0 location.
10085 */
10086 phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
10087 phy_ctl.s.phy_reset = 1;
10088 lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
10089
10090 // bypass any error checking or updating when DBI mode
10091 if (mode == DBTRAIN_DBI)
10092 continue;
10093
10094 // data bytes
10095 if (~mpr_data0) {
10096 for (byte = 0; byte < 8; byte++) {
10097 if ((~mpr_data0 >> (8 * byte)) & 0xffUL)
10098 biter_errs |= (1 << byte);
10099 }
10100 // accumulate bad bits
10101 bad_bits[0] |= ~mpr_data0;
10102 }
10103
10104 // include ECC byte errors
10105 if (~mpr_data1 & 0xffUL) {
10106 biter_errs |= (1 << 8);
10107 bad_bits[1] |= ~mpr_data1 & 0xffUL;
10108 }
10109 }
10110
10111 errors |= biter_errs;
10112 } /* end for (k=...) */
10113
10114 rlevel_ctl.s.or_dis = save_or_dis;
10115 lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64);
10116
10117 // send the bad bits back...
10118 if (mode != DBTRAIN_DBI && xor_data) {
10119 xor_data[0] = bad_bits[0];
10120 xor_data[1] = bad_bits[1];
10121 }
10122
10123 return errors;
10124}
10125
10126// setup default for byte test pattern array
10127// take these from the HRM section 6.9.13
10128static const u64 byte_pattern_0[] = {
10129 0xFFAAFFFFFF55FFFFULL, // GP0
10130 0x55555555AAAAAAAAULL, // GP1
10131 0xAA55AAAAULL, // GP2
10132};
10133
10134static const u64 byte_pattern_1[] = {
10135 0xFBF7EFDFBF7FFEFDULL, // GP0
10136 0x0F1E3C78F0E1C387ULL, // GP1
10137 0xF0E1BF7FULL, // GP2
10138};
10139
10140// this is from Andrew via LFSR with PRBS=0xFFFFAAAA
10141static const u64 byte_pattern_2[] = {
10142 0xEE55AADDEE55AADDULL, // GP0
10143 0x55AADDEE55AADDEEULL, // GP1
10144 0x55EEULL, // GP2
10145};
10146
10147// this is from Mike via LFSR with PRBS=0x4A519909
10148static const u64 byte_pattern_3[] = {
10149 0x0088CCEE0088CCEEULL, // GP0
10150 0xBB552211BB552211ULL, // GP1
10151 0xBB00ULL, // GP2
10152};
10153
10154static const u64 *byte_patterns[4] = {
10155 byte_pattern_0, byte_pattern_1, byte_pattern_2, byte_pattern_3
10156};
10157
10158static const u32 lfsr_patterns[4] = {
10159 0xFFFFAAAAUL, 0x06000000UL, 0xAAAAFFFFUL, 0x4A519909UL
10160};
10161
10162#define NUM_BYTE_PATTERNS 4
10163
10164#define DEFAULT_BYTE_BURSTS 32 // compromise between time and rigor
10165
10166static void setup_hw_pattern(struct ddr_priv *priv, int lmc,
10167 const u64 *pattern_p)
10168{
10169 /*
10170 * 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern
10171 * of choice.
10172 * a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower
10173 * (rising edge) 64 bits of data.
10174 * b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper
10175 * (falling edge) 64 bits of data.
10176 * c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower
10177 * (rising edge <7:0>) and upper
10178 * (falling edge <15:8>) ECC data.
10179 */
10180 lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), pattern_p[0]);
10181 lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), pattern_p[1]);
10182 lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), pattern_p[2]);
10183}
10184
10185static void setup_lfsr_pattern(struct ddr_priv *priv, int lmc, u32 data)
10186{
10187 union cvmx_lmcx_char_ctl char_ctl;
10188 u32 prbs;
10189 const char *s;
10190
10191 s = env_get("ddr_lfsr_prbs");
10192 if (s)
10193 prbs = simple_strtoul(s, NULL, 0);
10194 else
10195 prbs = data;
10196
10197 /*
10198 * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
10199 * here data comes from the LFSR generating a PRBS pattern
10200 * CHAR_CTL.EN = 0
10201 * CHAR_CTL.SEL = 0; // for PRBS
10202 * CHAR_CTL.DR = 1;
10203 * CHAR_CTL.PRBS = setup for whatever type of PRBS to send
10204 * CHAR_CTL.SKEW_ON = 1;
10205 */
10206 char_ctl.u64 = lmc_rd(priv, CVMX_LMCX_CHAR_CTL(lmc));
10207 char_ctl.s.en = 0;
10208 char_ctl.s.sel = 0;
10209 char_ctl.s.dr = 1;
10210 char_ctl.s.prbs = prbs;
10211 char_ctl.s.skew_on = 1;
10212 lmc_wr(priv, CVMX_LMCX_CHAR_CTL(lmc), char_ctl.u64);
10213}
10214
10215static int choose_best_hw_patterns(int lmc, int mode)
10216{
10217 int new_mode = mode;
10218 const char *s;
10219
10220 switch (mode) {
10221 case DBTRAIN_TEST: // always choose LFSR if chip supports it
10222 if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) {
10223 int lfsr_enable = 1;
10224
10225 s = env_get("ddr_allow_lfsr");
10226 if (s) {
10227 // override?
10228 lfsr_enable = !!strtoul(s, NULL, 0);
10229 }
10230
10231 if (lfsr_enable)
10232 new_mode = DBTRAIN_LFSR;
10233 }
10234 break;
10235
10236 case DBTRAIN_DBI: // possibly can allow LFSR use?
10237 break;
10238
10239 case DBTRAIN_LFSR: // forced already
10240 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) {
10241 debug("ERROR: illegal HW assist mode %d\n", mode);
10242 new_mode = DBTRAIN_TEST;
10243 }
10244 break;
10245
10246 default:
10247 debug("ERROR: unknown HW assist mode %d\n", mode);
10248 }
10249
10250 if (new_mode != mode)
10251 debug("%s: changing mode %d to %d\n", __func__, mode, new_mode);
10252
10253 return new_mode;
10254}
10255
10256int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr,
10257 int mode, u64 *xor_data)
10258{
10259 int pattern;
10260 const u64 *pattern_p;
10261 int errs, errors = 0;
10262
10263 // FIXME? always choose LFSR if chip supports it???
10264 mode = choose_best_hw_patterns(lmc, mode);
10265
10266 for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
10267 if (mode == DBTRAIN_LFSR) {
10268 setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]);
10269 } else {
10270 pattern_p = byte_patterns[pattern];
10271 setup_hw_pattern(priv, lmc, pattern_p);
10272 }
10273 errs = test_dram_byte_hw(priv, lmc, phys_addr, mode, xor_data);
10274
10275 debug("%s: PATTERN %d at A:0x%012llx errors 0x%x\n",
10276 __func__, pattern, phys_addr, errs);
10277
10278 errors |= errs;
10279 }
10280
10281 return errors;
10282}
10283
10284static void hw_assist_test_dll_offset(struct ddr_priv *priv,
10285 int dll_offset_mode, int lmc,
10286 int bytelane,
10287 int if_64b,
10288 u64 dram_tune_rank_offset,
10289 int dram_tune_byte_bursts)
10290{
10291 int byte_offset, new_best_offset[9];
10292 int rank_delay_start[4][9];
10293 int rank_delay_count[4][9];
10294 int rank_delay_best_start[4][9];
10295 int rank_delay_best_count[4][9];
10296 int errors[4], off_errors, tot_errors;
10297 int rank_mask, rankx, active_ranks;
10298 int pattern;
10299 const u64 *pattern_p;
10300 int byte;
10301 char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write";
10302 int pat_best_offset[9];
10303 u64 phys_addr;
10304 int pat_beg, pat_end;
10305 int rank_beg, rank_end;
10306 int byte_lo, byte_hi;
10307 union cvmx_lmcx_config lmcx_config;
10308 u64 hw_rank_offset;
10309 int num_lmcs = cvmx_dram_get_num_lmc(priv);
10310 // FIXME? always choose LFSR if chip supports it???
10311 int mode = choose_best_hw_patterns(lmc, DBTRAIN_TEST);
10312 int node = 0;
10313
10314 if (bytelane == 0x0A) { // all bytelanes
10315 byte_lo = 0;
10316 byte_hi = 8;
10317 } else { // just 1
10318 byte_lo = bytelane;
10319 byte_hi = bytelane;
10320 }
10321
10322 lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10323 rank_mask = lmcx_config.s.init_status;
10324
10325 // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
10326 hw_rank_offset =
10327 1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena +
10328 (num_lmcs / 2));
10329
10330 debug("N%d: %s: starting LMC%d with rank offset 0x%016llx\n",
10331 node, __func__, lmc, (unsigned long long)hw_rank_offset);
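	/*
	 * Example (hypothetical CSR values): with LMC_CONFIG[PBANK_LSB]
	 * = 5, RANK_ENA = 1 and two active LMCs this is
	 *   1ull << (28 + 5 - 1 + 1) = 1ull << 33
	 * so successive ranks are tested 8 GiB apart in the physical
	 * address map.
	 */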
10332
10333 // start of pattern loop
10334 // we do the set of tests for each pattern supplied...
10335
10336 memset(new_best_offset, 0, sizeof(new_best_offset));
10337 for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
10338 memset(pat_best_offset, 0, sizeof(pat_best_offset));
10339
10340 if (mode == DBTRAIN_TEST) {
10341 pattern_p = byte_patterns[pattern];
10342 setup_hw_pattern(priv, lmc, pattern_p);
10343 } else {
10344 setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]);
10345 }
10346
10347 // now loop through all legal values for the DLL byte offset...
10348
10349#define BYTE_OFFSET_INCR 3 // FIXME: make this tunable?
10350
10351 tot_errors = 0;
10352
10353 memset(rank_delay_count, 0, sizeof(rank_delay_count));
10354 memset(rank_delay_start, 0, sizeof(rank_delay_start));
10355 memset(rank_delay_best_count, 0, sizeof(rank_delay_best_count));
10356 memset(rank_delay_best_start, 0, sizeof(rank_delay_best_start));
10357
10358 for (byte_offset = -63; byte_offset < 64;
10359 byte_offset += BYTE_OFFSET_INCR) {
10360 // do the setup on the active LMC
10361 // set the bytelanes DLL offsets
10362 change_dll_offset_enable(priv, lmc, 0);
10363 // FIXME? bytelane?
10364 load_dll_offset(priv, lmc, dll_offset_mode,
10365 byte_offset, bytelane);
10366 change_dll_offset_enable(priv, lmc, 1);
10367
10368 //bdk_watchdog_poke();
10369
10370 // run the test on each rank
10371 // only 1 call per rank should be enough, let the
10372 // bursts, loops, etc, control the load...
10373
10374 // errors for this byte_offset, all ranks
10375 off_errors = 0;
10376
10377 active_ranks = 0;
10378
10379 for (rankx = 0; rankx < 4; rankx++) {
10380 if (!(rank_mask & (1 << rankx)))
10381 continue;
10382
10383 phys_addr = hw_rank_offset * active_ranks;
10384 // FIXME: now done by test_dram_byte_hw()
10385 //phys_addr |= (lmc << 7);
10386 //phys_addr |= (u64)node << CVMX_NODE_MEM_SHIFT;
10387
10388 active_ranks++;
10389
10390 // NOTE: return is a now a bitmask of the
10391 // erroring bytelanes.
10392 errors[rankx] =
10393 test_dram_byte_hw(priv, lmc, phys_addr,
10394 mode, NULL);
10395
10396 // process any errors in the bytelane(s) that
10397 // are being tested
10398 for (byte = byte_lo; byte <= byte_hi; byte++) {
10399 // check errors
10400 // yes, an error in the byte lane in
10401 // this rank
10402 if (errors[rankx] & (1 << byte)) {
10403 off_errors |= (1 << byte);
10404
10405 debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: Address 0x%012llx errors\n",
10406 node, lmc, rankx, byte,
10407 mode_str, byte_offset,
10408 phys_addr);
10409
10410 // had started run
10411 if (rank_delay_count
10412 [rankx][byte] > 0) {
10413 debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: stopping a run here\n",
10414 node, lmc, rankx,
10415 byte, mode_str,
10416 byte_offset);
10417 // stop now
10418 rank_delay_count
10419 [rankx][byte] =
10420 0;
10421 }
10422 // FIXME: else had not started
10423 // run - nothing else to do?
10424 } else {
10425 // no error in the byte lane
10426 // first success, set run start
10427 if (rank_delay_count[rankx]
10428 [byte] == 0) {
10429 debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: starting a run here\n",
10430 node, lmc, rankx,
10431 byte, mode_str,
10432 byte_offset);
10433 rank_delay_start[rankx]
10434 [byte] =
10435 byte_offset;
10436 }
10437 // bump run length
10438 rank_delay_count[rankx][byte]
10439 += BYTE_OFFSET_INCR;
10440
10441 // is this now the biggest
10442 // window?
10443 if (rank_delay_count[rankx]
10444 [byte] >
10445 rank_delay_best_count[rankx]
10446 [byte]) {
10447 rank_delay_best_count
10448 [rankx][byte] =
10449 rank_delay_count
10450 [rankx][byte];
10451 rank_delay_best_start
10452 [rankx][byte] =
10453 rank_delay_start
10454 [rankx][byte];
10455 debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: updating best to %d/%d\n",
10456 node, lmc, rankx,
10457 byte, mode_str,
10458 byte_offset,
10459 rank_delay_best_start
10460 [rankx][byte],
10461 rank_delay_best_count
10462 [rankx][byte]);
10463 }
10464 }
10465 }
10466 } /* for (rankx = 0; rankx < 4; rankx++) */
10467
10468 tot_errors |= off_errors;
10469 }
10470
10471 // set the bytelanes DLL offsets all back to 0
10472 change_dll_offset_enable(priv, lmc, 0);
10473 load_dll_offset(priv, lmc, dll_offset_mode, 0, bytelane);
10474 change_dll_offset_enable(priv, lmc, 1);
10475
10476 // now choose the best byte_offsets for this pattern
10477 // according to the best windows of the tested ranks
10478 // calculate offset by constructing an average window
10479 // from the rank windows
10480 for (byte = byte_lo; byte <= byte_hi; byte++) {
10481 pat_beg = -999;
10482 pat_end = 999;
10483
10484 for (rankx = 0; rankx < 4; rankx++) {
10485 if (!(rank_mask & (1 << rankx)))
10486 continue;
10487
10488 rank_beg = rank_delay_best_start[rankx][byte];
10489 pat_beg = max(pat_beg, rank_beg);
10490 rank_end = rank_beg +
10491 rank_delay_best_count[rankx][byte] -
10492 BYTE_OFFSET_INCR;
10493 pat_end = min(pat_end, rank_end);
10494
10495 debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test: Rank Window %3d:%3d\n",
10496 node, lmc, rankx, byte, mode_str,
10497 rank_beg, rank_end);
10498
10499 } /* for (rankx = 0; rankx < 4; rankx++) */
10500
10501 pat_best_offset[byte] = (pat_end + pat_beg) / 2;
10502
10503 // sum the pattern averages
10504 new_best_offset[byte] += pat_best_offset[byte];
10505 }
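		/*
		 * Example of the averaging above: if rank 0 passed for
		 * offsets [-30..+12] and rank 1 for [-24..+18], the
		 * common window is [-24..+12] and the chosen offset for
		 * this pattern is (-24 + 12) / 2 = -6.
		 */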
10506
10507 // now print them on 1 line, descending order...
10508 debug("N%d.LMC%d: HW DLL %s Offset Pattern %d :",
10509 node, lmc, mode_str, pattern);
10510 for (byte = byte_hi; byte >= byte_lo; --byte)
10511 debug(" %4d", pat_best_offset[byte]);
10512 debug("\n");
10513 }
10514 // end of pattern loop
10515
10516 debug("N%d.LMC%d: HW DLL %s Offset Average : ", node, lmc, mode_str);
10517
10518	// print in descending byte index order
10519 for (byte = byte_hi; byte >= byte_lo; --byte) {
10520 // create the new average NINT
10521 new_best_offset[byte] = divide_nint(new_best_offset[byte],
10522 NUM_BYTE_PATTERNS);
10523
10524 // print the best offsets from all patterns
10525
10526 // print just the offset of all the bytes
10527 if (bytelane == 0x0A)
10528 debug("%4d ", new_best_offset[byte]);
10529 else // print the bytelanes also
10530 debug("(byte %d) %4d ", byte, new_best_offset[byte]);
10531
10532 // done with testing, load up the best offsets we found...
10533 // disable offsets while we load...
10534 change_dll_offset_enable(priv, lmc, 0);
10535 load_dll_offset(priv, lmc, dll_offset_mode,
10536 new_best_offset[byte], byte);
10537 // re-enable the offsets now that we are done loading
10538 change_dll_offset_enable(priv, lmc, 1);
10539 }
10540
10541 debug("\n");
10542}
10543
10544/*
10545 * Automatically adjust the DLL offset for the selected bytelane using
10546 * hardware-assist
10547 */
10548static int perform_HW_dll_offset_tuning(struct ddr_priv *priv,
10549 int dll_offset_mode, int bytelane)
10550{
10551 int if_64b;
10552 int save_ecc_ena[4];
10553 union cvmx_lmcx_config lmc_config;
10554 int lmc, num_lmcs = cvmx_dram_get_num_lmc(priv);
10555 const char *s;
10556 int loops = 1, loop;
10557 int by;
10558 u64 dram_tune_rank_offset;
10559 int dram_tune_byte_bursts = DEFAULT_BYTE_BURSTS;
10560 int node = 0;
10561
10562 // see if we want to do the tuning more than once per LMC...
10563 s = env_get("ddr_tune_ecc_loops");
10564 if (s)
10565 loops = strtoul(s, NULL, 0);
10566
10567 // allow override of the test repeats (bursts)
10568 s = env_get("ddr_tune_byte_bursts");
10569 if (s)
10570 dram_tune_byte_bursts = strtoul(s, NULL, 10);
10571
10572 // print current working values
10573 debug("N%d: H/W Tuning for bytelane %d will use %d loops, %d bursts, and %d patterns.\n",
10574 node, bytelane, loops, dram_tune_byte_bursts, NUM_BYTE_PATTERNS);
10575
10576 // FIXME? get flag from LMC0 only
10577 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0));
10578 if_64b = !lmc_config.s.mode32b;
10579
10580 // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
10581 dram_tune_rank_offset =
10582 1ull << (28 + lmc_config.s.pbank_lsb - lmc_config.s.rank_ena +
10583 (num_lmcs / 2));
10584
10585 // do once for each active LMC
10586
10587 for (lmc = 0; lmc < num_lmcs; lmc++) {
10588 debug("N%d: H/W Tuning: starting LMC%d bytelane %d tune.\n",
10589 node, lmc, bytelane);
10590
10591 /* Enable ECC for the HW tests */
10592 // NOTE: we do enable ECC, but the HW tests used will not
10593 // generate "visible" errors
10594 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10595 save_ecc_ena[lmc] = lmc_config.s.ecc_ena;
10596 lmc_config.s.ecc_ena = 1;
10597 lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64);
10598 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10599
10600 // testing is done on a single LMC at a time
10601 // FIXME: for now, loop here to show what happens multiple times
10602 for (loop = 0; loop < loops; loop++) {
10603 /* Perform DLL offset tuning */
10604 hw_assist_test_dll_offset(priv, 2 /* 2=read */, lmc,
10605 bytelane,
10606 if_64b, dram_tune_rank_offset,
10607 dram_tune_byte_bursts);
10608 }
10609
10610 // perform cleanup on active LMC
10611 debug("N%d: H/W Tuning: finishing LMC%d bytelane %d tune.\n",
10612 node, lmc, bytelane);
10613
10614 /* Restore ECC for DRAM tests */
10615 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10616 lmc_config.s.ecc_ena = save_ecc_ena[lmc];
10617 lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64);
10618 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10619
10620 // finally, see if there are any read offset overrides
10621 // after tuning
10622 for (by = 0; by < 9; by++) {
10623 s = lookup_env(priv, "ddr%d_tune_byte%d", lmc, by);
10624 if (s) {
10625 int dllro = strtoul(s, NULL, 10);
10626
10627 change_dll_offset_enable(priv, lmc, 0);
10628 load_dll_offset(priv, lmc, 2, dllro, by);
10629 change_dll_offset_enable(priv, lmc, 1);
10630 }
10631 }
10632
10633 } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
10634
10635 // finish up...
10636
10637 return 0;
10638
10639} /* perform_HW_dll_offset_tuning */
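/*
 * The routine above honours several U-Boot environment variables (read
 * via env_get()/lookup_env()):
 *   ddr_tune_ecc_loops    - number of tuning loops per LMC (default 1)
 *   ddr_tune_byte_bursts  - number of test bursts used per pattern
 *   ddr<L>_tune_byte<B>   - post-tuning read-offset override for byte B
 *                           of LMC L
 * A hypothetical session from the U-Boot prompt (values illustrative only):
 *   => setenv ddr_tune_ecc_loops 2
 *   => setenv ddr_tune_byte_bursts 16
 *   => setenv ddr0_tune_byte4 3
 */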
10640
10641// this routine simply makes the calls to the tuning routine and returns
10642// any errors
10643static int cvmx_tune_node(struct ddr_priv *priv)
10644{
10645 int errs, tot_errs;
10646 int do_dllwo = 0; // default to NO
10647 const char *str;
10648 int node = 0;
10649
10650 // Automatically tune the data and ECC byte DLL read offsets
10651 debug("N%d: Starting DLL Read Offset Tuning for LMCs\n", node);
10652 errs = perform_HW_dll_offset_tuning(priv, 2, 0x0A /* all bytelanes */);
10653 debug("N%d: Finished DLL Read Offset Tuning for LMCs, %d errors\n",
10654 node, errs);
10655 tot_errs = errs;
10656
10657	// Automatically tune the data and ECC byte DLL write offsets.
10658	// This is disabled by default for now, as it has not proven necessary.
10659	// Allow an environment override of the default setting.
10660 str = env_get("ddr_tune_write_offsets");
10661 if (str)
10662 do_dllwo = !!strtoul(str, NULL, 0);
10663 if (do_dllwo) {
10664 debug("N%d: Starting DLL Write Offset Tuning for LMCs\n", node);
10665 errs =
10666 perform_HW_dll_offset_tuning(priv, 1,
10667 0x0A /* all bytelanes */);
10668 debug("N%d: Finished DLL Write Offset Tuning for LMCs, %d errors\n",
10669 node, errs);
10670 tot_errs += errs;
10671 }
10672
10673 return tot_errs;
10674}
10675
10676// this routine calls the tuning routines when the filtering criteria
10677// below are met; it is intended to be used for automated tuning
10678
10679#define IS_DDR4 1
10680#define IS_DDR3 0
10681#define IS_RDIMM 1
10682#define IS_UDIMM 0
10683#define IS_1SLOT 1
10684#define IS_2SLOT 0
10685
10686// FIXME: DDR3 is not tuned
10687static const u32 ddr_speed_filter[2][2][2] = {
10688 [IS_DDR4] = {
10689 [IS_RDIMM] = {
10690 [IS_1SLOT] = 940,
10691 [IS_2SLOT] = 800},
10692 [IS_UDIMM] = {
10693 [IS_1SLOT] = 1050,
10694 [IS_2SLOT] = 940},
10695 },
10696 [IS_DDR3] = {
10697 [IS_RDIMM] = {
10698 [IS_1SLOT] = 0, // disabled
10699 [IS_2SLOT] = 0 // disabled
10700 },
10701 [IS_UDIMM] = {
10702 [IS_1SLOT] = 0, // disabled
10703 [IS_2SLOT] = 0 // disabled
10704 }
10705 }
10706};
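/*
 * Example of how the table above is consumed by cvmx_maybe_tune_node()
 * below: a single-slot DDR4 RDIMM configuration has a threshold of 940,
 * so with ddr_speed in MHz the check
 *	do_tune = (940 != 0) && (ddr_speed > 940);
 * enables auto-tuning only above 940 MHz, while the all-zero DDR3
 * entries disable auto-tuning entirely for DDR3.
 */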
10707
10708void cvmx_maybe_tune_node(struct ddr_priv *priv, u32 ddr_speed)
10709{
10710 const char *s;
10711 union cvmx_lmcx_config lmc_config;
10712 union cvmx_lmcx_control lmc_control;
10713 union cvmx_lmcx_ddr_pll_ctl lmc_ddr_pll_ctl;
10714 int is_ddr4;
10715 int is_rdimm;
10716 int is_1slot;
10717 int do_tune = 0;
10718 u32 ddr_min_speed;
10719 int node = 0;
10720
10721 // scale it down from Hz to MHz
10722 ddr_speed = divide_nint(ddr_speed, 1000000);
10723
10724 // FIXME: allow an override here so that all configs can be tuned
10725 // or none
10726 // If the envvar is defined, always either force it or avoid it
10727 // accordingly
10728 s = env_get("ddr_tune_all_configs");
10729 if (s) {
10730 do_tune = !!strtoul(s, NULL, 0);
10731 printf("N%d: DRAM auto-tuning %s.\n", node,
10732 (do_tune) ? "forced" : "disabled");
10733 if (do_tune)
10734 cvmx_tune_node(priv);
10735
10736 return;
10737 }
10738
10739 // filter the tuning calls here...
10740 // determine if we should/can run automatically for this configuration
10741 //
10742 // FIXME: tune only when the configuration indicates it will help:
10743 // DDR type, RDIMM or UDIMM, 1-slot or 2-slot, and speed
10744 //
10745 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0)); // sample LMC0
10746 lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(0)); // sample LMC0
10747 // sample LMC0
10748 lmc_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0));
10749
10750 is_ddr4 = (lmc_ddr_pll_ctl.s.ddr4_mode != 0);
10751 is_rdimm = (lmc_control.s.rdimm_ena != 0);
10752 // HACK, should do better
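	// init_status is used as a per-rank bitmask elsewhere in this file
	// (see cvmx_dbi_switchover_interface()); a value below 4 means
	// ranks 2 and 3 were never initialized, i.e. only the first DIMM
	// slot is populated.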
10753 is_1slot = (lmc_config.s.init_status < 4);
10754
10755 ddr_min_speed = ddr_speed_filter[is_ddr4][is_rdimm][is_1slot];
10756 do_tune = ((ddr_min_speed != 0) && (ddr_speed > ddr_min_speed));
10757
10758 debug("N%d: DDR%d %cDIMM %d-slot at %d MHz %s eligible for auto-tuning.\n",
10759 node, (is_ddr4) ? 4 : 3, (is_rdimm) ? 'R' : 'U',
10760 (is_1slot) ? 1 : 2, ddr_speed, (do_tune) ? "is" : "is not");
10761
10762 // call the tuning routine, filtering is done...
10763 if (do_tune)
10764 cvmx_tune_node(priv);
10765}
10766
10767/*
10768 * first pattern example:
10769 * GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
10770 * GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
10771 * GENERAL_PURPOSE2.DATA == 16'h0000;
10772 */
10773
10774static const u64 dbi_pattern[3] = {
10775 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000ULL };
10776
10777// Perform switchover to DBI
10778static void cvmx_dbi_switchover_interface(struct ddr_priv *priv, int lmc)
10779{
10780 union cvmx_lmcx_modereg_params0 modereg_params0;
10781 union cvmx_lmcx_modereg_params3 modereg_params3;
10782 union cvmx_lmcx_phy_ctl phy_ctl;
10783 union cvmx_lmcx_config lmcx_config;
10784 union cvmx_lmcx_ddr_pll_ctl ddr_pll_ctl;
10785 int rank_mask, rankx, active_ranks;
10786 u64 phys_addr, rank_offset;
10787 int num_lmcs, errors;
10788 int dbi_settings[9], byte, unlocked, retries;
10789 int ecc_ena;
10790 int rank_max = 1; // FIXME: make this 4 to try all the ranks
10791 int node = 0;
10792
10793 ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0));
10794
10795 lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10796 rank_mask = lmcx_config.s.init_status;
10797 ecc_ena = lmcx_config.s.ecc_ena;
10798
10799 // FIXME: must filter out any non-supported configs
10800 // ie, no DDR3, no x4 devices
10801 if (ddr_pll_ctl.s.ddr4_mode == 0 || lmcx_config.s.mode_x4dev == 1) {
10802 debug("N%d.LMC%d: DBI switchover: inappropriate device; EXITING...\n",
10803 node, lmc);
10804 return;
10805 }
10806
10807 // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
10808 num_lmcs = cvmx_dram_get_num_lmc(priv);
10809 rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb -
10810 lmcx_config.s.rank_ena + (num_lmcs / 2));
10811
10812 debug("N%d.LMC%d: DBI switchover: rank mask 0x%x, rank size 0x%016llx.\n",
10813 node, lmc, rank_mask, (unsigned long long)rank_offset);
10814
10815 /*
10816 * 1. conduct the current init sequence as usual all the way
10817 * after software write leveling.
10818 */
10819
10820 read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings);
10821
10822 display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings,
10823 " INIT");
10824
10825 /*
10826 * 2. set DBI related CSRs as below and issue MR write.
10827 * MODEREG_PARAMS3.WR_DBI=1
10828 * MODEREG_PARAMS3.RD_DBI=1
10829 * PHY_CTL.DBI_MODE_ENA=1
10830 */
10831 modereg_params0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc));
10832
10833 modereg_params3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc));
10834 modereg_params3.s.wr_dbi = 1;
10835 modereg_params3.s.rd_dbi = 1;
10836 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc), modereg_params3.u64);
10837
10838 phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(lmc));
10839 phy_ctl.s.dbi_mode_ena = 1;
10840 lmc_wr(priv, CVMX_LMCX_PHY_CTL(lmc), phy_ctl.u64);
10841
10842 /*
10843	 * there are two options for data to send. Let's start with (1)
10844 * and could move to (2) in the future:
10845 *
10846 * 1) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 0 (or for older chips where
10847	 * this does not exist) set the data directly in these registers.
10848 * this will yield a clk/2 pattern:
10849 * GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
10850 * GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
10851	 * GENERAL_PURPOSE2.DATA == 16'h0000;
10852 * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
10853 * here data comes from the LFSR generating a PRBS pattern
10854 * CHAR_CTL.EN = 0
10855 * CHAR_CTL.SEL = 0; // for PRBS
10856 * CHAR_CTL.DR = 1;
10857 * CHAR_CTL.PRBS = setup for whatever type of PRBS to send
10858 * CHAR_CTL.SKEW_ON = 1;
10859 */
10860 lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), dbi_pattern[0]);
10861 lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), dbi_pattern[1]);
10862 lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), dbi_pattern[2]);
10863
10864 /*
10865 * 3. adjust cas_latency (only necessary if RD_DBI is set).
10866 * here is my code for doing this:
10867 *
10868 * if (csr_model.MODEREG_PARAMS3.RD_DBI.value == 1) begin
10869 * case (csr_model.MODEREG_PARAMS0.CL.value)
10870 * 0,1,2,3,4: csr_model.MODEREG_PARAMS0.CL.value += 2;
10871 * // CL 9-13 -> 11-15
10872 * 5: begin
10873	 * // CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3
10874 * if((csr_model.MODEREG_PARAMS0.CWL.value==1 ||
10875 * csr_model.MODEREG_PARAMS0.CWL.value==3))
10876 * csr_model.MODEREG_PARAMS0.CL.value = 7; // 14->16
10877 * else
10878 * csr_model.MODEREG_PARAMS0.CL.value = 13; // 14->17
10879 * end
10880 * 6: csr_model.MODEREG_PARAMS0.CL.value = 8; // 15->18
10881 * 7: csr_model.MODEREG_PARAMS0.CL.value = 14; // 16->19
10882 * 8: csr_model.MODEREG_PARAMS0.CL.value = 15; // 18->21
10883 * default:
10884 * `cn_fatal(("Error mem_cfg (%s) CL (%d) with RD_DBI=1,
10885 * I am not sure what to do.",
10886 * mem_cfg, csr_model.MODEREG_PARAMS3.RD_DBI.value))
10887 * endcase
10888 * end
10889 */
10890
10891 if (modereg_params3.s.rd_dbi == 1) {
10892 int old_cl, new_cl, old_cwl;
10893
10894 old_cl = modereg_params0.s.cl;
10895 old_cwl = modereg_params0.s.cwl;
10896
10897 switch (old_cl) {
10898 case 0:
10899 case 1:
10900 case 2:
10901 case 3:
10902 case 4:
10903 new_cl = old_cl + 2;
10904 break; // 9-13->11-15
10905		// CL=14, CWL=10,12 gets +2, CWL=11,14 gets +3
10906 case 5:
10907 new_cl = ((old_cwl == 1) || (old_cwl == 3)) ? 7 : 13;
10908 break;
10909 case 6:
10910 new_cl = 8;
10911 break; // 15->18
10912 case 7:
10913 new_cl = 14;
10914 break; // 16->19
10915 case 8:
10916 new_cl = 15;
10917 break; // 18->21
10918 default:
10919 printf("ERROR: Bad CL value (%d) for DBI switchover.\n",
10920 old_cl);
10921			// FIXME: no clean error exit here; keep old_cl
10922			// intact so the CSR write below leaves CL unchanged
10923			new_cl = -1;
10924 break;
10925 }
10926 debug("N%d.LMC%d: DBI switchover: CL ADJ: old_cl 0x%x, old_cwl 0x%x, new_cl 0x%x.\n",
10927 node, lmc, old_cl, old_cwl, new_cl);
10928		modereg_params0.s.cl = (new_cl >= 0) ? new_cl : old_cl;
10929 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc),
10930 modereg_params0.u64);
10931 }
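	/*
	 * Worked example of the adjustment above, using the encodings noted
	 * in the comments: a DIMM at CL = 14 (encoding 5) with CWL encoding
	 * 1 or 3 moves to encoding 7, i.e. CL = 16 (+2 cycles); any other
	 * CWL encoding moves it to encoding 13, i.e. CL = 17 (+3 cycles).
	 */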
10932
10933 /*
10934 * 4. issue MRW to MR0 (CL) and MR5 (DBI), using LMC sequence
10935 * SEQ_CTL[SEQ_SEL] = MRW.
10936 */
10937	// Use the default values from the CSR fields;
10938	// also do the B-sides for RDIMMs...
10939
10940 for (rankx = 0; rankx < 4; rankx++) {
10941 if (!(rank_mask & (1 << rankx)))
10942 continue;
10943
10944 // for RDIMMs, B-side writes should get done automatically
10945 // when the A-side is written
10946 ddr4_mrw(priv, lmc, rankx, -1 /* use_default */,
10947 0 /*MRreg */, 0 /*A-side */); /* MR0 */
10948 ddr4_mrw(priv, lmc, rankx, -1 /* use_default */,
10949 5 /*MRreg */, 0 /*A-side */); /* MR5 */
10950 }
10951
10952 /*
10953 * 5. conduct DBI bit deskew training via the General Purpose
10954	 * R/W sequence (dbtrain). This may need to be run repeatedly to
10955	 * achieve a lock (up to 5 passes were needed in simulation):
10956 * SEQ_CTL[SEQ_SEL] = RW_TRAINING (15)
10957 * DBTRAIN_CTL.CMD_COUNT_EXT = all 1's
10958 * DBTRAIN_CTL.READ_CMD_COUNT = all 1's
10959 * DBTRAIN_CTL.TCCD_SEL = set according to MODEREG_PARAMS3[TCCD_L]
10960 * DBTRAIN_CTL.RW_TRAIN = 1
10961	 * DBTRAIN_CTL.READ_DQ_COUNT = don't care
10962 * DBTRAIN_CTL.WRITE_ENA = 1;
10963 * DBTRAIN_CTL.ACTIVATE = 1;
10964 * DBTRAIN_CTL LRANK, PRANK, ROW_A, BG, BA, COLUMN_A = set to a
10965 * valid address
10966 */
10967
10968 // NOW - do the training
10969 debug("N%d.LMC%d: DBI switchover: TRAINING begins...\n", node, lmc);
10970
10971 active_ranks = 0;
10972 for (rankx = 0; rankx < rank_max; rankx++) {
10973 if (!(rank_mask & (1 << rankx)))
10974 continue;
10975
10976 phys_addr = rank_offset * active_ranks;
10977 // FIXME: now done by test_dram_byte_hw()
10978
10979 active_ranks++;
10980
10981 retries = 0;
10982
10983restart_training:
10984
10985		// NOTE: the return value is a bitmask of the failing bytelanes;
10986		// we only print it here
10987 errors =
10988 test_dram_byte_hw(priv, lmc, phys_addr, DBTRAIN_DBI, NULL);
10989
10990 debug("N%d.LMC%d: DBI switchover: TEST: rank %d, phys_addr 0x%llx, errors 0x%x.\n",
10991 node, lmc, rankx, (unsigned long long)phys_addr, errors);
10992
10993 // NEXT - check for locking
10994 unlocked = 0;
10995 read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings);
10996
10997 for (byte = 0; byte < (8 + ecc_ena); byte++)
10998 unlocked += (dbi_settings[byte] & 1) ^ 1;
10999
11000 // FIXME: print out the DBI settings array after each rank?
11001 if (rank_max > 1) // only when doing more than 1 rank
11002 display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena,
11003 dbi_settings, " RANK");
11004
11005 if (unlocked > 0) {
11006 debug("N%d.LMC%d: DBI switchover: LOCK: %d still unlocked.\n",
11007 node, lmc, unlocked);
11008 retries++;
11009 if (retries < 10) {
11010 goto restart_training;
11011 } else {
11012 debug("N%d.LMC%d: DBI switchover: LOCK: %d retries exhausted.\n",
11013 node, lmc, retries);
11014 }
11015 }
11016	} /* for (rankx = 0; rankx < rank_max; rankx++) */
11017
11018 // print out the final DBI settings array
11019 display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings,
11020 "FINAL");
11021}
11022
11023void cvmx_dbi_switchover(struct ddr_priv *priv)
11024{
11025 int lmc;
11026 int num_lmcs = cvmx_dram_get_num_lmc(priv);
11027
11028 for (lmc = 0; lmc < num_lmcs; lmc++)
11029 cvmx_dbi_switchover_interface(priv, lmc);
11030}