// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2020 Marvell International Ltd.
4 */
5
6#include <command.h>
7#include <dm.h>
8#include <hang.h>
9#include <i2c.h>
10#include <ram.h>
11#include <time.h>
12
13#include <linux/bitops.h>
14#include <linux/io.h>
15
16#include <mach/octeon_ddr.h>
17
/* Random number generator stuff */

#define CVMX_OCT_DID_RNG	8ULL
21
Aaron Williamse60c6a72020-09-02 08:29:07 +020022static u64 cvmx_rng_get_random64(void)
23{
24 return csr_rd(cvmx_build_io_address(CVMX_OCT_DID_RNG, 0));
25}
26
27static void cvmx_rng_enable(void)
28{
29 u64 val;
30
31 val = csr_rd(CVMX_RNM_CTL_STATUS);
32 val |= BIT(0) | BIT(1);
33 csr_wr(CVMX_RNM_CTL_STATUS, val);
34}
35
/* Default verbosity for read-/write-leveling result tables */
#define RLEVEL_PRINTALL_DEFAULT 1
#define WLEVEL_PRINTALL_DEFAULT 1

/*
 * Define how many HW WL samples to take for majority voting.
 * MUST BE odd!!
 * Assume there should only be 2 possible values that will show up,
 * so treat ties as a problem!!!
 * NOTE: Do not change this without checking the code!!!
 */
#define WLEVEL_LOOPS_DEFAULT 5

#define ENABLE_COMPUTED_VREF_ADJUSTMENT 1
#define SW_WLEVEL_HW_DEFAULT 1
/* Sentinel "worse than anything" score used when ranking deskew results */
#define DEFAULT_BEST_RANK_SCORE 9999999
#define MAX_RANK_SCORE_LIMIT 99

/*
 * Define how many HW RL samples per rank to take multiple samples will
 * allow looking for the best sample score
 */
#define RLEVEL_SAMPLES_DEFAULT 3

/* Sequence tracing is compiled out; keep a no-op so call sites remain */
#define ddr_seq_print(format, ...) do {} while (0)

/* Per-setting vote counters used for HW write-leveling majority voting */
struct wlevel_bitcnt {
	int bitcnt[4];
};

/* Forward declarations — definitions appear later in this file */
static void display_dac_dbi_settings(int lmc, int dac_or_dbi,
				     int ecc_ena, int *settings, char *title);

static unsigned short load_dac_override(struct ddr_priv *priv, int if_num,
					int dac_value, int byte);

/* "mode" arg */
#define DBTRAIN_TEST 0
#define DBTRAIN_DBI 1
#define DBTRAIN_LFSR 2

static int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr,
				int mode, u64 *xor_data);

/* Values for the "reset" argument of cn7xxx_lmc_ddr3_reset() */
#define LMC_DDR3_RESET_ASSERT 0
#define LMC_DDR3_RESET_DEASSERT 1
81
/*
 * Drive the DDRn_RESET_L pin for one LMC interface by writing
 * LMC(0..3)_RESET_CTL[DDR3RST], then read the CSR back and wait 500us
 * (JEDEC requires T = 500us between RESET_L deassertion and CKE assertion).
 *
 * @priv:   DDR driver private data
 * @if_num: LMC interface number
 * @reset:  LMC_DDR3_RESET_ASSERT or LMC_DDR3_RESET_DEASSERT
 */
static void cn7xxx_lmc_ddr3_reset(struct ddr_priv *priv, int if_num, int reset)
{
	union cvmx_lmcx_reset_ctl reset_ctl;

	/*
	 * 4. Deassert DDRn_RESET_L pin by writing
	 *    LMC(0..3)_RESET_CTL[DDR3RST] = 1
	 *    without modifying any other LMC(0..3)_RESET_CTL fields.
	 * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
	 * 6. Wait a minimum of 500us. This guarantees the necessary T = 500us
	 *    delay between DDRn_RESET_L deassertion and DDRn_DIMM*_CKE*
	 *    assertion.
	 */
	debug("LMC%d %s DDR_RESET_L\n", if_num,
	      (reset ==
	       LMC_DDR3_RESET_DEASSERT) ? "De-asserting" : "Asserting");

	/* Read-modify-write so only DDR3RST changes */
	reset_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));
	reset_ctl.cn78xx.ddr3rst = reset;
	lmc_wr(priv, CVMX_LMCX_RESET_CTL(if_num), reset_ctl.u64);

	/* Read back to ensure the CSR write has completed before delaying */
	lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));

	udelay(500);
}
107
/*
 * Run the JEDEC RESET# initialization for one LMC interface (skipped
 * entirely when memory contents are being preserved, e.g. self-refresh).
 */
static void perform_lmc_reset(struct ddr_priv *priv, int node, int if_num)
{
	/*
	 * 5.9.6 LMC RESET Initialization
	 *
	 * The purpose of this step is to assert/deassert the RESET# pin at the
	 * DDR3/DDR4 parts.
	 *
	 * This LMC RESET step is done for all enabled LMCs.
	 *
	 * It may be appropriate to skip this step if the DDR3/DDR4 DRAM parts
	 * are in self refresh and are currently preserving their
	 * contents. (Software can determine this via
	 * LMC(0..3)_RESET_CTL[DDR3PSV] in some circumstances.) The remainder of
	 * this section assumes that the DRAM contents need not be preserved.
	 *
	 * The remainder of this section assumes that the CN78XX DDRn_RESET_L
	 * pin is attached to the RESET# pin of the attached DDR3/DDR4 parts,
	 * as will be appropriate in many systems.
	 *
	 * (In other systems, such as ones that can preserve DDR3/DDR4 part
	 * contents while CN78XX is powered down, it will not be appropriate to
	 * directly attach the CN78XX DDRn_RESET_L pin to DRESET# of the
	 * DDR3/DDR4 parts, and this section may not apply.)
	 *
	 * The remainder of this section describes the sequence for LMCn.
	 *
	 * Perform the following six substeps for LMC reset initialization:
	 *
	 * 1. If not done already, assert DDRn_RESET_L pin by writing
	 * LMC(0..3)_RESET_ CTL[DDR3RST] = 0 without modifying any other
	 * LMC(0..3)_RESET_CTL fields.
	 */

	if (!ddr_memory_preserved(priv)) {
		/*
		 * 2. Read LMC(0..3)_RESET_CTL and wait for the result.
		 */

		lmc_rd(priv, CVMX_LMCX_RESET_CTL(if_num));

		/*
		 * 3. Wait until RESET# assertion-time requirement from JEDEC
		 * DDR3/DDR4 specification is satisfied (200 us during a
		 * power-on ramp, 100ns when power is already stable).
		 */

		udelay(200);

		/*
		 * 4. Deassert DDRn_RESET_L pin by writing
		 *    LMC(0..3)_RESET_CTL[DDR3RST] = 1
		 *    without modifying any other LMC(0..3)_RESET_CTL fields.
		 * 5. Read LMC(0..3)_RESET_CTL and wait for the result.
		 * 6. Wait a minimum of 500us. This guarantees the necessary
		 *    T = 500us delay between DDRn_RESET_L deassertion and
		 *    DDRn_DIMM*_CKE* assertion.
		 */
		cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT);

		/* Toggle Reset Again */
		/* That is, assert, then de-assert, one more time */
		cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_ASSERT);
		cn7xxx_lmc_ddr3_reset(priv, if_num, LMC_DDR3_RESET_DEASSERT);
	}
}
174
/*
 * Kick off one of the LMC hardware-assisted sequences (power-up/init,
 * self-refresh exit, MRW, RCW init, MPR access, ...) on the selected
 * ranks and poll SEQ_CTL[SEQ_COMPLETE], giving up after 100 * 100us.
 *
 * @priv:      DDR driver private data
 * @rank_mask: ranks to participate (written to LMC CONFIG[RANKMASK])
 * @if_num:    LMC interface number
 * @sequence:  SEQ_CTL[SEQ_SEL] value selecting which sequence to run
 */
void oct3_ddr3_seq(struct ddr_priv *priv, int rank_mask, int if_num,
		   int sequence)
{
	/*
	 * 3. Without changing any other fields in LMC(0)_CONFIG, write
	 * LMC(0)_CONFIG[RANKMASK] then write both
	 * LMC(0)_SEQ_CTL[SEQ_SEL,INIT_START] = 1 with a single CSR write
	 * operation. LMC(0)_CONFIG[RANKMASK] bits should be set to indicate
	 * the ranks that will participate in the sequence.
	 *
	 * The LMC(0)_SEQ_CTL[SEQ_SEL] value should select power-up/init or
	 * selfrefresh exit, depending on whether the DRAM parts are in
	 * self-refresh and whether their contents should be preserved. While
	 * LMC performs these sequences, it will not perform any other DDR3
	 * transactions. When the sequence is complete, hardware sets the
	 * LMC(0)_CONFIG[INIT_STATUS] bits for the ranks that have been
	 * initialized.
	 *
	 * If power-up/init is selected immediately following a DRESET
	 * assertion, LMC executes the sequence described in the "Reset and
	 * Initialization Procedure" section of the JEDEC DDR3
	 * specification. This includes activating CKE, writing all four DDR3
	 * mode registers on all selected ranks, and issuing the required
	 * ZQCL
	 * command. The LMC(0)_CONFIG[RANKMASK] value should select all ranks
	 * with attached DRAM in this case. If LMC(0)_CONTROL[RDIMM_ENA] = 1,
	 * LMC writes the JEDEC standard SSTE32882 control words selected by
	 * LMC(0)_DIMM_CTL[DIMM*_WMASK] between DDR_CKE* signal assertion and
	 * the first DDR3 mode register write operation.
	 * LMC(0)_DIMM_CTL[DIMM*_WMASK] should be cleared to 0 if the
	 * corresponding DIMM is not present.
	 *
	 * If self-refresh exit is selected, LMC executes the required SRX
	 * command followed by a refresh and ZQ calibration. Section 4.5
	 * describes behavior of a REF + ZQCS. LMC does not write the DDR3
	 * mode registers as part of this sequence, and the mode register
	 * parameters must match at self-refresh entry and exit times.
	 *
	 * 4. Read LMC(0)_SEQ_CTL and wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE]
	 * to be set.
	 *
	 * 5. Read LMC(0)_CONFIG[INIT_STATUS] and confirm that all ranks have
	 * been initialized.
	 */

	union cvmx_lmcx_seq_ctl seq_ctl;
	union cvmx_lmcx_config lmc_config;
	int timeout;

	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
	lmc_config.s.rankmask = rank_mask;
	lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);

	seq_ctl.u64 = 0;

	seq_ctl.s.init_start = 1;
	seq_ctl.s.seq_sel = sequence;

	ddr_seq_print
	    ("Performing LMC sequence: rank_mask=0x%02x, sequence=0x%x, %s\n",
	     rank_mask, sequence, sequence_str[sequence]);

	/* seq_sel == 3 is self-refresh exit */
	if (seq_ctl.s.seq_sel == 3)
		debug("LMC%d: Exiting Self-refresh Rank_mask:%x\n", if_num,
		      rank_mask);

	lmc_wr(priv, CVMX_LMCX_SEQ_CTL(if_num), seq_ctl.u64);
	/* Read back to ensure the write posted before polling */
	lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num));

	timeout = 100;
	do {
		udelay(100);	/* Wait a while */
		seq_ctl.u64 = lmc_rd(priv, CVMX_LMCX_SEQ_CTL(if_num));
		if (--timeout == 0) {
			printf("Sequence %d timed out\n", sequence);
			break;
		}
	} while (seq_ctl.s.seq_complete != 1);

	ddr_seq_print("	LMC sequence=%x: Completed.\n", sequence);
}
256
/* Build a node-qualified physical address */
#define bdk_numa_get_address(n, p) ((p) | ((u64)n) << CVMX_NODE_MEM_SHIFT)
/* Offset applied to the test region so boot code/data is not clobbered */
#define AREA_BASE_OFFSET BIT_ULL(26)

/*
 * Memory stress test for one LMC: fill two mirrored areas with a
 * pseudo-random address-dependent pattern, repeatedly XOR-modify both
 * copies with a random value, then compare each copy against the
 * predicted value.  Mismatches are accumulated per byte lane.
 *
 * @priv:     DDR driver private data
 * @lmc:      LMC interface under test (folded into the address at bit 7)
 * @p:        base physical address for the test region
 * @bitmask:  byte lanes to test; clear lanes are ignored
 * @xor_data: if non-NULL, [0] receives the union of all bad data bits
 *            seen ([1] is deliberately left zero)
 *
 * Return: bitmask of byte lanes that saw at least one error
 *         (bit 0 = lane 0, ... bit 7 = lane 7)
 */
static int test_dram_byte64(struct ddr_priv *priv, int lmc, u64 p,
			    u64 bitmask, u64 *xor_data)
{
	u64 p1, p2, d1, d2;
	u64 v, v1;
	u64 p2offset = (1ULL << 26);	// offset to area 2
	u64 datamask;
	u64 xor;
	u64 i, j, k;
	u64 ii;
	int errors = 0;
	//u64 index;
	u64 pattern1 = cvmx_rng_get_random64();
	u64 pattern2 = 0;
	u64 bad_bits[2] = { 0, 0 };
	/* Highest address bit stepped by the k loop (DRAM row range) */
	int kbitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18;
	union cvmx_l2c_ctl l2c_ctl;
	int burst;
	int saved_dissblkdty;
	int node = 0;

	// Force full cacheline write-backs to boost traffic
	l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL_REL);
	saved_dissblkdty = l2c_ctl.cn78xx.dissblkdty;
	l2c_ctl.cn78xx.dissblkdty = 1;
	l2c_wr(priv, CVMX_L2C_CTL_REL, l2c_ctl.u64);

	if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX))
		kbitno = 18;

	// Byte lanes may be clear in the mask to indicate no testing on that
	//lane.
	datamask = bitmask;

	/*
	 * Add offset to both test regions to not clobber boot stuff
	 * when running from L2 for NAND boot.
	 */
	p += AREA_BASE_OFFSET;	// make sure base is out of the way of boot

	// final address must include LMC and node
	p |= (lmc << 7);	/* Map address into proper interface */
	p = bdk_numa_get_address(node, p);	/* Map to node */
	p |= 1ull << 63;

/* Loop strides/limits chosen so each loop walks a distinct address field */
#define II_INC BIT_ULL(22)
#define II_MAX BIT_ULL(22)
#define K_INC BIT_ULL(14)
#define K_MAX BIT_ULL(kbitno)
#define J_INC BIT_ULL(9)
#define J_MAX BIT_ULL(12)
#define I_INC BIT_ULL(3)
#define I_MAX BIT_ULL(7)

	debug("N%d.LMC%d: %s: phys_addr=0x%llx/0x%llx (0x%llx)\n",
	      node, lmc, __func__, p, p + p2offset, 1ULL << kbitno);

	// loops are ordered so that only a single 64-bit slot is written to
	// each cacheline at one time, then the cachelines are forced out;
	// this should maximize read/write traffic

	// FIXME? extend the range of memory tested!!
	for (ii = 0; ii < II_MAX; ii += II_INC) {
		for (i = 0; i < I_MAX; i += I_INC) {
			for (k = 0; k < K_MAX; k += K_INC) {
				for (j = 0; j < J_MAX; j += J_INC) {
					p1 = p + ii + k + j;
					p2 = p1 + p2offset;

					/* Pattern depends on the address so
					 * each slot predicts its own value
					 */
					v = pattern1 * (p1 + i);
					// write the same thing to both areas
					v1 = v;

					cvmx_write64_uint64(p1 + i, v);
					cvmx_write64_uint64(p2 + i, v1);

					CVMX_CACHE_WBIL2(p1, 0);
					CVMX_CACHE_WBIL2(p2, 0);
				}
			}
		}
	}

	CVMX_DCACHE_INVALIDATE;

	debug("N%d.LMC%d: dram_tuning_mem_xor: done INIT loop\n", node, lmc);

	/* Make a series of passes over the memory areas. */

	for (burst = 0; burst < 1 /* was: dram_tune_use_bursts */ ; burst++) {
		u64 this_pattern = cvmx_rng_get_random64();

		/* Track accumulated XOR so the compare loop can predict */
		pattern2 ^= this_pattern;

		/*
		 * XOR the data with a random value, applying the change to both
		 * memory areas.
		 */

		// FIXME? extend the range of memory tested!!
		for (ii = 0; ii < II_MAX; ii += II_INC) {
			// FIXME: rearranged, did not make much difference?
			for (i = 0; i < I_MAX; i += I_INC) {
				for (k = 0; k < K_MAX; k += K_INC) {
					for (j = 0; j < J_MAX; j += J_INC) {
						p1 = p + ii + k + j;
						p2 = p1 + p2offset;

						v = cvmx_read64_uint64(p1 +
								       i) ^
						    this_pattern;
						v1 = cvmx_read64_uint64(p2 +
									i) ^
						    this_pattern;

						cvmx_write64_uint64(p1 + i, v);
						cvmx_write64_uint64(p2 + i, v1);

						CVMX_CACHE_WBIL2(p1, 0);
						CVMX_CACHE_WBIL2(p2, 0);
					}
				}
			}
		}

		CVMX_DCACHE_INVALIDATE;

		debug("N%d.LMC%d: dram_tuning_mem_xor: done MODIFY loop\n",
		      node, lmc);

		/*
		 * Look for differences in the areas. If there is a mismatch,
		 * reset both memory locations with the same pattern. Failing
		 * to do so means that on all subsequent passes the pair of
		 * locations remain out of sync giving spurious errors.
		 */

		// FIXME: Change the loop order so that an entire cache line
		//        is compared at one time. This is so that a read
		//        error that occurs *anywhere* on the cacheline will
		//        be caught, rather than comparing only 1 cacheline
		//        slot at a time, where an error on a different
		//        slot will be missed that time around
		//        Does the above make sense?

		// FIXME? extend the range of memory tested!!
		for (ii = 0; ii < II_MAX; ii += II_INC) {
			for (k = 0; k < K_MAX; k += K_INC) {
				for (j = 0; j < J_MAX; j += J_INC) {
					p1 = p + ii + k + j;
					p2 = p1 + p2offset;

					// process entire cachelines in the
					//innermost loop
					for (i = 0; i < I_MAX; i += I_INC) {
						int bybit = 1;
						// start in byte lane 0
						u64 bymsk = 0xffULL;

						// FIXME: this should predict
						// what we find...???
						v = ((p1 + i) * pattern1) ^
						    pattern2;
						d1 = cvmx_read64_uint64(p1 + i);
						d2 = cvmx_read64_uint64(p2 + i);

						// union of error bits only in
						// active byte lanes
						xor = ((d1 ^ v) | (d2 ^ v)) &
						    datamask;

						if (!xor)
							continue;

						// accumulate bad bits
						bad_bits[0] |= xor;

						while (xor != 0) {
							debug("ERROR(%03d): [0x%016llX] [0x%016llX]  expected 0x%016llX  d1 %016llX  d2 %016llX\n",
							      burst, p1, p2, v,
							      d1, d2);
							// error(s) in this lane
							if (xor & bymsk) {
								// set the byte
								// error bit
								errors |= bybit;
								// clear byte
								// lane in
								// error bits
								xor &= ~bymsk;
								// clear the
								// byte lane in
								// the mask
								datamask &= ~bymsk;
#if EXIT_WHEN_ALL_LANES_HAVE_ERRORS
								// nothing
								// left to do
								if (datamask == 0) {
									return errors;
								}
#endif /* EXIT_WHEN_ALL_LANES_HAVE_ERRORS */
							}
							// move mask into
							// next byte lane
							bymsk <<= 8;
							// move bit into next
							// byte position
							bybit <<= 1;
						}
					}
					CVMX_CACHE_WBIL2(p1, 0);
					CVMX_CACHE_WBIL2(p2, 0);
				}
			}
		}

		debug("N%d.LMC%d: dram_tuning_mem_xor: done TEST loop\n",
		      node, lmc);
	}

	if (xor_data) {		// send the bad bits back...
		xor_data[0] = bad_bits[0];
		xor_data[1] = bad_bits[1];	// let it be zeroed
	}

	// Restore original setting that could enable partial cacheline writes
	l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL_REL);
	l2c_ctl.cn78xx.dissblkdty = saved_dissblkdty;
	l2c_wr(priv, CVMX_L2C_CTL_REL, l2c_ctl.u64);

	return errors;
}
492
/*
 * Issue a DDR4 Mode Register Write to one rank via the MRW sequence (0x8).
 *
 * @priv:       DDR driver private data
 * @if_num:     LMC interface number
 * @rank:       target rank
 * @mr_wr_addr: mode register value; -1 means use the hardware default
 *              (sets MR_WR_USE_DEFAULT_VALUE and writes address 0)
 * @mr_wr_sel:  which mode register to write
 * @mr_wr_bg1:  BG1 bit — selects A-/B-side on address-mirrored DIMMs
 *              (see set_mpr_mode() callers)
 */
static void ddr4_mrw(struct ddr_priv *priv, int if_num, int rank,
		     int mr_wr_addr, int mr_wr_sel, int mr_wr_bg1)
{
	union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;

	lmc_mr_mpr_ctl.u64 = 0;
	lmc_mr_mpr_ctl.cn78xx.mr_wr_addr = (mr_wr_addr == -1) ? 0 : mr_wr_addr;
	lmc_mr_mpr_ctl.cn78xx.mr_wr_sel = mr_wr_sel;
	lmc_mr_mpr_ctl.cn78xx.mr_wr_rank = rank;
	lmc_mr_mpr_ctl.cn78xx.mr_wr_use_default_value =
	    (mr_wr_addr == -1) ? 1 : 0;
	lmc_mr_mpr_ctl.cn78xx.mr_wr_bg1 = mr_wr_bg1;
	lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);

	/* Mode Register Write */
	oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);
}
510
/* Invert the address bits that are mirrored on the B-side of a DIMM */
#define INV_A0_17(x) ((x) ^ 0x22bf8)

/*
 * Enter or leave MPR mode on every rank selected by rank_mask by
 * writing MR3.  For the B-side (bg1 != 0) the address and register
 * select are sent inverted to compensate for address mirroring.
 */
static void set_mpr_mode(struct ddr_priv *priv, int rank_mask,
			 int if_num, int dimm_count, int mpr, int bg1)
{
	int rankx;

	debug("All Ranks: Set mpr mode = %x %c-side\n",
	      mpr, (bg1 == 0) ? 'A' : 'B');

	for (rankx = 0; rankx < dimm_count * 4; rankx++) {
		int mr3_val, mr_sel;

		if (!(rank_mask & (1 << rankx)))
			continue;

		if (bg1 == 0) {
			/* MR3 A-side: send value as-is */
			mr3_val = mpr << 2;
			mr_sel = 3;
		} else {
			/* MR3 B-side: mirrored address and select */
			mr3_val = INV_A0_17(mpr << 2);
			mr_sel = ~3;
		}

		ddr4_mrw(priv, if_num, rankx, mr3_val, mr_sel, bg1);
	}
}
534
/*
 * Trigger an MPR read (page/location) on one rank via the MPR access
 * sequence (0x9).  The result lands in the LMC MPR_DATA* CSRs; see
 * ddr4_mpr_read() for retrieval.
 */
static void do_ddr4_mpr_read(struct ddr_priv *priv, int if_num,
			     int rank, int page, int location)
{
	union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;

	lmc_mr_mpr_ctl.u64 = lmc_rd(priv, CVMX_LMCX_MR_MPR_CTL(if_num));
	lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = 0;
	lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page;	/* Page */
	lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
	lmc_mr_mpr_ctl.cn70xx.mpr_loc = location;
	lmc_mr_mpr_ctl.cn70xx.mpr_wr = 0;	/* Read=0, Write=1 */
	lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);

	/* MPR register access sequence */
	oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9);

	debug("LMC_MR_MPR_CTL                  : 0x%016llx\n",
	      lmc_mr_mpr_ctl.u64);
	debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n",
	      lmc_mr_mpr_ctl.cn70xx.mr_wr_addr);
	debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n",
	      lmc_mr_mpr_ctl.cn70xx.mr_wr_sel);
	debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc   : 0x%02x\n",
	      lmc_mr_mpr_ctl.cn70xx.mpr_loc);
	debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr    : 0x%02x\n",
	      lmc_mr_mpr_ctl.cn70xx.mpr_wr);
}
562
563static int set_rdimm_mode(struct ddr_priv *priv, int if_num, int enable)
564{
565 union cvmx_lmcx_control lmc_control;
566 int save_rdimm_mode;
567
568 lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
569 save_rdimm_mode = lmc_control.s.rdimm_ena;
570 lmc_control.s.rdimm_ena = enable;
571 debug("Setting RDIMM_ENA = %x\n", enable);
572 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), lmc_control.u64);
573
574 return save_rdimm_mode;
575}
576
/*
 * Perform an MPR read and return the captured data.
 * NOTE: only MPR_DATA0 (low 64 bits) is fetched into mpr_data[0];
 * callers' mpr_data[1]/[2] entries are left untouched.
 */
static void ddr4_mpr_read(struct ddr_priv *priv, int if_num, int rank,
			  int page, int location, u64 *mpr_data)
{
	do_ddr4_mpr_read(priv, if_num, rank, page, location);

	mpr_data[0] = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num));
}
584
585/* Display MPR values for Page */
586static void display_mpr_page(struct ddr_priv *priv, int rank_mask,
587 int if_num, int page)
588{
589 int rankx, location;
590 u64 mpr_data[3];
591
592 for (rankx = 0; rankx < 4; rankx++) {
593 if (!(rank_mask & (1 << rankx)))
594 continue;
595
596 debug("N0.LMC%d.R%d: MPR Page %d loc [0:3]: ",
597 if_num, rankx, page);
598 for (location = 0; location < 4; location++) {
599 ddr4_mpr_read(priv, if_num, rankx, page, location,
600 mpr_data);
601 debug("0x%02llx ", mpr_data[0] & 0xFF);
602 }
603 debug("\n");
604
605 } /* for (rankx = 0; rankx < 4; rankx++) */
606}
607
/*
 * Write one byte of pattern data into an MPR location (page/location)
 * on one rank via the MPR access sequence (0x9).
 */
static void ddr4_mpr_write(struct ddr_priv *priv, int if_num, int rank,
			   int page, int location, u8 mpr_data)
{
	union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;

	lmc_mr_mpr_ctl.u64 = 0;
	lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mpr_data;
	lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = page;	/* Page */
	lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
	lmc_mr_mpr_ctl.cn70xx.mpr_loc = location;
	lmc_mr_mpr_ctl.cn70xx.mpr_wr = 1;	/* Read=0, Write=1 */
	lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);

	/* MPR register access sequence */
	oct3_ddr3_seq(priv, 1 << rank, if_num, 0x9);

	debug("LMC_MR_MPR_CTL                  : 0x%016llx\n",
	      lmc_mr_mpr_ctl.u64);
	debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_addr: 0x%02x\n",
	      lmc_mr_mpr_ctl.cn70xx.mr_wr_addr);
	debug("lmc_mr_mpr_ctl.cn70xx.mr_wr_sel : 0x%02x\n",
	      lmc_mr_mpr_ctl.cn70xx.mr_wr_sel);
	debug("lmc_mr_mpr_ctl.cn70xx.mpr_loc   : 0x%02x\n",
	      lmc_mr_mpr_ctl.cn70xx.mpr_loc);
	debug("lmc_mr_mpr_ctl.cn70xx.mpr_wr    : 0x%02x\n",
	      lmc_mr_mpr_ctl.cn70xx.mpr_wr);
}
635
/*
 * Program the DDR4 VrefDQ training value for one rank via MR6.
 * Writes MR6 twice with A7=1 (training mode enabled) — some vendors
 * only capture the value on the second MRS — then prepares a third
 * write with A7=0 to leave training mode.
 *
 * @range: VrefDQ training range (MR6 A6)
 * @value: VrefDQ training value (MR6 A5:A0)
 */
static void set_vref(struct ddr_priv *priv, int if_num, int rank,
		     int range, int value)
{
	union cvmx_lmcx_mr_mpr_ctl lmc_mr_mpr_ctl;
	union cvmx_lmcx_modereg_params3 lmc_modereg_params3;
	int mr_wr_addr = 0;

	lmc_mr_mpr_ctl.u64 = 0;
	lmc_modereg_params3.u64 = lmc_rd(priv,
					 CVMX_LMCX_MODEREG_PARAMS3(if_num));

	/* A12:A10 tCCD_L */
	mr_wr_addr |= lmc_modereg_params3.s.tccd_l << 10;
	mr_wr_addr |= 1 << 7;	/* A7 1 = Enable(Training Mode) */
	mr_wr_addr |= range << 6;	/* A6 vrefDQ Training Range */
	mr_wr_addr |= value << 0;	/* A5:A0 vrefDQ Training Value */

	lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr;
	lmc_mr_mpr_ctl.cn70xx.mr_wr_sel = 6;	/* Write MR6 */
	lmc_mr_mpr_ctl.cn70xx.mr_wr_rank = rank;
	lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);

	/* 0x8 = Mode Register Write */
	oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);

	/*
	 * It is vendor specific whether vref_value is captured with A7=1.
	 * A subsequent MRS might be necessary.
	 */
	oct3_ddr3_seq(priv, 1 << rank, if_num, 0x8);

	mr_wr_addr &= ~(1 << 7);	/* A7 0 = Disable(Training Mode) */
	lmc_mr_mpr_ctl.cn70xx.mr_wr_addr = mr_wr_addr;
	lmc_wr(priv, CVMX_LMCX_MR_MPR_CTL(if_num), lmc_mr_mpr_ctl.u64);
}
671
/*
 * Enable or disable the RCD output inversion bit (RC0 bit 0) on every
 * DIMM, then run the RCW init sequence (0x7) so the control words are
 * actually written to the registers.
 *
 * @inversion: bit 0 = desired RC0 inversion-disable setting
 */
static void set_dram_output_inversion(struct ddr_priv *priv, int if_num,
				      int dimm_count, int rank_mask,
				      int inversion)
{
	union cvmx_lmcx_ddr4_dimm_ctl lmc_ddr4_dimm_ctl;
	union cvmx_lmcx_dimmx_params lmc_dimmx_params;
	union cvmx_lmcx_dimm_ctl lmc_dimm_ctl;
	int dimm_no;

	/* Don't touch extended register control words */
	lmc_ddr4_dimm_ctl.u64 = 0;
	lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), lmc_ddr4_dimm_ctl.u64);

	debug("All DIMMs: Register Control Word          RC0 : %x\n",
	      (inversion & 1));

	/* Update only bit 0 of RC0 in each DIMM's parameter CSR */
	for (dimm_no = 0; dimm_no < dimm_count; ++dimm_no) {
		lmc_dimmx_params.u64 =
		    lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num));
		lmc_dimmx_params.s.rc0 =
		    (lmc_dimmx_params.s.rc0 & ~1) | (inversion & 1);

		lmc_wr(priv,
		       CVMX_LMCX_DIMMX_PARAMS(dimm_no, if_num),
		       lmc_dimmx_params.u64);
	}

	/* LMC0_DIMM_CTL — select which control words get written (RC0 only) */
	lmc_dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
	lmc_dimm_ctl.s.dimm0_wmask = 0x1;
	lmc_dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ? 0x0001 : 0x0000;

	debug("LMC DIMM_CTL                                  : 0x%016llx\n",
	      lmc_dimm_ctl.u64);
	lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), lmc_dimm_ctl.u64);

	oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);	/* Init RCW */
}
710
/*
 * Program 'pattern' into MPR page 0 of every rank selected by
 * rank_mask, at each location whose bit is set in location_mask
 * (bit N => location N).
 */
static void write_mpr_page0_pattern(struct ddr_priv *priv, int rank_mask,
				    int if_num, int dimm_count, int pattern,
				    int location_mask)
{
	int rankx, loc;

	for (rankx = 0; rankx < dimm_count * 4; rankx++) {
		if (!(rank_mask & (1 << rankx)))
			continue;

		for (loc = 0; loc < 4; ++loc) {
			if (!(location_mask & (1 << loc)))
				continue;

			/* page 0, selected location */
			ddr4_mpr_write(priv, if_num, rankx, 0, loc, pattern);
		}
	}
}
731
/*
 * Pre-condition the MPR pattern on RDIMMs so MPR reads return the same
 * value regardless of RCD output inversion: temporarily disable refresh,
 * RCD inversion and CONTROL[RDIMM_ENA], write the same pattern into all
 * 4 MPR page-0 locations of every enabled rank, then restore everything.
 * Must be run after DRAM init; the step order below is significant.
 */
static void change_rdimm_mpr_pattern(struct ddr_priv *priv, int rank_mask,
				     int if_num, int dimm_count)
{
	int save_ref_zqcs_int;
	union cvmx_lmcx_config lmc_config;

	/*
	 * Okay, here is the latest sequence.  This should work for all
	 * chips and passes (78,88,73,etc).  This sequence should be run
	 * immediately after DRAM INIT.  The basic idea is to write the
	 * same pattern into each of the 4 MPR locations in the DRAM, so
	 * that the same value is returned when doing MPR reads regardless
	 * of the inversion state.  My advice is to put this into a
	 * function, change_rdimm_mpr_pattern or something like that, so
	 * that it can be called multiple times, as I think David wants a
	 * clock-like pattern for OFFSET training, but does not want a
	 * clock pattern for Bit-Deskew.  You should then be able to call
	 * this at any point in the init sequence (after DRAM init) to
	 * change the pattern to a new value.
	 * Mike
	 *
	 * A correction: PHY doesn't need any pattern during offset
	 * training, but needs clock like pattern for internal vref and
	 * bit-dskew training.  So for that reason, these steps below have
	 * to be conducted before those trainings to pre-condition
	 * the pattern.  David
	 *
	 * Note: Step 3, 4, 8 and 9 have to be done through RDIMM
	 * sequence. If you issue MRW sequence to do RCW write (in o78 pass
	 * 1 at least), LMC will still do two commands because
	 * CONTROL[RDIMM_ENA] is still set high. We don't want it to have
	 * any unintentional mode register write so it's best to do what
	 * Mike is doing here.
	 * Andrew
	 */

	/* 1) Disable refresh (REF_ZQCS_INT = 0) */

	debug("1) Disable refresh (REF_ZQCS_INT = 0)\n");

	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
	save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int;
	lmc_config.cn78xx.ref_zqcs_int = 0;
	lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);

	/*
	 * 2) Put all devices in MPR mode (Run MRW sequence (sequence=8)
	 * with MODEREG_PARAMS0[MPRLOC]=0,
	 * MODEREG_PARAMS0[MPR]=1, MR_MPR_CTL[MR_WR_SEL]=3, and
	 * MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1)
	 */

	debug("2) Put all devices in MPR mode (Run MRW sequence (sequence=8)\n");

	/* A-side */
	set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 0);
	/* B-side */
	set_mpr_mode(priv, rank_mask, if_num, dimm_count, 1, 1);

	/*
	 * a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and set
	 * the value you would like directly into
	 * MR_MPR_CTL[MR_WR_ADDR]
	 */

	/*
	 * 3) Disable RCD Parity (if previously enabled) - parity does not
	 * work if inversion disabled
	 */

	debug("3) Disable RCD Parity\n");

	/*
	 * 4) Disable Inversion in the RCD.
	 * a. I did (3&4) via the RDIMM sequence (seq_sel=7), but it
	 * may be easier to use the MRW sequence (seq_sel=8).  Just set
	 * MR_MPR_CTL[MR_WR_SEL]=7, MR_MPR_CTL[MR_WR_ADDR][3:0]=data,
	 * MR_MPR_CTL[MR_WR_ADDR][7:4]=RCD reg
	 */

	debug("4) Disable Inversion in the RCD.\n");

	set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 1);

	/*
	 * 5) Disable CONTROL[RDIMM_ENA] so that MR sequence goes out
	 * non-inverted.
	 */

	debug("5) Disable CONTROL[RDIMM_ENA]\n");

	set_rdimm_mode(priv, if_num, 0);

	/*
	 * 6) Write all 4 MPR registers with the desired pattern (have to
	 * do this for all enabled ranks)
	 * a. MR_MPR_CTL.MPR_WR=1, MR_MPR_CTL.MPR_LOC=0..3,
	 * MR_MPR_CTL.MR_WR_SEL=0, MR_MPR_CTL.MR_WR_ADDR[7:0]=pattern
	 */

	debug("6) Write all 4 MPR page 0 Training Patterns\n");

	write_mpr_page0_pattern(priv, rank_mask, if_num, dimm_count, 0x55, 0x8);

	/* 7) Re-enable RDIMM_ENA */

	debug("7) Re-enable RDIMM_ENA\n");

	set_rdimm_mode(priv, if_num, 1);

	/* 8) Re-enable RDIMM inversion */

	debug("8) Re-enable RDIMM inversion\n");

	set_dram_output_inversion(priv, if_num, dimm_count, rank_mask, 0);

	/* 9) Re-enable RDIMM parity (if desired) */

	debug("9) Re-enable RDIMM parity (if desired)\n");

	/*
	 * 10)Take B-side devices out of MPR mode (Run MRW sequence
	 * (sequence=8) with MODEREG_PARAMS0[MPRLOC]=0,
	 * MODEREG_PARAMS0[MPR]=0, MR_MPR_CTL[MR_WR_SEL]=3, and
	 * MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=1)
	 */

	debug("10)Take B-side devices out of MPR mode\n");

	set_mpr_mode(priv, rank_mask, if_num, dimm_count,
		     /* mpr */ 0, /* bg1 */ 1);

	/*
	 * a. Or you can set MR_MPR_CTL[MR_WR_USE_DEFAULT_VALUE]=0 and
	 * set the value you would like directly into MR_MPR_CTL[MR_WR_ADDR]
	 */

	/* 11)Re-enable refresh (REF_ZQCS_INT=previous value) */

	debug("11)Re-enable refresh (REF_ZQCS_INT=previous value)\n");

	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
	lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int;
	lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
}
877
/*
 * Walk adjacent byte-lane pairs in the given visit order (seq is
 * terminated by -1) and check each pair's 4-bit transition code against
 * the validity mask 0xBDE7.  Returns 1 on the first invalid transition,
 * 0 when the whole sequence is consistent.
 */
static int validate_hwl_seq(int *wl, int *seq)
{
	int idx;

	for (idx = 0; seq[idx + 1] >= 0; idx++) {
		/* 4-bit code: current lane's setting in <3:2>, next in <1:0> */
		int pair = (wl[seq[idx]] << 2) | wl[seq[idx + 1]];

		/* magic validity number (see matrix above) */
		if (((0xBDE7 >> pair) & 1) == 0)
			return 1;
	}

	return 0;
}
897
/*
 * Sanity-check the hardware write-leveling results for one rank by
 * visiting byte lanes in board-routing order and validating each
 * adjacent pair of settings: RDIMMs are checked outward in two runs
 * (ECC downward, byte 4 upward), UDIMMs in one run with ECC in the
 * middle.  Returns nonzero when any pair check fails.
 */
static int validate_hw_wl_settings(int if_num,
				   union cvmx_lmcx_wlevel_rankx
				   *lmc_wlevel_rank, int is_rdimm, int ecc_ena)
{
	/* lane visit orders; each list is terminated by -1 */
	int useq[] = { 0, 1, 2, 3, 8, 4, 5, 6, 7, -1 };	/* UDIMM, ECC mid */
	int rseq1[] = { 8, 3, 2, 1, 0, -1 };	/* RDIMM, from ECC down */
	int rseq2[] = { 4, 5, 6, 7, -1 };	/* RDIMM, from byte 4 up */
	int useqno[] = { 0, 1, 2, 3, 4, 5, 6, 7, -1 };	/* UDIMM, no ECC */
	int rseq1no[] = { 3, 2, 1, 0, -1 };	/* RDIMM, no ECC */
	int wl[9];
	int lane, errs;

	/* CSR lanes 0-7 are data, lane 8 is ECC; keep only bits <2:1> */
	for (lane = 0; lane < (8 + ecc_ena); lane++)
		wl[lane] = (get_wl_rank(lmc_wlevel_rank, lane) >> 1) & 3;

	if (is_rdimm) {	/* RDIMM order: two runs */
		errs = validate_hwl_seq(wl, ecc_ena ? rseq1 : rseq1no);
		errs += validate_hwl_seq(wl, rseq2);
	} else {	/* UDIMM order: one run */
		errs = validate_hwl_seq(wl, ecc_ena ? useq : useqno);
	}

	return errs;
}
933
934static unsigned int extr_wr(u64 u, int x)
935{
936 return (unsigned int)(((u >> (x * 12 + 5)) & 0x3ULL) |
937 ((u >> (51 + x - 2)) & 0x4ULL));
938}
939
940static void insrt_wr(u64 *up, int x, int v)
941{
942 u64 u = *up;
943
944 u &= ~(((0x3ULL) << (x * 12 + 5)) | ((0x1ULL) << (51 + x)));
945 *up = (u | ((v & 0x3ULL) << (x * 12 + 5)) |
946 ((v & 0x4ULL) << (51 + x - 2)));
947}
948
/* Read out Deskew Settings for DDR */

/* Raw deskew readout for one byte lane: one 10-bit CSR value per data bit */
struct deskew_bytes {
	u16 bits[8];
};

/* Deskew readout for all 9 byte lanes (8 data + ECC) */
struct deskew_data {
	struct deskew_bytes bytes[9];
};

/* Per-byte-lane DAC settings (8 data + ECC) */
struct dac_data {
	int bytes[9];
};

/* dsk_dbg_bit_sel values to visit the 8 data bits of a lane, skipping
 * the extra DAC/DBI slots whose position differs by chip pass
 */
// T88 pass 1, skip 4=DAC
static const u8 dsk_bit_seq_p1[8] = { 0, 1, 2, 3, 5, 6, 7, 8 };
// T88 Pass 2, skip 4=DAC and 5=DBI
static const u8 dsk_bit_seq_p2[8] = { 0, 1, 2, 3, 6, 7, 8, 9 };
967
/*
 * Read the per-bit deskew settings for every active byte lane into
 * *dskdat using the PHY_CTL debug read interface (byte/bit select +
 * rd_start, then poll rd_complete).  Each stored value is the raw
 * 10-bit dsk_dbg_rd_data field.
 *
 * NOTE: the completion poll has no timeout; it relies on the hardware
 * always finishing the debug read.
 */
static void get_deskew_settings(struct ddr_priv *priv, int if_num,
				struct deskew_data *dskdat)
{
	union cvmx_lmcx_phy_ctl phy_ctl;
	union cvmx_lmcx_config lmc_config;
	int bit_index;
	int byte_lane, byte_limit;
	// NOTE: these are for pass 2.x
	int is_o78p2 = !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X);
	const u8 *bit_seq = (is_o78p2) ? dsk_bit_seq_p2 : dsk_bit_seq_p1;

	/* 8 lanes in 64-bit mode, 4 in 32-bit mode, +1 when ECC enabled */
	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
	byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena;

	memset(dskdat, 0, sizeof(*dskdat));

	phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
	phy_ctl.s.dsk_dbg_clk_scaler = 3;

	for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
		phy_ctl.s.dsk_dbg_byte_sel = byte_lane;	// set byte lane

		for (bit_index = 0; bit_index < 8; ++bit_index) {
			// set bit number and start read sequence
			phy_ctl.s.dsk_dbg_bit_sel = bit_seq[bit_index];
			phy_ctl.s.dsk_dbg_rd_start = 1;
			lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);

			// poll for read sequence to complete
			do {
				phy_ctl.u64 =
				    lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
			} while (phy_ctl.s.dsk_dbg_rd_complete != 1);

			// record the data
			dskdat->bytes[byte_lane].bits[bit_index] =
			    phy_ctl.s.dsk_dbg_rd_data & 0x3ff;
		}
	}
}
1008
1009static void display_deskew_settings(struct ddr_priv *priv, int if_num,
1010 struct deskew_data *dskdat,
1011 int print_enable)
1012{
1013 int byte_lane;
1014 int bit_num;
1015 u16 flags, deskew;
1016 union cvmx_lmcx_config lmc_config;
1017 int byte_limit;
1018 const char *fc = " ?-=+*#&";
1019
1020 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
1021 byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;
1022
1023 if (print_enable) {
1024 debug("N0.LMC%d: Deskew Data: Bit => :",
1025 if_num);
1026 for (bit_num = 7; bit_num >= 0; --bit_num)
1027 debug(" %3d ", bit_num);
1028 debug("\n");
1029 }
1030
1031 for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
1032 if (print_enable)
1033 debug("N0.LMC%d: Bit Deskew Byte %d %s :",
1034 if_num, byte_lane,
1035 (print_enable >= 3) ? "FINAL" : " ");
1036
1037 for (bit_num = 7; bit_num >= 0; --bit_num) {
1038 flags = dskdat->bytes[byte_lane].bits[bit_num] & 7;
1039 deskew = dskdat->bytes[byte_lane].bits[bit_num] >> 3;
1040
1041 if (print_enable)
1042 debug(" %3d %c", deskew, fc[flags ^ 1]);
1043
1044 } /* for (bit_num = 7; bit_num >= 0; --bit_num) */
1045
1046 if (print_enable)
1047 debug("\n");
1048 }
1049}
1050
/*
 * Write the deskew settings in @dskdat back into the PHY for all
 * active byte lanes of LMC @if_num.
 *
 * For each lane the eight 7-bit per-bit counts are packed into
 * LMC GENERAL_PURPOSE0 (layout in the comment below) and loaded via
 * the PHY_CTL dsk_dbg write-mode sequence; finally the PHY is told
 * to use the overwritten values (dsk_dbg_overwrt_ena).
 */
static void override_deskew_settings(struct ddr_priv *priv, int if_num,
				     struct deskew_data *dskdat)
{
	union cvmx_lmcx_phy_ctl phy_ctl;
	union cvmx_lmcx_config lmc_config;

	int bit, byte_lane, byte_limit;
	u64 csr_data;

	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
	byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;

	phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));

	phy_ctl.s.phy_reset = 0;
	phy_ctl.s.dsk_dbg_num_bits_sel = 1;
	phy_ctl.s.dsk_dbg_offset = 0;
	phy_ctl.s.dsk_dbg_clk_scaler = 3;

	/* enter deskew debug write mode with loading enabled */
	phy_ctl.s.dsk_dbg_wr_mode = 1;
	phy_ctl.s.dsk_dbg_load_dis = 0;
	phy_ctl.s.dsk_dbg_overwrt_ena = 0;

	phy_ctl.s.phy_dsk_reset = 0;

	lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
	lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num)); /* read back to flush */

	for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
		csr_data = 0;
		// FIXME: can we ignore DBI?
		for (bit = 0; bit < 8; ++bit) {
			// fetch input and adjust: keep only the 7-bit
			// deskew count, dropping the flag bits [2:0]
			u64 bits = (dskdat->bytes[byte_lane].bits[bit] >> 3) &
				0x7F;

			/*
			 * lmc_general_purpose0.data[6:0] // DQ0
			 * lmc_general_purpose0.data[13:7] // DQ1
			 * lmc_general_purpose0.data[20:14] // DQ2
			 * lmc_general_purpose0.data[27:21] // DQ3
			 * lmc_general_purpose0.data[34:28] // DQ4
			 * lmc_general_purpose0.data[41:35] // DQ5
			 * lmc_general_purpose0.data[48:42] // DQ6
			 * lmc_general_purpose0.data[55:49] // DQ7
			 * lmc_general_purpose0.data[62:56] // DBI
			 */
			csr_data |= (bits << (7 * bit));

		} /* for (bit = 0; bit < 8; ++bit) */

		// update GP0 with the bit data for this byte lane
		lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num), csr_data);
		lmc_rd(priv, CVMX_LMCX_GENERAL_PURPOSE0(if_num));

		// start the deskew load sequence
		phy_ctl.s.dsk_dbg_byte_sel = byte_lane;
		phy_ctl.s.dsk_dbg_rd_start = 1;
		lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);

		// poll for read sequence to complete
		// NOTE(review): unbounded busy-wait, as in
		// get_deskew_settings()
		do {
			udelay(100);
			phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
		} while (phy_ctl.s.dsk_dbg_rd_complete != 1);
	}

	// tell phy to use the new settings
	phy_ctl.s.dsk_dbg_overwrt_ena = 1;
	phy_ctl.s.dsk_dbg_rd_start = 0;
	lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);

	phy_ctl.s.dsk_dbg_wr_mode = 0;
	lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
}
1126
/*
 * Average the per-rank DAC (VREF) settings in @dacdat across all
 * ranks in @rank_mask and write the averaged values to the PHY for
 * LMC @if_num via load_dac_override().
 *
 * When exactly two ranks are present, a per-lane difference of more
 * than 19 between the two ranks is reported as a problem-bytelane
 * bitmask (debug output only).
 */
static void process_by_rank_dac(struct ddr_priv *priv, int if_num,
				int rank_mask, struct dac_data *dacdat)
{
	union cvmx_lmcx_config lmc_config;
	int rankx, byte_lane;
	int byte_limit;
	int rank_count;
	struct dac_data dacsum;
	int lane_probs;

	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
	byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;

	memset((void *)&dacsum, 0, sizeof(dacsum));
	rank_count = 0;
	lane_probs = 0;

	for (rankx = 0; rankx < 4; rankx++) {
		if (!(rank_mask & (1 << rankx)))
			continue;
		rank_count++;

		display_dac_dbi_settings(if_num, /*dac */ 1,
					 lmc_config.s.ecc_ena,
					 &dacdat[rankx].bytes[0],
					 "By-Ranks VREF");
		// sum
		// when this is the 2nd rank seen, dacsum still holds only
		// the 1st rank's value, so the diff below is rank-to-rank
		for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
			if (rank_count == 2) {
				int ranks_diff =
				    abs((dacsum.bytes[byte_lane] -
					 dacdat[rankx].bytes[byte_lane]));

				// FIXME: is 19 a good number?
				if (ranks_diff > 19)
					lane_probs |= (1 << byte_lane);
			}
			dacsum.bytes[byte_lane] +=
			    dacdat[rankx].bytes[byte_lane];
		}
	}

	// average
	for (byte_lane = 0; byte_lane < byte_limit; byte_lane++)
		dacsum.bytes[byte_lane] /= rank_count; // FIXME: nint?

	display_dac_dbi_settings(if_num, /*dac */ 1, lmc_config.s.ecc_ena,
				 &dacsum.bytes[0], "All-Rank VREF");

	if (lane_probs) {
		debug("N0.LMC%d: All-Rank VREF DAC Problem Bytelane(s): 0x%03x\n",
		      if_num, lane_probs);
	}

	// finally, write the averaged DAC values
	for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
		load_dac_override(priv, if_num, dacsum.bytes[byte_lane],
				  byte_lane);
	}
}
1187
/*
 * Average the per-rank deskew settings in @dskdat across all ranks
 * in @rank_mask and write the result to the PHY for LMC @if_num.
 *
 * Samples whose flags show saturation (bits 2:1) are skipped.  Each
 * averaged value has its flag bits cleared and the LOCK bit (bit 0)
 * set; bits with no usable samples fall back to a mid-scale count
 * of 64, locked.
 */
static void process_by_rank_dsk(struct ddr_priv *priv, int if_num,
				int rank_mask, struct deskew_data *dskdat)
{
	union cvmx_lmcx_config lmc_config;
	int rankx, lane, bit;
	int byte_limit;
	struct deskew_data dsksum, dskcnt;
	u16 deskew;

	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
	byte_limit = ((lmc_config.s.mode32b) ? 4 : 8) + lmc_config.s.ecc_ena;

	memset((void *)&dsksum, 0, sizeof(dsksum));
	memset((void *)&dskcnt, 0, sizeof(dskcnt));

	for (rankx = 0; rankx < 4; rankx++) {
		if (!(rank_mask & (1 << rankx)))
			continue;

		// sum ranks
		for (lane = 0; lane < byte_limit; lane++) {
			for (bit = 0; bit < 8; ++bit) {
				deskew = dskdat[rankx].bytes[lane].bits[bit];
				// if flags indicate sat hi or lo, skip it
				if (deskew & 6)
					continue;

				// clear flags
				dsksum.bytes[lane].bits[bit] +=
					deskew & ~7;
				// count entries
				dskcnt.bytes[lane].bits[bit] += 1;
			}
		}
	}

	// average ranks
	for (lane = 0; lane < byte_limit; lane++) {
		for (bit = 0; bit < 8; ++bit) {
			int div = dskcnt.bytes[lane].bits[bit];

			if (div > 0) {
				dsksum.bytes[lane].bits[bit] /= div;
				// clear flags
				dsksum.bytes[lane].bits[bit] &= ~7;
				// set LOCK
				dsksum.bytes[lane].bits[bit] |= 1;
			} else {
				// FIXME? use reset value?
				dsksum.bytes[lane].bits[bit] =
					(64 << 3) | 1;
			}
		}
	}

	// TME for FINAL version
	display_deskew_settings(priv, if_num, &dsksum, /*VBL_TME */ 3);

	// finally, write the averaged DESKEW values
	override_deskew_settings(priv, if_num, &dsksum);
}
1249
/* Error/status summary accumulated by validate_deskew_training() */
struct deskew_counts {
	int saturated; // number saturated
	int unlocked; // number unlocked
	int nibrng_errs; // nibble range errors (one bit per byte lane)
	int nibunl_errs; // nibble unlocked errors (one bit per byte lane)
	int bitval_errs; // bit value errors (one bit per byte lane)
};
1257
#define MIN_BITVAL 17
#define MAX_BITVAL 110

/*
 * Read back the current deskew settings and accumulate status into
 * @counts.  Per byte lane (reported as a bit in the *_errs masks):
 *  - nibrng_errs: deskew spread within a nibble exceeds 33
 *  - nibunl_errs: all four bits of a nibble are unlocked
 *  - bitval_errs: any deskew value outside [MIN_BITVAL, MAX_BITVAL]
 * counts->saturated/unlocked are totals across all bits.
 *
 * print_flags: bit 0 enables debug printing, bit 1 tags it FINAL.
 */
static void validate_deskew_training(struct ddr_priv *priv, int rank_mask,
				     int if_num, struct deskew_counts *counts,
				     int print_flags)
{
	int byte_lane, bit_index, nib_num;
	int nibrng_errs, nibunl_errs, bitval_errs;
	union cvmx_lmcx_config lmc_config;
	s16 nib_min[2], nib_max[2], nib_unl[2];
	int byte_limit;
	int print_enable = print_flags & 1;
	struct deskew_data dskdat;
	s16 flags, deskew;
	const char *fc = " ?-=+*#&";
	int bit_last;

	lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
	byte_limit = ((!lmc_config.s.mode32b) ? 8 : 4) + lmc_config.s.ecc_ena;

	memset(counts, 0, sizeof(struct deskew_counts));

	get_deskew_settings(priv, if_num, &dskdat);

	if (print_enable) {
		debug("N0.LMC%d: Deskew Settings: Bit => :",
		      if_num);
		for (bit_index = 7; bit_index >= 0; --bit_index)
			debug(" %3d ", bit_index);
		debug("\n");
	}

	for (byte_lane = 0; byte_lane < byte_limit; byte_lane++) {
		if (print_enable)
			debug("N0.LMC%d: Bit Deskew Byte %d %s :",
			      if_num, byte_lane,
			      (print_flags & 2) ? "FINAL" : " ");

		nib_min[0] = 127;
		nib_min[1] = 127;
		nib_max[0] = 0;
		nib_max[1] = 0;
		nib_unl[0] = 0;
		nib_unl[1] = 0;

		// the ECC lane (4) only carries 4 bits in 32-bit mode
		if (lmc_config.s.mode32b == 1 && byte_lane == 4) {
			bit_last = 3;
			if (print_enable)
				debug(" ");
		} else {
			bit_last = 7;
		}

		for (bit_index = bit_last; bit_index >= 0; --bit_index) {
			nib_num = (bit_index > 3) ? 1 : 0;

			flags = dskdat.bytes[byte_lane].bits[bit_index] & 7;
			deskew = dskdat.bytes[byte_lane].bits[bit_index] >> 3;

			counts->saturated += !!(flags & 6);

			// Do range calc even when locked; it could happen
			// that a bit is still unlocked after final retry,
			// and we want to have an external retry if a RANGE
			// error is present at exit...
			nib_min[nib_num] = min(nib_min[nib_num], deskew);
			nib_max[nib_num] = max(nib_max[nib_num], deskew);

			if (!(flags & 1)) { // only when not locked
				counts->unlocked += 1;
				nib_unl[nib_num] += 1;
			}

			if (print_enable)
				debug(" %3d %c", deskew, fc[flags ^ 1]);
		}

		/*
		 * Now look for nibble errors
		 *
		 * For bit 55, it looks like a bit deskew problem. When the
		 * upper nibble of byte 6 needs to go to saturation, bit 7
		 * of byte 6 locks prematurely at 64. For DIMMs with raw
		 * card A and B, can we reset the deskew training when we
		 * encounter this case? The reset criteria should be looking
		 * at one nibble at a time for raw card A and B; if the
		 * bit-deskew setting within a nibble is different by > 33,
		 * we'll issue a reset to the bit deskew training.
		 *
		 * LMC0 Bit Deskew Byte(6): 64 0 - 0 - 0 - 26 61 35 64
		 */
		// upper nibble range, then lower nibble range
		nibrng_errs = ((nib_max[1] - nib_min[1]) > 33) ? 1 : 0;
		nibrng_errs |= ((nib_max[0] - nib_min[0]) > 33) ? 1 : 0;

		// check for nibble all unlocked
		nibunl_errs = ((nib_unl[0] == 4) || (nib_unl[1] == 4)) ? 1 : 0;

		// check for bit value errors, ie < 17 or > 110
		// FIXME? assume max always > MIN_BITVAL and min < MAX_BITVAL
		bitval_errs = ((nib_max[1] > MAX_BITVAL) ||
			       (nib_max[0] > MAX_BITVAL)) ? 1 : 0;
		bitval_errs |= ((nib_min[1] < MIN_BITVAL) ||
				(nib_min[0] < MIN_BITVAL)) ? 1 : 0;

		if ((nibrng_errs != 0 || nibunl_errs != 0 ||
		     bitval_errs != 0) && print_enable) {
			debug(" %c%c%c",
			      (nibrng_errs) ? 'R' : ' ',
			      (nibunl_errs) ? 'U' : ' ',
			      (bitval_errs) ? 'V' : ' ');
		}

		if (print_enable)
			debug("\n");

		counts->nibrng_errs |= (nibrng_errs << byte_lane);
		counts->nibunl_errs |= (nibunl_errs << byte_lane);
		counts->bitval_errs |= (bitval_errs << byte_lane);
	}
}
1380
/*
 * Load a DAC (VREF bypass) value for one byte lane of LMC @if_num,
 * or for all lanes when @byte == 0x0A.
 *
 * Drives the DLL_CTL3 bit_select sequence: load the bypass setting
 * (0xC), turn bypass on (0xD), then return to no-op (0x9).  Returns
 * the offset field as programmed, i.e. dac_value >> 1.
 *
 * NOTE(review): hardware byte_sel appears 1-based for single lanes
 * (hence byte + 1), with 0x0A meaning ALL - confirm against the HRM.
 */
static unsigned short load_dac_override(struct ddr_priv *priv, int if_num,
					int dac_value, int byte)
{
	union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
	// single bytelanes incr by 1; A is for ALL
	int bytex = (byte == 0x0A) ? byte : byte + 1;

	ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));

	SET_DDR_DLL_CTL3(byte_sel, bytex);
	SET_DDR_DLL_CTL3(offset, dac_value >> 1);

	ddr_dll_ctl3.cn73xx.bit_select = 0x9; /* No-op */
	lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);

	ddr_dll_ctl3.cn73xx.bit_select = 0xC; /* vref bypass setting load */
	lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);

	ddr_dll_ctl3.cn73xx.bit_select = 0xD; /* vref bypass on. */
	lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);

	ddr_dll_ctl3.cn73xx.bit_select = 0x9; /* No-op */
	lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);

	lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num)); // flush writes

	return (unsigned short)GET_DDR_DLL_CTL3(offset);
}
1409
// arg dac_or_dbi is 1 for DAC, 0 for DBI
// returns 9 entries (bytelanes 0 through 8) in settings[]
// returns 0 if OK, -1 if a problem
static int read_dac_dbi_settings(struct ddr_priv *priv, int if_num,
				 int dac_or_dbi, int *settings)
{
	union cvmx_lmcx_phy_ctl phy_ctl;
	int byte_lane, bit_num;
	int deskew;
	int dac_value;
	int new_deskew_layout = 0;

	/* chips with the newer deskew layout also expose DBI at bit 5 */
	new_deskew_layout = octeon_is_cpuid(OCTEON_CN73XX) ||
		octeon_is_cpuid(OCTEON_CNF75XX);
	new_deskew_layout |= (octeon_is_cpuid(OCTEON_CN78XX) &&
			      !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X));

	phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
	phy_ctl.s.dsk_dbg_clk_scaler = 3;
	lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);

	bit_num = (dac_or_dbi) ? 4 : 5;
	// DBI not available
	if (bit_num == 5 && !new_deskew_layout)
		return -1;

	// FIXME: always assume ECC is available
	for (byte_lane = 8; byte_lane >= 0; --byte_lane) {
		//set byte lane and bit to read
		phy_ctl.s.dsk_dbg_bit_sel = bit_num;
		phy_ctl.s.dsk_dbg_byte_sel = byte_lane;
		lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);

		//start read sequence
		phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
		phy_ctl.s.dsk_dbg_rd_start = 1;
		lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);

		//poll for read sequence to complete
		// NOTE(review): unbounded busy-wait (no timeout)
		do {
			phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
		} while (phy_ctl.s.dsk_dbg_rd_complete != 1);

		// keep the flag bits where they are for DBI
		deskew = phy_ctl.s.dsk_dbg_rd_data; /* >> 3 */
		dac_value = phy_ctl.s.dsk_dbg_rd_data & 0xff;

		settings[byte_lane] = (dac_or_dbi) ? dac_value : deskew;
	}

	return 0;
}
1462
// print out the DBI settings array
// arg dac_or_dbi is 1 for DAC, 0 for DBI
static void display_dac_dbi_settings(int lmc, int dac_or_dbi,
				     int ecc_ena, int *settings, char *title)
{
	const char *flag_chars = " ?-=+*#&";
	int top_byte = 7 + ecc_ena;
	int idx, lock_flags, value;

	debug("N0.LMC%d: %s %s Settings %d:0 :",
	      lmc, title, (dac_or_dbi) ? "DAC" : "DBI", top_byte);
	// FIXME: what about 32-bit mode?
	for (idx = top_byte; idx >= 0; --idx) {
		if (dac_or_dbi) { // DAC
			// full 8-bit value; pretend locked for a blank flag
			lock_flags = 1;
			value = settings[idx] & 0xff;
		} else { // DBI
			// packed: flags in [2:0], value in [9:3]
			lock_flags = settings[idx] & 7;
			value = (settings[idx] >> 3) & 0x7f;
		}
		debug(" %3d %c", value, flag_chars[lock_flags ^ 1]);
	}
	debug("\n");
}
1488
1489// Find a HWL majority
1490static int find_wl_majority(struct wlevel_bitcnt *bc, int *mx, int *mc,
1491 int *xc, int *cc)
1492{
1493 int ix, ic;
1494
1495 *mx = -1;
1496 *mc = 0;
1497 *xc = 0;
1498 *cc = 0;
1499
1500 for (ix = 0; ix < 4; ix++) {
1501 ic = bc->bitcnt[ix];
1502
1503 // make a bitmask of the ones with a count
1504 if (ic > 0) {
1505 *mc |= (1 << ix);
1506 *cc += 1; // count how many had non-zero counts
1507 }
1508
1509 // find the majority
1510 if (ic > *xc) { // new max?
1511 *xc = ic; // yes
1512 *mx = ix; // set its index
1513 }
1514 }
1515
1516 return (*mx << 1);
1517}
1518
// Evaluate the DAC settings array
// Returns 1 if any two byte-lane DAC values differ by more than 25,
// 0 otherwise.  Lane count is 8 (64-bit) or 4 (32-bit) plus ECC.
static int evaluate_dac_settings(int if_64b, int ecc_ena, int *settings)
{
	int top = ((if_64b) ? 7 : 3) + ecc_ena;
	int i, j;

	// FIXME: change the check...???
	// this looks only for sets of DAC values whose max/min differ
	// by a lot; let any EVEN go so long as it is within range...
	for (i = top; i >= 0; --i) {
		int ref = settings[i] & 0xff;

		for (j = top; j >= 0; --j) {
			int other = settings[j] & 0xff;

			if (abs(ref - other) > 25)
				return 1;
		}
	}

	return 0;
}
1540
/*
 * Run the LMC input-receiver offset training sequence (HRM 4.8.6)
 * on LMC @if_num for the ranks in @rank_mask.  The whole PHY_CTL
 * CSR may be overridden via the "ddr_phy_ctl" environment variable.
 */
static void perform_offset_training(struct ddr_priv *priv, int rank_mask,
				    int if_num)
{
	union cvmx_lmcx_phy_ctl lmc_phy_ctl;
	u64 orig_phy_ctl;
	const char *s;

	/*
	 * 4.8.6 LMC Offset Training
	 *
	 * LMC requires input-receiver offset training.
	 *
	 * 1. Write LMC(0)_PHY_CTL[DAC_ON] = 1
	 */
	lmc_phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
	orig_phy_ctl = lmc_phy_ctl.u64;
	lmc_phy_ctl.s.dac_on = 1;

	// allow full CSR override
	s = lookup_env_ull(priv, "ddr_phy_ctl");
	if (s)
		lmc_phy_ctl.u64 = strtoull(s, NULL, 0);

	// do not print or write if CSR does not change...
	if (lmc_phy_ctl.u64 != orig_phy_ctl) {
		debug("PHY_CTL : 0x%016llx\n",
		      lmc_phy_ctl.u64);
		lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), lmc_phy_ctl.u64);
	}

	/*
	 * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0B and
	 * LMC(0)_SEQ_CTL[INIT_START] = 1.
	 *
	 * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
	 */
	/* Start Offset training sequence */
	oct3_ddr3_seq(priv, rank_mask, if_num, 0x0B);
}
1580
/*
 * Run the LMC internal VREF training sequence (HRM 4.8.7) on LMC
 * @if_num for the ranks in @rank_mask, after first taking all byte
 * lanes out of VREF bypass mode via the DLL_CTL3 sequence.
 */
static void perform_internal_vref_training(struct ddr_priv *priv,
					   int rank_mask, int if_num)
{
	union cvmx_lmcx_ext_config ext_config;
	union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;

	// First, make sure all byte-lanes are out of VREF bypass mode
	ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));

	ddr_dll_ctl3.cn78xx.byte_sel = 0x0A; /* all byte-lanes */
	ddr_dll_ctl3.cn78xx.bit_select = 0x09; /* No-op */
	lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);

	ddr_dll_ctl3.cn78xx.bit_select = 0x0E; /* vref bypass off. */
	lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);

	ddr_dll_ctl3.cn78xx.bit_select = 0x09; /* No-op */
	lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);

	/*
	 * 4.8.7 LMC Internal vref Training
	 *
	 * LMC requires input-reference-voltage training.
	 *
	 * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 0.
	 */
	ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
	ext_config.s.vrefint_seq_deskew = 0;

	ddr_seq_print("Performing LMC sequence: vrefint_seq_deskew = %d\n",
		      ext_config.s.vrefint_seq_deskew);

	lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64);

	/*
	 * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0a and
	 * LMC(0)_SEQ_CTL[INIT_START] = 1.
	 *
	 * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
	 */
	/* Start LMC Internal vref Training */
	oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);
}
1624
1625#define dbg_avg(format, ...) // debug(format, ##__VA_ARGS__)
1626
1627static int process_samples_average(s16 *bytes, int num_samples,
1628 int lmc, int lane_no)
1629{
1630 int i, sadj, sum = 0, ret, asum, trunc;
1631 s16 smin = 32767, smax = -32768;
1632 int nmin, nmax;
1633 //int rng;
1634
1635 dbg_avg("DBG_AVG%d.%d: ", lmc, lane_no);
1636
1637 for (i = 0; i < num_samples; i++) {
1638 sum += bytes[i];
1639 if (bytes[i] < smin)
1640 smin = bytes[i];
1641 if (bytes[i] > smax)
1642 smax = bytes[i];
1643 dbg_avg(" %3d", bytes[i]);
1644 }
1645
1646 nmin = 0;
1647 nmax = 0;
1648 for (i = 0; i < num_samples; i++) {
1649 if (bytes[i] == smin)
1650 nmin += 1;
1651 if (bytes[i] == smax)
1652 nmax += 1;
1653 }
1654 dbg_avg(" (min=%3d/%d, max=%3d/%d, range=%2d, samples=%2d)",
1655 smin, nmin, smax, nmax, rng, num_samples);
1656
1657 asum = sum - smin - smax;
1658
1659 sadj = divide_nint(asum * 10, (num_samples - 2));
1660
1661 trunc = asum / (num_samples - 2);
1662
1663 dbg_avg(" [%3d.%d, %3d]", sadj / 10, sadj % 10, trunc);
1664
1665 sadj = divide_nint(sadj, 10);
1666 if (trunc & 1)
1667 ret = trunc;
1668 else if (sadj & 1)
1669 ret = sadj;
1670 else
1671 ret = trunc + 1;
1672
1673 dbg_avg(" -> %3d\n", ret);
1674
1675 return ret;
1676}
1677
#define DEFAULT_SAT_RETRY_LIMIT 11 // 1 + 10 retries

#define default_lock_retry_limit 20 // 20 retries
#define deskew_validation_delay 10000 // 10 millisecs

/*
 * Run LMC deskew training (HRM 4.8.8) until all bits lock and, on
 * chips that report it, none saturate, or the retry limits expire.
 *
 * Structure: the outer do-loop does a full deskew RESET + train and
 * is retried while saturated; the inner retry (via the
 * perform_deskew_training label) re-runs a normal train while any
 * bit remains unlocked.
 *
 * Returns 0 on success, -1 if nibble-range, nibble-unlock,
 * bit-value (unless disabled via env "ddr_disable_bitval_retries")
 * or saturation errors remain; the caller can then retry with a
 * different internal VREF.
 */
static int perform_deskew_training(struct ddr_priv *priv, int rank_mask,
				   int if_num, int spd_rawcard_aorb)
{
	int unsaturated, locked;
	int sat_retries, sat_retries_limit;
	int lock_retries, lock_retries_total, lock_retries_limit;
	int print_first;
	int print_them_all;
	struct deskew_counts dsk_counts;
	union cvmx_lmcx_phy_ctl phy_ctl;
	char *s;
	int has_no_sat = octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
		octeon_is_cpuid(OCTEON_CNF75XX);
	int disable_bitval_retries = 1; // default to disabled

	debug("N0.LMC%d: Performing Deskew Training.\n", if_num);

	sat_retries = 0;
	sat_retries_limit = (has_no_sat) ? 5 : DEFAULT_SAT_RETRY_LIMIT;

	lock_retries_total = 0;
	unsaturated = 0;
	print_first = 1; // print the first one
	// set to true for printing all normal deskew attempts
	print_them_all = 0;

	// provide override for bitval_errs causing internal VREF retries
	s = env_get("ddr_disable_bitval_retries");
	if (s)
		disable_bitval_retries = !!simple_strtoul(s, NULL, 0);

	lock_retries_limit = default_lock_retry_limit;
	if ((octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) ||
	    (octeon_is_cpuid(OCTEON_CN73XX)) ||
	    (octeon_is_cpuid(OCTEON_CNF75XX)))
		lock_retries_limit *= 2; // give new chips twice as many

	do { /* while (sat_retries < sat_retry_limit) */
		/*
		 * 4.8.8 LMC Deskew Training
		 *
		 * LMC requires input-read-data deskew training.
		 *
		 * 1. Write LMC(0)_EXT_CONFIG[VREFINT_SEQ_DESKEW] = 1.
		 */

		union cvmx_lmcx_ext_config ext_config;

		ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
		ext_config.s.vrefint_seq_deskew = 1;

		ddr_seq_print
		    ("Performing LMC sequence: vrefint_seq_deskew = %d\n",
		     ext_config.s.vrefint_seq_deskew);

		lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_config.u64);

		/*
		 * 2. Write LMC(0)_SEQ_CTL[SEQ_SEL] = 0x0A and
		 * LMC(0)_SEQ_CTL[INIT_START] = 1.
		 *
		 * 3. Wait for LMC(0)_SEQ_CTL[SEQ_COMPLETE] to be set to 1.
		 */

		phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
		phy_ctl.s.phy_dsk_reset = 1; /* RESET Deskew sequence */
		lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);

		/* LMC Deskew Training */
		oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);

		lock_retries = 0;

perform_deskew_training:

		phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
		phy_ctl.s.phy_dsk_reset = 0; /* Normal Deskew sequence */
		lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);

		/* LMC Deskew Training */
		oct3_ddr3_seq(priv, rank_mask, if_num, 0x0A);

		// Moved this from validate_deskew_training
		/* Allow deskew results to stabilize before evaluating them. */
		udelay(deskew_validation_delay);

		// Now go look at lock and saturation status...
		validate_deskew_training(priv, rank_mask, if_num, &dsk_counts,
					 print_first);
		// after printing the first and not doing them all, no more
		if (print_first && !print_them_all)
			print_first = 0;

		unsaturated = (dsk_counts.saturated == 0);
		locked = (dsk_counts.unlocked == 0);

		// only do locking retries if unsaturated or rawcard A or B,
		// otherwise full SAT retry
		if (unsaturated || (spd_rawcard_aorb && !has_no_sat)) {
			if (!locked) { // and not locked
				lock_retries++;
				lock_retries_total++;
				if (lock_retries <= lock_retries_limit) {
					goto perform_deskew_training;
				} else {
					debug("N0.LMC%d: LOCK RETRIES failed after %d retries\n",
					      if_num, lock_retries_limit);
				}
			} else {
				// only print if we did try
				if (lock_retries_total > 0)
					debug("N0.LMC%d: LOCK RETRIES successful after %d retries\n",
					      if_num, lock_retries);
			}
		} /* if (unsaturated || spd_rawcard_aorb) */

		++sat_retries;

		/*
		 * At this point, check for a DDR4 RDIMM that will not
		 * benefit from SAT retries; if so, exit
		 */
		if (spd_rawcard_aorb && !has_no_sat) {
			debug("N0.LMC%d: Deskew Training Loop: Exiting for RAWCARD == A or B.\n",
			      if_num);
			break; // no sat or lock retries
		}

	} while (!unsaturated && (sat_retries < sat_retries_limit));

	// NOTE(review): this message tests DEFAULT_SAT_RETRY_LIMIT, but
	// the active limit is sat_retries_limit (5 when has_no_sat), so
	// "Timed Out" may not print on those chips - confirm intent
	debug("N0.LMC%d: Deskew Training %s. %d sat-retries, %d lock-retries\n",
	      if_num, (sat_retries >= DEFAULT_SAT_RETRY_LIMIT) ?
	      "Timed Out" : "Completed", sat_retries - 1, lock_retries_total);

	// FIXME? add saturation to reasons for fault return - give it a
	// chance via Internal VREF
	// FIXME? add OPTIONAL bit value to reasons for fault return -
	// give it a chance via Internal VREF
	if (dsk_counts.nibrng_errs != 0 || dsk_counts.nibunl_errs != 0 ||
	    (dsk_counts.bitval_errs != 0 && !disable_bitval_retries) ||
	    !unsaturated) {
		debug("N0.LMC%d: Nibble or Saturation Error(s) found, returning FAULT\n",
		      if_num);
		// FIXME: do we want this output always for errors?
		validate_deskew_training(priv, rank_mask, if_num,
					 &dsk_counts, 1);
		return -1; // we did retry locally, they did not help
	}

	// NOTE: we (currently) always print one last training validation
	// before starting Read Leveling...

	return 0;
}
1837
1838#define SCALING_FACTOR (1000)
1839
1840// NOTE: this gets called for 1-rank and 2-rank DIMMs in single-slot config
1841static int compute_vref_1slot_2rank(int rtt_wr, int rtt_park, int dqx_ctl,
1842 int rank_count, int dram_connection)
1843{
1844 u64 reff_s;
1845 u64 rser_s = (dram_connection) ? 0 : 15;
1846 u64 vdd = 1200;
1847 u64 vref;
1848 // 99 == HiZ
1849 u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ?
1850 1 * 1024 * 1024 : rtt_wr);
1851 u64 rtt_park_s = (((rtt_park == 0) || ((rank_count == 1) &&
1852 (rtt_wr != 0))) ?
1853 1 * 1024 * 1024 : rtt_park);
1854 u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl);
1855 int vref_value;
1856 u64 rangepc = 6000; // range1 base
1857 u64 vrefpc;
1858 int vref_range = 0;
1859
1860 reff_s = divide_nint((rtt_wr_s * rtt_park_s), (rtt_wr_s + rtt_park_s));
1861
1862 vref = (((rser_s + dqx_ctl_s) * SCALING_FACTOR) /
1863 (rser_s + dqx_ctl_s + reff_s)) + SCALING_FACTOR;
1864
1865 vref = (vref * vdd) / 2 / SCALING_FACTOR;
1866
1867 vrefpc = (vref * 100 * 100) / vdd;
1868
1869 if (vrefpc < rangepc) { // < range1 base, use range2
1870 vref_range = 1 << 6; // set bit A6 for range2
1871 rangepc = 4500; // range2 base is 45%
1872 }
1873
1874 vref_value = divide_nint(vrefpc - rangepc, 65);
1875 if (vref_value < 0)
1876 vref_value = vref_range; // set to base of range
1877 else
1878 vref_value |= vref_range;
1879
1880 debug("rtt_wr: %d, rtt_park: %d, dqx_ctl: %d, rank_count: %d\n",
1881 rtt_wr, rtt_park, dqx_ctl, rank_count);
1882 debug("rtt_wr_s: %lld, rtt_park_s: %lld, dqx_ctl_s: %lld, vref_value: 0x%x, range: %d\n",
1883 rtt_wr_s, rtt_park_s, dqx_ctl_s, vref_value ^ vref_range,
1884 vref_range ? 2 : 1);
1885
1886 return vref_value;
1887}
1888
// NOTE: this gets called for 1-rank and 2-rank DIMMs in two-slot configs
/*
 * Compute a DDR4 VrefDQ MR6-style value (range-2 select in bit 6)
 * for two-slot configurations from impedances in ohms.  The DQ net
 * is modeled per the step comments below; disabled (0) or HiZ (99)
 * terminations are treated as a ~1 Mohm open circuit.
 */
static int compute_vref_2slot_2rank(int rtt_wr, int rtt_park_00,
				    int rtt_park_01,
				    int dqx_ctl, int rtt_nom,
				    int dram_connection)
{
	u64 rser = (dram_connection) ? 0 : 15;
	u64 vdd = 1200;
	u64 vl, vlp, vcm;
	u64 rd0, rd1, rpullup;
	// 99 == HiZ
	u64 rtt_wr_s = (((rtt_wr == 0) || rtt_wr == 99) ?
			1 * 1024 * 1024 : rtt_wr);
	u64 rtt_park_00_s = (rtt_park_00 == 0 ? 1 * 1024 * 1024 : rtt_park_00);
	u64 rtt_park_01_s = (rtt_park_01 == 0 ? 1 * 1024 * 1024 : rtt_park_01);
	u64 dqx_ctl_s = (dqx_ctl == 0 ? 1 * 1024 * 1024 : dqx_ctl);
	u64 rtt_nom_s = (rtt_nom == 0 ? 1 * 1024 * 1024 : rtt_nom);
	int vref_value;
	u64 rangepc = 6000; // range1 base
	u64 vrefpc;
	int vref_range = 0;

	// rd0 = (RTT_NOM (parallel) RTT_WR) + =
	// ((RTT_NOM * RTT_WR) / (RTT_NOM + RTT_WR)) + RSER
	rd0 = divide_nint((rtt_nom_s * rtt_wr_s),
			  (rtt_nom_s + rtt_wr_s)) + rser;

	// rd1 = (RTT_PARK_00 (parallel) RTT_PARK_01) + RSER =
	// ((RTT_PARK_00 * RTT_PARK_01) / (RTT_PARK_00 + RTT_PARK_01)) + RSER
	rd1 = divide_nint((rtt_park_00_s * rtt_park_01_s),
			  (rtt_park_00_s + rtt_park_01_s)) + rser;

	// rpullup = rd0 (parallel) rd1 = (rd0 * rd1) / (rd0 + rd1)
	rpullup = divide_nint((rd0 * rd1), (rd0 + rd1));

	// vl = (DQX_CTL / (DQX_CTL + rpullup)) * 1.2
	vl = divide_nint((dqx_ctl_s * vdd), (dqx_ctl_s + rpullup));

	// vlp = ((RSER / rd0) * (1.2 - vl)) + vl
	vlp = divide_nint((rser * (vdd - vl)), rd0) + vl;

	// vcm = (vlp + 1.2) / 2
	vcm = divide_nint((vlp + vdd), 2);

	// vrefpc = (vcm / 1.2) * 100
	vrefpc = divide_nint((vcm * 100 * 100), vdd);

	if (vrefpc < rangepc) { // < range1 base, use range2
		vref_range = 1 << 6; // set bit A6 for range2
		rangepc = 4500; // range2 base is 45%
	}

	vref_value = divide_nint(vrefpc - rangepc, 65);
	if (vref_value < 0)
		vref_value = vref_range; // set to base of range
	else
		vref_value |= vref_range;

	debug("rtt_wr:%d, rtt_park_00:%d, rtt_park_01:%d, dqx_ctl:%d, rtt_nom:%d, vref_value:%d (0x%x)\n",
	      rtt_wr, rtt_park_00, rtt_park_01, dqx_ctl, rtt_nom, vref_value,
	      vref_value);

	return vref_value;
}
1953
// NOTE: only call this for DIMMs with 1 or 2 ranks, not 4.
/*
 * Derive a "computed" VREF value for one rank from the termination and
 * drive-strength settings currently programmed into the LMC registers
 * (MODEREG_PARAMS1/2 and COMP_CTL2), as an alternative to the VREF
 * measured during training.
 *
 * Environment overrides:
 *   ddr_adjust_computed_vref - enable/disable the final RDIMM 2Rx4
 *                              adjustment step
 *   ddr_set_computed_vref    - return the given value directly,
 *                              skipping all computation
 *
 * Returns the computed VREF setting for the rank.
 */
static int compute_vref_val(struct ddr_priv *priv, int if_num, int rankx,
			    int dimm_count, int rank_count,
			    struct impedence_values *imp_values,
			    int is_stacked_die, int dram_connection)
{
	int computed_final_vref_value = 0;
	int enable_adjust = ENABLE_COMPUTED_VREF_ADJUSTMENT;
	const char *s;
	int rtt_wr, dqx_ctl, rtt_nom, index;
	union cvmx_lmcx_modereg_params1 lmc_modereg_params1;
	union cvmx_lmcx_modereg_params2 lmc_modereg_params2;
	union cvmx_lmcx_comp_ctl2 comp_ctl2;
	int rtt_park;
	int rtt_park_00;
	int rtt_park_01;

	debug("N0.LMC%d.R%d: %s(...dram_connection = %d)\n",
	      if_num, rankx, __func__, dram_connection);

	// allow some overrides...
	s = env_get("ddr_adjust_computed_vref");
	if (s) {
		enable_adjust = !!simple_strtoul(s, NULL, 0);
		if (!enable_adjust) {
			debug("N0.LMC%d.R%d: DISABLE adjustment of computed VREF\n",
			      if_num, rankx);
		}
	}

	s = env_get("ddr_set_computed_vref");
	if (s) {
		int new_vref = simple_strtoul(s, NULL, 0);

		debug("N0.LMC%d.R%d: OVERRIDE computed VREF to 0x%x (%d)\n",
		      if_num, rankx, new_vref, new_vref);
		return new_vref;
	}

	/*
	 * Calculate an alternative to the measured vref value
	 * but only for configurations we know how to...
	 */
	// We have code for 2-rank DIMMs in both 1-slot or 2-slot configs,
	// and can use the 2-rank 1-slot code for 1-rank DIMMs in 1-slot
	// configs, and can use the 2-rank 2-slot code for 1-rank DIMMs
	// in 2-slot configs.

	lmc_modereg_params1.u64 =
	    lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
	lmc_modereg_params2.u64 =
	    lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num));
	comp_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
	/* translate the DQX_CTL encoding into a drive strength */
	dqx_ctl = imp_values->dqx_strength[comp_ctl2.s.dqx_ctl];

	// WR always comes from the current rank
	/* 2-bit RTT_WR field at [rankx*12+5]; non-pass1 CN78XX parts
	 * carry an extra high bit merged in from bit (51+rankx-2)
	 */
	index = (lmc_modereg_params1.u64 >> (rankx * 12 + 5)) & 0x03;
	if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
		index |= lmc_modereg_params1.u64 >> (51 + rankx - 2) & 0x04;
	rtt_wr = imp_values->rtt_wr_ohms[index];

	// separate calculations for 1 vs 2 DIMMs per LMC
	if (dimm_count == 1) {
		// PARK comes from this rank if 1-rank, otherwise other rank
		index =
		    (lmc_modereg_params2.u64 >>
		     ((rankx ^ (rank_count - 1)) * 10 + 0)) & 0x07;
		rtt_park = imp_values->rtt_nom_ohms[index];
		computed_final_vref_value =
		    compute_vref_1slot_2rank(rtt_wr, rtt_park, dqx_ctl,
					     rank_count, dram_connection);
	} else {
		// get both PARK values from the other DIMM
		index =
		    (lmc_modereg_params2.u64 >> ((rankx ^ 0x02) * 10 + 0)) &
		    0x07;
		rtt_park_00 = imp_values->rtt_nom_ohms[index];
		index =
		    (lmc_modereg_params2.u64 >> ((rankx ^ 0x03) * 10 + 0)) &
		    0x07;
		rtt_park_01 = imp_values->rtt_nom_ohms[index];
		// NOM comes from this rank if 1-rank, otherwise other rank
		index =
		    (lmc_modereg_params1.u64 >>
		     ((rankx ^ (rank_count - 1)) * 12 + 9)) & 0x07;
		rtt_nom = imp_values->rtt_nom_ohms[index];
		computed_final_vref_value =
		    compute_vref_2slot_2rank(rtt_wr, rtt_park_00, rtt_park_01,
					     dqx_ctl, rtt_nom, dram_connection);
	}

	if (enable_adjust) {
		union cvmx_lmcx_config lmc_config;
		union cvmx_lmcx_control lmc_control;

		lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
		lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));

		/*
		 * New computed vref = existing computed vref – X
		 *
		 * The value of X is depending on different conditions.
		 * Both #122 and #139 are 2Rx4 RDIMM, while #124 is stacked
		 * die 2Rx4, so I conclude the results into two conditions:
		 *
		 * 1. Stacked Die: 2Rx4
		 * 1-slot: offset = 7. i, e New computed vref = existing
		 * computed vref – 7
		 * 2-slot: offset = 6
		 *
		 * 2. Regular: 2Rx4
		 * 1-slot: offset = 3
		 * 2-slot: offset = 2
		 */
		// we know we never get called unless DDR4, so test just
		// the other conditions
		if (lmc_control.s.rdimm_ena == 1 &&
		    rank_count == 2 && lmc_config.s.mode_x4dev) {
			// it must first be RDIMM and 2-rank and x4
			int adj;

			// now do according to stacked die or not...
			if (is_stacked_die)
				adj = (dimm_count == 1) ? -7 : -6;
			else
				adj = (dimm_count == 1) ? -3 : -2;

			// we must have adjusted it, so print it out if
			// verbosity is right
			debug("N0.LMC%d.R%d: adjusting computed vref from %2d (0x%02x) to %2d (0x%02x)\n",
			      if_num, rankx, computed_final_vref_value,
			      computed_final_vref_value,
			      computed_final_vref_value + adj,
			      computed_final_vref_value + adj);
			computed_final_vref_value += adj;
		}
	}

	return computed_final_vref_value;
}
2094
2095static void unpack_rlevel_settings(int if_bytemask, int ecc_ena,
2096 struct rlevel_byte_data *rlevel_byte,
2097 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank)
2098{
2099 if ((if_bytemask & 0xff) == 0xff) {
2100 if (ecc_ena) {
2101 rlevel_byte[8].delay = lmc_rlevel_rank.s.byte7;
2102 rlevel_byte[7].delay = lmc_rlevel_rank.s.byte6;
2103 rlevel_byte[6].delay = lmc_rlevel_rank.s.byte5;
2104 rlevel_byte[5].delay = lmc_rlevel_rank.s.byte4;
2105 /* ECC */
2106 rlevel_byte[4].delay = lmc_rlevel_rank.s.byte8;
2107 } else {
2108 rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7;
2109 rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6;
2110 rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5;
2111 rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4;
2112 }
2113 } else {
2114 rlevel_byte[8].delay = lmc_rlevel_rank.s.byte8; /* unused */
2115 rlevel_byte[7].delay = lmc_rlevel_rank.s.byte7; /* unused */
2116 rlevel_byte[6].delay = lmc_rlevel_rank.s.byte6; /* unused */
2117 rlevel_byte[5].delay = lmc_rlevel_rank.s.byte5; /* unused */
2118 rlevel_byte[4].delay = lmc_rlevel_rank.s.byte4; /* ECC */
2119 }
2120
2121 rlevel_byte[3].delay = lmc_rlevel_rank.s.byte3;
2122 rlevel_byte[2].delay = lmc_rlevel_rank.s.byte2;
2123 rlevel_byte[1].delay = lmc_rlevel_rank.s.byte1;
2124 rlevel_byte[0].delay = lmc_rlevel_rank.s.byte0;
2125}
2126
2127static void pack_rlevel_settings(int if_bytemask, int ecc_ena,
2128 struct rlevel_byte_data *rlevel_byte,
2129 union cvmx_lmcx_rlevel_rankx
2130 *final_rlevel_rank)
2131{
2132 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank = *final_rlevel_rank;
2133
2134 if ((if_bytemask & 0xff) == 0xff) {
2135 if (ecc_ena) {
2136 lmc_rlevel_rank.s.byte7 = rlevel_byte[8].delay;
2137 lmc_rlevel_rank.s.byte6 = rlevel_byte[7].delay;
2138 lmc_rlevel_rank.s.byte5 = rlevel_byte[6].delay;
2139 lmc_rlevel_rank.s.byte4 = rlevel_byte[5].delay;
2140 /* ECC */
2141 lmc_rlevel_rank.s.byte8 = rlevel_byte[4].delay;
2142 } else {
2143 lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay;
2144 lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay;
2145 lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay;
2146 lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay;
2147 }
2148 } else {
2149 lmc_rlevel_rank.s.byte8 = rlevel_byte[8].delay;
2150 lmc_rlevel_rank.s.byte7 = rlevel_byte[7].delay;
2151 lmc_rlevel_rank.s.byte6 = rlevel_byte[6].delay;
2152 lmc_rlevel_rank.s.byte5 = rlevel_byte[5].delay;
2153 lmc_rlevel_rank.s.byte4 = rlevel_byte[4].delay;
2154 }
2155
2156 lmc_rlevel_rank.s.byte3 = rlevel_byte[3].delay;
2157 lmc_rlevel_rank.s.byte2 = rlevel_byte[2].delay;
2158 lmc_rlevel_rank.s.byte1 = rlevel_byte[1].delay;
2159 lmc_rlevel_rank.s.byte0 = rlevel_byte[0].delay;
2160
2161 *final_rlevel_rank = lmc_rlevel_rank;
2162}
2163
2164/////////////////// These are the RLEVEL settings display routines
2165
2166// flags
2167#define WITH_NOTHING 0
2168#define WITH_SCORE 1
2169#define WITH_AVERAGE 2
2170#define WITH_FINAL 4
2171#define WITH_COMPUTE 8
2172
2173static void do_display_rl(int if_num,
2174 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
2175 int rank, int flags, int score)
2176{
2177 char score_buf[16];
2178 char *msg_buf;
2179 char hex_buf[20];
2180
2181 if (flags & WITH_SCORE) {
2182 snprintf(score_buf, sizeof(score_buf), "(%d)", score);
2183 } else {
2184 score_buf[0] = ' ';
2185 score_buf[1] = 0;
2186 }
2187
2188 if (flags & WITH_AVERAGE) {
2189 msg_buf = " DELAY AVERAGES ";
2190 } else if (flags & WITH_FINAL) {
2191 msg_buf = " FINAL SETTINGS ";
2192 } else if (flags & WITH_COMPUTE) {
2193 msg_buf = " COMPUTED DELAYS ";
2194 } else {
2195 snprintf(hex_buf, sizeof(hex_buf), "0x%016llX",
2196 (unsigned long long)lmc_rlevel_rank.u64);
2197 msg_buf = hex_buf;
2198 }
2199
2200 debug("N0.LMC%d.R%d: Rlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d %s\n",
2201 if_num, rank, lmc_rlevel_rank.s.status, msg_buf,
2202 lmc_rlevel_rank.s.byte8, lmc_rlevel_rank.s.byte7,
2203 lmc_rlevel_rank.s.byte6, lmc_rlevel_rank.s.byte5,
2204 lmc_rlevel_rank.s.byte4, lmc_rlevel_rank.s.byte3,
2205 lmc_rlevel_rank.s.byte2, lmc_rlevel_rank.s.byte1,
2206 lmc_rlevel_rank.s.byte0, score_buf);
2207}
2208
2209static void display_rl(int if_num,
2210 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank, int rank)
2211{
2212 do_display_rl(if_num, lmc_rlevel_rank, rank, 0, 0);
2213}
2214
2215static void display_rl_with_score(int if_num,
2216 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
2217 int rank, int score)
2218{
2219 do_display_rl(if_num, lmc_rlevel_rank, rank, 1, score);
2220}
2221
2222static void display_rl_with_final(int if_num,
2223 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
2224 int rank)
2225{
2226 do_display_rl(if_num, lmc_rlevel_rank, rank, 4, 0);
2227}
2228
2229static void display_rl_with_computed(int if_num,
2230 union cvmx_lmcx_rlevel_rankx
2231 lmc_rlevel_rank, int rank, int score)
2232{
2233 do_display_rl(if_num, lmc_rlevel_rank, rank, 9, score);
2234}
2235
// flag values: index into with_rodt_canned_msgs[] below
#define WITH_RODT_BLANK 0
#define WITH_RODT_SKIPPING 1
#define WITH_RODT_BESTROW 2
#define WITH_RODT_BESTSCORE 3
// control: suppress SKIPPING rows in display_rl_with_rodt()
#define SKIP_SKIPPING 1

static const char *with_rodt_canned_msgs[4] = {
	" ", "SKIPPING ", "BEST ROW ", "BEST SCORE"
};
2247
2248static void display_rl_with_rodt(int if_num,
2249 union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank,
2250 int rank, int score,
2251 int nom_ohms, int rodt_ohms, int flag)
2252{
2253 const char *msg_buf;
2254 char set_buf[20];
2255
2256#if SKIP_SKIPPING
2257 if (flag == WITH_RODT_SKIPPING)
2258 return;
2259#endif
2260
2261 msg_buf = with_rodt_canned_msgs[flag];
2262 if (nom_ohms < 0) {
2263 snprintf(set_buf, sizeof(set_buf), " RODT %3d ",
2264 rodt_ohms);
2265 } else {
2266 snprintf(set_buf, sizeof(set_buf), "NOM %3d RODT %3d", nom_ohms,
2267 rodt_ohms);
2268 }
2269
2270 debug("N0.LMC%d.R%d: Rlevel %s %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d (%d)\n",
2271 if_num, rank, set_buf, msg_buf, lmc_rlevel_rank.s.byte8,
2272 lmc_rlevel_rank.s.byte7, lmc_rlevel_rank.s.byte6,
2273 lmc_rlevel_rank.s.byte5, lmc_rlevel_rank.s.byte4,
2274 lmc_rlevel_rank.s.byte3, lmc_rlevel_rank.s.byte2,
2275 lmc_rlevel_rank.s.byte1, lmc_rlevel_rank.s.byte0, score);
2276}
2277
2278static void do_display_wl(int if_num,
2279 union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank,
2280 int rank, int flags)
2281{
2282 char *msg_buf;
2283 char hex_buf[20];
2284
2285 if (flags & WITH_FINAL) {
2286 msg_buf = " FINAL SETTINGS ";
2287 } else {
2288 snprintf(hex_buf, sizeof(hex_buf), "0x%016llX",
2289 (unsigned long long)lmc_wlevel_rank.u64);
2290 msg_buf = hex_buf;
2291 }
2292
2293 debug("N0.LMC%d.R%d: Wlevel Rank %#4x, %s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2294 if_num, rank, lmc_wlevel_rank.s.status, msg_buf,
2295 lmc_wlevel_rank.s.byte8, lmc_wlevel_rank.s.byte7,
2296 lmc_wlevel_rank.s.byte6, lmc_wlevel_rank.s.byte5,
2297 lmc_wlevel_rank.s.byte4, lmc_wlevel_rank.s.byte3,
2298 lmc_wlevel_rank.s.byte2, lmc_wlevel_rank.s.byte1,
2299 lmc_wlevel_rank.s.byte0);
2300}
2301
2302static void display_wl(int if_num,
2303 union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank, int rank)
2304{
2305 do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_NOTHING);
2306}
2307
2308static void display_wl_with_final(int if_num,
2309 union cvmx_lmcx_wlevel_rankx lmc_wlevel_rank,
2310 int rank)
2311{
2312 do_display_wl(if_num, lmc_wlevel_rank, rank, WITH_FINAL);
2313}
2314
2315// pretty-print bitmask adjuster
2316static u64 ppbm(u64 bm)
2317{
2318 if (bm != 0ul) {
2319 while ((bm & 0x0fful) == 0ul)
2320 bm >>= 4;
2321 }
2322
2323 return bm;
2324}
2325
// xlate PACKED index to UNPACKED index to use with rlevel_byte
// (with ECC, packed 4..7 become unpacked 5..8 and packed 8 becomes 4)
#define XPU(i, e) (((i) < 4) ? (i) : (((i) < 8) ? (i) + (e) : 4))
// xlate UNPACKED index to PACKED index to use with rlevel_bitmask
#define XUP(i, e) (((i) < 4) ? (i) : (e) ? (((i) > 4) ? (i) - 1 : 8) : (i))

// flag values selecting which table do_display_bm() prints
#define WITH_WL_BITMASKS 0
#define WITH_RL_BITMASKS 1
#define WITH_RL_MASK_SCORES 2
#define WITH_RL_SEQ_SCORES 3
2336
2337static void do_display_bm(int if_num, int rank, void *bm,
2338 int flags, int ecc)
2339{
2340 if (flags == WITH_WL_BITMASKS) {
2341 // wlevel_bitmask array in PACKED index order, so just
2342 // print them
2343 int *bitmasks = (int *)bm;
2344
2345 debug("N0.LMC%d.R%d: Wlevel Debug Bitmasks : %05x %05x %05x %05x %05x %05x %05x %05x %05x\n",
2346 if_num, rank, bitmasks[8], bitmasks[7], bitmasks[6],
2347 bitmasks[5], bitmasks[4], bitmasks[3], bitmasks[2],
2348 bitmasks[1], bitmasks[0]
2349 );
2350 } else if (flags == WITH_RL_BITMASKS) {
2351 // rlevel_bitmask array in PACKED index order, so just
2352 // print them
2353 struct rlevel_bitmask *rlevel_bitmask =
2354 (struct rlevel_bitmask *)bm;
2355
2356 debug("N0.LMC%d.R%d: Rlevel Debug Bitmasks 8:0 : %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx %05llx\n",
2357 if_num, rank, ppbm(rlevel_bitmask[8].bm),
2358 ppbm(rlevel_bitmask[7].bm), ppbm(rlevel_bitmask[6].bm),
2359 ppbm(rlevel_bitmask[5].bm), ppbm(rlevel_bitmask[4].bm),
2360 ppbm(rlevel_bitmask[3].bm), ppbm(rlevel_bitmask[2].bm),
2361 ppbm(rlevel_bitmask[1].bm), ppbm(rlevel_bitmask[0].bm)
2362 );
2363 } else if (flags == WITH_RL_MASK_SCORES) {
2364 // rlevel_bitmask array in PACKED index order, so just
2365 // print them
2366 struct rlevel_bitmask *rlevel_bitmask =
2367 (struct rlevel_bitmask *)bm;
2368
2369 debug("N0.LMC%d.R%d: Rlevel Debug Bitmask Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2370 if_num, rank, rlevel_bitmask[8].errs,
2371 rlevel_bitmask[7].errs, rlevel_bitmask[6].errs,
2372 rlevel_bitmask[5].errs, rlevel_bitmask[4].errs,
2373 rlevel_bitmask[3].errs, rlevel_bitmask[2].errs,
2374 rlevel_bitmask[1].errs, rlevel_bitmask[0].errs);
2375 } else if (flags == WITH_RL_SEQ_SCORES) {
2376 // rlevel_byte array in UNPACKED index order, so xlate
2377 // and print them
2378 struct rlevel_byte_data *rlevel_byte =
2379 (struct rlevel_byte_data *)bm;
2380
2381 debug("N0.LMC%d.R%d: Rlevel Debug Non-seq Scores 8:0 : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
2382 if_num, rank, rlevel_byte[XPU(8, ecc)].sqerrs,
2383 rlevel_byte[XPU(7, ecc)].sqerrs,
2384 rlevel_byte[XPU(6, ecc)].sqerrs,
2385 rlevel_byte[XPU(5, ecc)].sqerrs,
2386 rlevel_byte[XPU(4, ecc)].sqerrs,
2387 rlevel_byte[XPU(3, ecc)].sqerrs,
2388 rlevel_byte[XPU(2, ecc)].sqerrs,
2389 rlevel_byte[XPU(1, ecc)].sqerrs,
2390 rlevel_byte[XPU(0, ecc)].sqerrs);
2391 }
2392}
2393
2394static void display_wl_bm(int if_num, int rank, int *bitmasks)
2395{
2396 do_display_bm(if_num, rank, (void *)bitmasks, WITH_WL_BITMASKS, 0);
2397}
2398
2399static void display_rl_bm(int if_num, int rank,
2400 struct rlevel_bitmask *bitmasks, int ecc_ena)
2401{
2402 do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_BITMASKS,
2403 ecc_ena);
2404}
2405
2406static void display_rl_bm_scores(int if_num, int rank,
2407 struct rlevel_bitmask *bitmasks, int ecc_ena)
2408{
2409 do_display_bm(if_num, rank, (void *)bitmasks, WITH_RL_MASK_SCORES,
2410 ecc_ena);
2411}
2412
2413static void display_rl_seq_scores(int if_num, int rank,
2414 struct rlevel_byte_data *bytes, int ecc_ena)
2415{
2416 do_display_bm(if_num, rank, (void *)bytes, WITH_RL_SEQ_SCORES, ecc_ena);
2417}
2418
#define RODT_OHMS_COUNT 8
#define RTT_NOM_OHMS_COUNT 8
#define RTT_NOM_TABLE_COUNT 8
#define RTT_WR_OHMS_COUNT 8
#define DIC_OHMS_COUNT 3
#define DRIVE_STRENGTH_COUNT 15

/*
 * DDR4 termination/drive-strength lookup tables, indexed by the
 * corresponding register field encodings.  Entries not listed in an
 * initializer default to 0 (unused/reserved encodings).
 */
static unsigned char ddr4_rodt_ohms[RODT_OHMS_COUNT] = {
	0, 40, 60, 80, 120, 240, 34, 48 };
static unsigned char ddr4_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = {
	0, 60, 120, 40, 240, 48, 80, 34 };
static unsigned char ddr4_rtt_nom_table[RTT_NOM_TABLE_COUNT] = {
	0, 4, 2, 6, 1, 5, 3, 7 };
// setting HiZ ohms to 99 for computed vref
static unsigned char ddr4_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = {
	0, 120, 240, 99, 80 };
static unsigned char ddr4_dic_ohms[DIC_OHMS_COUNT] = { 34, 48 };
static short ddr4_drive_strength[DRIVE_STRENGTH_COUNT] = {
	0, 0, 26, 30, 34, 40, 48, 68, 0, 0, 0, 0, 0, 0, 0 };
static short ddr4_dqx_strength[DRIVE_STRENGTH_COUNT] = {
	0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 };
/* non-static: referenced from outside this file */
struct impedence_values ddr4_impedence_val = {
	.rodt_ohms = ddr4_rodt_ohms,
	.rtt_nom_ohms = ddr4_rtt_nom_ohms,
	.rtt_nom_table = ddr4_rtt_nom_table,
	.rtt_wr_ohms = ddr4_rtt_wr_ohms,
	.dic_ohms = ddr4_dic_ohms,
	.drive_strength = ddr4_drive_strength,
	.dqx_strength = ddr4_dqx_strength,
};

/* Equivalent tables for DDR3 devices */
static unsigned char ddr3_rodt_ohms[RODT_OHMS_COUNT] = {
	0, 20, 30, 40, 60, 120, 0, 0 };
static unsigned char ddr3_rtt_nom_ohms[RTT_NOM_OHMS_COUNT] = {
	0, 60, 120, 40, 20, 30, 0, 0 };
static unsigned char ddr3_rtt_nom_table[RTT_NOM_TABLE_COUNT] = {
	0, 2, 1, 3, 5, 4, 0, 0 };
static unsigned char ddr3_rtt_wr_ohms[RTT_WR_OHMS_COUNT] = { 0, 60, 120 };
static unsigned char ddr3_dic_ohms[DIC_OHMS_COUNT] = { 40, 34 };
static short ddr3_drive_strength[DRIVE_STRENGTH_COUNT] = {
	0, 24, 27, 30, 34, 40, 48, 60, 0, 0, 0, 0, 0, 0, 0 };
static struct impedence_values ddr3_impedence_val = {
	.rodt_ohms = ddr3_rodt_ohms,
	.rtt_nom_ohms = ddr3_rtt_nom_ohms,
	.rtt_nom_table = ddr3_rtt_nom_table,
	.rtt_wr_ohms = ddr3_rtt_wr_ohms,
	.dic_ohms = ddr3_dic_ohms,
	.drive_strength = ddr3_drive_strength,
	/* DDR3 has no separate DQ-drive table; reuse drive_strength */
	.dqx_strength = ddr3_drive_strength,
};
2469
2470static u64 hertz_to_psecs(u64 hertz)
2471{
2472 /* Clock in psecs */
2473 return divide_nint((u64)1000 * 1000 * 1000 * 1000, hertz);
2474}
2475
2476#define DIVIDEND_SCALE 1000 /* Scale to avoid rounding error. */
2477
2478static u64 psecs_to_mts(u64 psecs)
2479{
2480 return divide_nint(divide_nint((u64)(2 * 1000000 * DIVIDEND_SCALE),
2481 psecs), DIVIDEND_SCALE);
2482}
2483
2484#define WITHIN(v, b, m) (((v) >= ((b) - (m))) && ((v) <= ((b) + (m))))
2485
2486static unsigned long pretty_psecs_to_mts(u64 psecs)
2487{
2488 u64 ret = 0; // default to error
2489
2490 if (WITHIN(psecs, 2500, 1))
2491 ret = 800;
2492 else if (WITHIN(psecs, 1875, 1))
2493 ret = 1066;
2494 else if (WITHIN(psecs, 1500, 1))
2495 ret = 1333;
2496 else if (WITHIN(psecs, 1250, 1))
2497 ret = 1600;
2498 else if (WITHIN(psecs, 1071, 1))
2499 ret = 1866;
2500 else if (WITHIN(psecs, 937, 1))
2501 ret = 2133;
2502 else if (WITHIN(psecs, 833, 1))
2503 ret = 2400;
2504 else if (WITHIN(psecs, 750, 1))
2505 ret = 2666;
2506 return ret;
2507}
2508
2509static u64 mts_to_hertz(u64 mts)
2510{
2511 return ((mts * 1000 * 1000) / 2);
2512}
2513
2514static int compute_rc3x(int64_t tclk_psecs)
2515{
2516 long speed;
2517 long tclk_psecs_min, tclk_psecs_max;
2518 long data_rate_mhz, data_rate_mhz_min, data_rate_mhz_max;
2519 int rc3x;
2520
2521#define ENCODING_BASE 1240
2522
2523 data_rate_mhz = psecs_to_mts(tclk_psecs);
2524
2525 /*
2526 * 2400 MT/s is a special case. Using integer arithmetic it rounds
2527 * from 833 psecs to 2401 MT/s. Force it to 2400 to pick the
2528 * proper setting from the table.
2529 */
2530 if (tclk_psecs == 833)
2531 data_rate_mhz = 2400;
2532
2533 for (speed = ENCODING_BASE; speed < 3200; speed += 20) {
2534 int error = 0;
2535
2536 /* Clock in psecs */
2537 tclk_psecs_min = hertz_to_psecs(mts_to_hertz(speed + 00));
2538 /* Clock in psecs */
2539 tclk_psecs_max = hertz_to_psecs(mts_to_hertz(speed + 18));
2540
2541 data_rate_mhz_min = psecs_to_mts(tclk_psecs_min);
2542 data_rate_mhz_max = psecs_to_mts(tclk_psecs_max);
2543
2544 /* Force alingment to multiple to avound rounding errors. */
2545 data_rate_mhz_min = ((data_rate_mhz_min + 18) / 20) * 20;
2546 data_rate_mhz_max = ((data_rate_mhz_max + 18) / 20) * 20;
2547
2548 error += (speed + 00 != data_rate_mhz_min);
2549 error += (speed + 20 != data_rate_mhz_max);
2550
2551 rc3x = (speed - ENCODING_BASE) / 20;
2552
2553 if (data_rate_mhz <= (speed + 20))
2554 break;
2555 }
2556
2557 return rc3x;
2558}
2559
2560/*
2561 * static global variables needed, so that functions (loops) can be
2562 * restructured from the main huge function. Its not elegant, but the
2563 * only way to break the original functions like init_octeon3_ddr3_interface()
2564 * into separate logical smaller functions with less indentation levels.
2565 */
2566static int if_num __section(".data");
2567static u32 if_mask __section(".data");
2568static int ddr_hertz __section(".data");
2569
2570static struct ddr_conf *ddr_conf __section(".data");
2571static const struct dimm_odt_config *odt_1rank_config __section(".data");
2572static const struct dimm_odt_config *odt_2rank_config __section(".data");
2573static const struct dimm_odt_config *odt_4rank_config __section(".data");
2574static struct dimm_config *dimm_config_table __section(".data");
2575static const struct dimm_odt_config *odt_config __section(".data");
2576static const struct ddr3_custom_config *c_cfg __section(".data");
2577
2578static int odt_idx __section(".data");
2579
2580static ulong tclk_psecs __section(".data");
2581static ulong eclk_psecs __section(".data");
2582
2583static int row_bits __section(".data");
2584static int col_bits __section(".data");
2585static int num_banks __section(".data");
2586static int num_ranks __section(".data");
2587static int dram_width __section(".data");
2588static int dimm_count __section(".data");
2589/* Accumulate and report all the errors before giving up */
2590static int fatal_error __section(".data");
2591/* Flag that indicates safe DDR settings should be used */
2592static int safe_ddr_flag __section(".data");
2593/* Octeon II Default: 64bit interface width */
2594static int if_64b __section(".data");
2595static int if_bytemask __section(".data");
2596static u32 mem_size_mbytes __section(".data");
2597static unsigned int didx __section(".data");
2598static int bank_bits __section(".data");
2599static int bunk_enable __section(".data");
2600static int rank_mask __section(".data");
2601static int column_bits_start __section(".data");
2602static int row_lsb __section(".data");
2603static int pbank_lsb __section(".data");
2604static int use_ecc __section(".data");
2605static int mtb_psec __section(".data");
2606static short ftb_dividend __section(".data");
2607static short ftb_divisor __section(".data");
2608static int taamin __section(".data");
2609static int tckmin __section(".data");
2610static int cl __section(".data");
2611static int min_cas_latency __section(".data");
2612static int max_cas_latency __section(".data");
2613static int override_cas_latency __section(".data");
2614static int ddr_rtt_nom_auto __section(".data");
2615static int ddr_rodt_ctl_auto __section(".data");
2616
2617static int spd_addr __section(".data");
2618static int spd_org __section(".data");
2619static int spd_banks __section(".data");
2620static int spd_rdimm __section(".data");
2621static int spd_dimm_type __section(".data");
2622static int spd_ecc __section(".data");
2623static u32 spd_cas_latency __section(".data");
2624static int spd_mtb_dividend __section(".data");
2625static int spd_mtb_divisor __section(".data");
2626static int spd_tck_min __section(".data");
2627static int spd_taa_min __section(".data");
2628static int spd_twr __section(".data");
2629static int spd_trcd __section(".data");
2630static int spd_trrd __section(".data");
2631static int spd_trp __section(".data");
2632static int spd_tras __section(".data");
2633static int spd_trc __section(".data");
2634static int spd_trfc __section(".data");
2635static int spd_twtr __section(".data");
2636static int spd_trtp __section(".data");
2637static int spd_tfaw __section(".data");
2638static int spd_addr_mirror __section(".data");
2639static int spd_package __section(".data");
2640static int spd_rawcard __section(".data");
2641static int spd_rawcard_aorb __section(".data");
2642static int spd_rdimm_registers __section(".data");
2643static int spd_thermal_sensor __section(".data");
2644
2645static int is_stacked_die __section(".data");
2646static int is_3ds_dimm __section(".data");
2647// 3DS: logical ranks per package rank
2648static int lranks_per_prank __section(".data");
2649// 3DS: logical ranks bits
2650static int lranks_bits __section(".data");
2651// in Mbits; only used for 3DS
2652static int die_capacity __section(".data");
2653
2654static enum ddr_type ddr_type __section(".data");
2655
2656static int twr __section(".data");
2657static int trcd __section(".data");
2658static int trrd __section(".data");
2659static int trp __section(".data");
2660static int tras __section(".data");
2661static int trc __section(".data");
2662static int trfc __section(".data");
2663static int twtr __section(".data");
2664static int trtp __section(".data");
2665static int tfaw __section(".data");
2666
2667static int ddr4_tckavgmin __section(".data");
2668static int ddr4_tckavgmax __section(".data");
2669static int ddr4_trdcmin __section(".data");
2670static int ddr4_trpmin __section(".data");
2671static int ddr4_trasmin __section(".data");
2672static int ddr4_trcmin __section(".data");
2673static int ddr4_trfc1min __section(".data");
2674static int ddr4_trfc2min __section(".data");
2675static int ddr4_trfc4min __section(".data");
2676static int ddr4_tfawmin __section(".data");
2677static int ddr4_trrd_smin __section(".data");
2678static int ddr4_trrd_lmin __section(".data");
2679static int ddr4_tccd_lmin __section(".data");
2680
2681static int wl_mask_err __section(".data");
2682static int wl_loops __section(".data");
2683static int default_rtt_nom[4] __section(".data");
2684static int dyn_rtt_nom_mask __section(".data");
2685static struct impedence_values *imp_val __section(".data");
2686static char default_rodt_ctl __section(".data");
2687// default to disabled (ie, try LMC restart, not chip reset)
2688static int ddr_disable_chip_reset __section(".data");
2689static const char *dimm_type_name __section(".data");
2690static int match_wl_rtt_nom __section(".data");
2691
2692struct hwl_alt_by_rank {
2693 u16 hwl_alt_mask; // mask of bytelanes with alternate
2694 u16 hwl_alt_delay[9]; // bytelane alternate avail if mask=1
2695};
2696
2697static struct hwl_alt_by_rank hwl_alts[4] __section(".data");
2698
2699#define DEFAULT_INTERNAL_VREF_TRAINING_LIMIT 3 // was: 5
2700static int internal_retries __section(".data");
2701
2702static int deskew_training_errors __section(".data");
2703static struct deskew_counts deskew_training_results __section(".data");
2704static int disable_deskew_training __section(".data");
2705static int restart_if_dsk_incomplete __section(".data");
2706static int dac_eval_retries __section(".data");
2707static int dac_settings[9] __section(".data");
2708static int num_samples __section(".data");
2709static int sample __section(".data");
2710static int lane __section(".data");
2711static int last_lane __section(".data");
2712static int total_dac_eval_retries __section(".data");
2713static int dac_eval_exhausted __section(".data");
2714
2715#define DEFAULT_DAC_SAMPLES 7 // originally was 5
2716#define DAC_RETRIES_LIMIT 2
2717
2718struct bytelane_sample {
2719 s16 bytes[DEFAULT_DAC_SAMPLES];
2720};
2721
2722static struct bytelane_sample lanes[9] __section(".data");
2723
2724static char disable_sequential_delay_check __section(".data");
2725static int wl_print __section(".data");
2726
2727static int enable_by_rank_init __section(".data");
2728static int saved_rank_mask __section(".data");
2729static int by_rank __section(".data");
2730static struct deskew_data rank_dsk[4] __section(".data");
2731static struct dac_data rank_dac[4] __section(".data");
2732
2733// todo: perhaps remove node at some time completely?
2734static int node __section(".data");
2735static int base_cl __section(".data");
2736
2737/* Parameters from DDR3 Specifications */
2738#define DDR3_TREFI 7800000 /* 7.8 us */
2739#define DDR3_ZQCS 80000ull /* 80 ns */
2740#define DDR3_ZQCS_INTERNAL 1280000000ull /* 128ms/100 */
2741#define DDR3_TCKE 5000 /* 5 ns */
2742#define DDR3_TMRD 4 /* 4 nCK */
2743#define DDR3_TDLLK 512 /* 512 nCK */
2744#define DDR3_TMPRR 1 /* 1 nCK */
2745#define DDR3_TWLMRD 40 /* 40 nCK */
2746#define DDR3_TWLDQSEN 25 /* 25 nCK */
2747
2748/* Parameters from DDR4 Specifications */
2749#define DDR4_TMRD 8 /* 8 nCK */
2750#define DDR4_TDLLK 768 /* 768 nCK */
2751
/*
 * Program LMC(if_num)_CONFIG from the file-scope configuration state:
 * ECC enable, row/pbank address mapping, refresh and ZQCS intervals,
 * rank enable/mask/mirror settings, and device-width flags.  Several
 * fields accept environment-variable overrides, and the whole register
 * can be replaced via "ddr_config".
 */
static void lmc_config(struct ddr_priv *priv)
{
	union cvmx_lmcx_config cfg;
	char *s;

	cfg.u64 = 0;

	cfg.cn78xx.ecc_ena = use_ecc;
	cfg.cn78xx.row_lsb = encode_row_lsb_ddr3(row_lsb);
	cfg.cn78xx.pbank_lsb = encode_pbank_lsb_ddr3(pbank_lsb);

	cfg.cn78xx.idlepower = 0;	/* Disabled */

	s = lookup_env(priv, "ddr_idlepower");
	if (s)
		cfg.cn78xx.idlepower = simple_strtoul(s, NULL, 0);

	cfg.cn78xx.forcewrite = 0;	/* Disabled */
	/* Include memory reference address in the ECC */
	cfg.cn78xx.ecc_adr = 1;

	s = lookup_env(priv, "ddr_ecc_adr");
	if (s)
		cfg.cn78xx.ecc_adr = simple_strtoul(s, NULL, 0);

	cfg.cn78xx.reset = 0;

	/*
	 * Program LMC0_CONFIG[24:18], ref_zqcs_int(6:0) to
	 * RND-DN(tREFI/clkPeriod/512) Program LMC0_CONFIG[36:25],
	 * ref_zqcs_int(18:7) to
	 * RND-DN(ZQCS_Interval/clkPeriod/(512*128)). Note that this
	 * value should always be greater than 32, to account for
	 * resistor calibration delays.
	 */

	cfg.cn78xx.ref_zqcs_int = ((DDR3_TREFI / tclk_psecs / 512) & 0x7f);
	/* upper field: ZQCS interval, clamped to a minimum of 33 */
	cfg.cn78xx.ref_zqcs_int |=
		((max(33ull, (DDR3_ZQCS_INTERNAL / (tclk_psecs / 100) /
			      (512 * 128))) & 0xfff) << 7);

	cfg.cn78xx.early_dqx = 1;	/* Default to enabled */

	/* global override first, then per-interface override */
	s = lookup_env(priv, "ddr_early_dqx");
	if (!s)
		s = lookup_env(priv, "ddr%d_early_dqx", if_num);

	if (s)
		cfg.cn78xx.early_dqx = simple_strtoul(s, NULL, 0);

	cfg.cn78xx.sref_with_dll = 0;

	cfg.cn78xx.rank_ena = bunk_enable;
	cfg.cn78xx.rankmask = rank_mask;	/* Set later */
	/* address mirroring applies only to odd (second) ranks */
	cfg.cn78xx.mirrmask = (spd_addr_mirror << 1 | spd_addr_mirror << 3) &
	    rank_mask;
	/* Set once and don't change it. */
	cfg.cn78xx.init_status = rank_mask;
	cfg.cn78xx.early_unload_d0_r0 = 0;
	cfg.cn78xx.early_unload_d0_r1 = 0;
	cfg.cn78xx.early_unload_d1_r0 = 0;
	cfg.cn78xx.early_unload_d1_r1 = 0;
	cfg.cn78xx.scrz = 0;
	if (octeon_is_cpuid(OCTEON_CN70XX))
		cfg.cn78xx.mode32b = 1;	/* Read-only. Always 1. */
	cfg.cn78xx.mode_x4dev = (dram_width == 4) ? 1 : 0;
	/* bank-group 2 disabled for DDR4 x16 devices */
	cfg.cn78xx.bg2_enable = ((ddr_type == DDR4_DRAM) &&
				 (dram_width == 16)) ? 0 : 1;

	s = lookup_env_ull(priv, "ddr_config");
	if (s)
		cfg.u64 = simple_strtoull(s, NULL, 0);
	debug("LMC_CONFIG                                    : 0x%016llx\n",
	      cfg.u64);
	lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
2828
/*
 * Program LMC(if_num)_CONTROL: RDIMM enable, 2T command timing,
 * prefetch/backporch controls, and — when safe_ddr_flag is set —
 * conservative in-order/throttled operation.  Most fields accept
 * environment overrides; "ddr_control" replaces the whole register.
 */
static void lmc_control(struct ddr_priv *priv)
{
	union cvmx_lmcx_control ctrl;
	char *s;

	ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
	ctrl.s.rdimm_ena = spd_rdimm;
	ctrl.s.bwcnt = 0;	/* Clear counter later */
	if (spd_rdimm)
		ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_rdimm);
	else
		ctrl.s.ddr2t = (safe_ddr_flag ? 1 : c_cfg->ddr2t_udimm);
	ctrl.s.pocas = 0;
	ctrl.s.fprch2 = (safe_ddr_flag ? 2 : c_cfg->fprch2);
	/* safe mode: throttle and force in-order operation */
	ctrl.s.throttle_rd = safe_ddr_flag ? 1 : 0;
	ctrl.s.throttle_wr = safe_ddr_flag ? 1 : 0;
	ctrl.s.inorder_rd = safe_ddr_flag ? 1 : 0;
	ctrl.s.inorder_wr = safe_ddr_flag ? 1 : 0;
	ctrl.s.elev_prio_dis = safe_ddr_flag ? 1 : 0;
	/* discards writes to addresses that don't exist in the DRAM */
	ctrl.s.nxm_write_en = 0;
	ctrl.s.max_write_batch = 8;
	ctrl.s.xor_bank = 1;
	ctrl.s.auto_dclkdis = 1;
	ctrl.s.int_zqcs_dis = 0;
	ctrl.s.ext_zqcs_dis = 0;
	ctrl.s.bprch = 1;
	ctrl.s.wodt_bprch = 1;
	ctrl.s.rodt_bprch = 1;

	/* per-field environment overrides */
	s = lookup_env(priv, "ddr_xor_bank");
	if (s)
		ctrl.s.xor_bank = simple_strtoul(s, NULL, 0);

	s = lookup_env(priv, "ddr_2t");
	if (s)
		ctrl.s.ddr2t = simple_strtoul(s, NULL, 0);

	s = lookup_env(priv, "ddr_fprch2");
	if (s)
		ctrl.s.fprch2 = simple_strtoul(s, NULL, 0);

	s = lookup_env(priv, "ddr_bprch");
	if (s)
		ctrl.s.bprch = simple_strtoul(s, NULL, 0);

	s = lookup_env(priv, "ddr_wodt_bprch");
	if (s)
		ctrl.s.wodt_bprch = simple_strtoul(s, NULL, 0);

	s = lookup_env(priv, "ddr_rodt_bprch");
	if (s)
		ctrl.s.rodt_bprch = simple_strtoul(s, NULL, 0);

	s = lookup_env(priv, "ddr_int_zqcs_dis");
	if (s)
		ctrl.s.int_zqcs_dis = simple_strtoul(s, NULL, 0);

	s = lookup_env(priv, "ddr_ext_zqcs_dis");
	if (s)
		ctrl.s.ext_zqcs_dis = simple_strtoul(s, NULL, 0);

	/* whole-register override, applied last */
	s = lookup_env_ull(priv, "ddr_control");
	if (s)
		ctrl.u64 = simple_strtoull(s, NULL, 0);

	debug("LMC_CONTROL                                   : 0x%016llx\n",
	      ctrl.u64);
	lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
}
2899
/*
 * Program LMC(0..3)_TIMING_PARAMS0 for the current interface (if_num).
 *
 * Derives tXPR, tRP, tCKSRE, tZQinit, tZQCS, tCKE, tMRD, tMOD and tDLLK
 * from the DDR clock period (tclk_psecs, file-scope global) and the
 * JEDEC timing globals (trp, trfc, ...) for the detected DRAM type.
 * Environment variables allow per-field and whole-register overrides.
 */
static void lmc_timing_params0(struct ddr_priv *priv)
{
	union cvmx_lmcx_timing_params0 tp0;
	unsigned int trp_value;
	char *s;

	tp0.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS0(if_num));

	/* tRP in clocks, minus one for the CSR encoding */
	trp_value = divide_roundup(trp, tclk_psecs) - 1;
	debug("TIMING_PARAMS0[TRP]: NEW 0x%x, OLD 0x%x\n", trp_value,
	      trp_value +
	      (unsigned int)(divide_roundup(max(4ull * tclk_psecs, 7500ull),
					    tclk_psecs)) - 4);
	/* Optional fallback to the legacy (larger) tRP computation */
	s = lookup_env_ull(priv, "ddr_use_old_trp");
	if (s) {
		if (!!simple_strtoull(s, NULL, 0)) {
			trp_value +=
			    divide_roundup(max(4ull * tclk_psecs, 7500ull),
					   tclk_psecs) - 4;
			debug("TIMING_PARAMS0[trp]: USING OLD 0x%x\n",
			      trp_value);
		}
	}

	/* tXPR counted in 16-cycle units: max(5 nCK, tRFC + 10 ns) */
	tp0.cn78xx.txpr =
	    divide_roundup(max(5ull * tclk_psecs, trfc + 10000ull),
			   16 * tclk_psecs);
	tp0.cn78xx.trp = trp_value & 0x1f;
	tp0.cn78xx.tcksre =
	    divide_roundup(max(5ull * tclk_psecs, 10000ull), tclk_psecs) - 1;

	if (ddr_type == DDR4_DRAM) {
		int tzqinit = 4;	// Default to 4, for all DDR4 speed bins

		s = lookup_env(priv, "ddr_tzqinit");
		if (s)
			tzqinit = simple_strtoul(s, NULL, 0);

		tp0.cn78xx.tzqinit = tzqinit;
		/* Always 8. */
		tp0.cn78xx.tzqcs = divide_roundup(128 * tclk_psecs,
						  (16 * tclk_psecs));
		tp0.cn78xx.tcke =
		    divide_roundup(max(3 * tclk_psecs, (ulong)DDR3_TCKE),
				   tclk_psecs) - 1;
		tp0.cn78xx.tmrd =
		    divide_roundup((DDR4_TMRD * tclk_psecs), tclk_psecs) - 1;
		tp0.cn78xx.tmod = 25;	/* 25 is the max allowed */
		tp0.cn78xx.tdllk = divide_roundup(DDR4_TDLLK, 256);
	} else {
		/* DDR3 values; tZQinit in 256-cycle units, tZQCS in 16 */
		tp0.cn78xx.tzqinit =
		    divide_roundup(max(512ull * tclk_psecs, 640000ull),
				   (256 * tclk_psecs));
		tp0.cn78xx.tzqcs =
		    divide_roundup(max(64ull * tclk_psecs, DDR3_ZQCS),
				   (16 * tclk_psecs));
		tp0.cn78xx.tcke = divide_roundup(DDR3_TCKE, tclk_psecs) - 1;
		tp0.cn78xx.tmrd =
		    divide_roundup((DDR3_TMRD * tclk_psecs), tclk_psecs) - 1;
		tp0.cn78xx.tmod =
		    divide_roundup(max(12ull * tclk_psecs, 15000ull),
				   tclk_psecs) - 1;
		tp0.cn78xx.tdllk = divide_roundup(DDR3_TDLLK, 256);
	}

	/* Whole-register override hook */
	s = lookup_env_ull(priv, "ddr_timing_params0");
	if (s)
		tp0.u64 = simple_strtoull(s, NULL, 0);
	debug("TIMING_PARAMS0 : 0x%016llx\n",
	      tp0.u64);
	lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS0(if_num), tp0.u64);
}
2972
/*
 * Program LMC(0..3)_TIMING_PARAMS1 for the current interface (if_num).
 *
 * Computes tMPRR, tRAS, tRCD (with extension bit), tWTR, tRFC, tRRD,
 * tXP (with extension bit), tWLMRD, tWLDQSEN, tFAW, tXPDLL and, for
 * DDR4 3DS stacks, tRFC_DLR. CN78XX pass 1 lacks the trcd/txp
 * extension bits, so overflowing values are clamped or flagged.
 */
static void lmc_timing_params1(struct ddr_priv *priv)
{
	union cvmx_lmcx_timing_params1 tp1;
	unsigned int txp, temp_trcd, trfc_dlr;
	char *s;

	tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));

	/* .cn70xx. */
	tp1.s.tmprr = divide_roundup(DDR3_TMPRR * tclk_psecs, tclk_psecs) - 1;

	tp1.cn78xx.tras = divide_roundup(tras, tclk_psecs) - 1;

	temp_trcd = divide_roundup(trcd, tclk_psecs);
	if (temp_trcd > 15) {
		debug("TIMING_PARAMS1[trcd]: need extension bit for 0x%x\n",
		      temp_trcd);
	}
	if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trcd > 15) {
		/*
		 * Let .trcd=0 serve as a flag that the field has
		 * overflowed. Must use Additive Latency mode as a
		 * workaround.
		 */
		temp_trcd = 0;
	}
	/* Low 4 bits in trcd, overflow bit in trcd_ext */
	tp1.cn78xx.trcd = (temp_trcd >> 0) & 0xf;
	tp1.cn78xx.trcd_ext = (temp_trcd >> 4) & 0x1;

	tp1.cn78xx.twtr = divide_roundup(twtr, tclk_psecs) - 1;
	tp1.cn78xx.trfc = divide_roundup(trfc, 8 * tclk_psecs);

	if (ddr_type == DDR4_DRAM) {
		/* Workaround bug 24006. Use Trrd_l. */
		tp1.cn78xx.trrd =
		    divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2;
	} else {
		tp1.cn78xx.trrd = divide_roundup(trrd, tclk_psecs) - 2;
	}

	/*
	 * tXP = max( 3nCK, 7.5 ns) DDR3-800 tCLK = 2500 psec
	 * tXP = max( 3nCK, 7.5 ns) DDR3-1066 tCLK = 1875 psec
	 * tXP = max( 3nCK, 6.0 ns) DDR3-1333 tCLK = 1500 psec
	 * tXP = max( 3nCK, 6.0 ns) DDR3-1600 tCLK = 1250 psec
	 * tXP = max( 3nCK, 6.0 ns) DDR3-1866 tCLK = 1071 psec
	 * tXP = max( 3nCK, 6.0 ns) DDR3-2133 tCLK = 937 psec
	 */
	txp = (tclk_psecs < 1875) ? 6000 : 7500;
	txp = divide_roundup(max((unsigned int)(3 * tclk_psecs), txp),
			     tclk_psecs) - 1;
	if (txp > 7) {
		debug("TIMING_PARAMS1[txp]: need extension bit for 0x%x\n",
		      txp);
	}
	/* Pass 1 has no txp_ext bit; saturate at the 3-bit maximum */
	if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && txp > 7)
		txp = 7;	// max it out
	tp1.cn78xx.txp = (txp >> 0) & 7;
	tp1.cn78xx.txp_ext = (txp >> 3) & 1;

	tp1.cn78xx.twlmrd = divide_roundup(DDR3_TWLMRD * tclk_psecs,
					   4 * tclk_psecs);
	tp1.cn78xx.twldqsen = divide_roundup(DDR3_TWLDQSEN * tclk_psecs,
					     4 * tclk_psecs);
	tp1.cn78xx.tfaw = divide_roundup(tfaw, 4 * tclk_psecs);
	tp1.cn78xx.txpdll = divide_roundup(max(10ull * tclk_psecs, 24000ull),
					   tclk_psecs) - 1;

	if (ddr_type == DDR4_DRAM && is_3ds_dimm) {
		/*
		 * 4 Gb: tRFC_DLR = 90 ns
		 * 8 Gb: tRFC_DLR = 120 ns
		 * 16 Gb: tRFC_DLR = 190 ns FIXME?
		 */
		if (die_capacity == 0x1000) // 4 Gbit
			trfc_dlr = 90;
		else if (die_capacity == 0x2000) // 8 Gbit
			trfc_dlr = 120;
		else if (die_capacity == 0x4000) // 16 Gbit
			trfc_dlr = 190;
		else
			trfc_dlr = 0;

		if (trfc_dlr == 0) {
			debug("N%d.LMC%d: ERROR: tRFC_DLR: die_capacity %u Mbit is illegal\n",
			      node, if_num, die_capacity);
		} else {
			/* ns -> ps, counted in 8-cycle units */
			tp1.cn78xx.trfc_dlr =
			    divide_roundup(trfc_dlr * 1000UL, 8 * tclk_psecs);
			debug("N%d.LMC%d: TIMING_PARAMS1[trfc_dlr] set to %u\n",
			      node, if_num, tp1.cn78xx.trfc_dlr);
		}
	}

	/* Whole-register override hook */
	s = lookup_env_ull(priv, "ddr_timing_params1");
	if (s)
		tp1.u64 = simple_strtoull(s, NULL, 0);

	debug("TIMING_PARAMS1 : 0x%016llx\n",
	      tp1.u64);
	lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
}
3075
/*
 * Program LMC(0..3)_TIMING_PARAMS2 (DDR4 only): tRRD_L (with extension
 * bit), tWTR_L, T_RW_OP_MAX and tRTP.
 *
 * Also applies the Errata 25823 workaround: after TP2 is written,
 * TIMING_PARAMS1[TWTR] is raised to at least tWTR_L - 4 and TP1 is
 * rewritten if it changed, so the TP2 write must come first.
 */
static void lmc_timing_params2(struct ddr_priv *priv)
{
	if (ddr_type == DDR4_DRAM) {
		union cvmx_lmcx_timing_params1 tp1;
		union cvmx_lmcx_timing_params2 tp2;
		int temp_trrd_l;

		tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
		tp2.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS2(if_num));
		debug("TIMING_PARAMS2 : 0x%016llx\n",
		      tp2.u64);

		temp_trrd_l = divide_roundup(ddr4_trrd_lmin, tclk_psecs) - 2;
		if (temp_trrd_l > 7)
			debug("TIMING_PARAMS2[trrd_l]: need extension bit for 0x%x\n",
			      temp_trrd_l);
		/* Pass 1 has no trrd_l_ext bit; saturate at 3-bit max */
		if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && temp_trrd_l > 7)
			temp_trrd_l = 7; // max it out
		tp2.cn78xx.trrd_l = (temp_trrd_l >> 0) & 7;
		tp2.cn78xx.trrd_l_ext = (temp_trrd_l >> 3) & 1;

		// correct for 1600-2400
		tp2.s.twtr_l = divide_nint(max(4ull * tclk_psecs, 7500ull),
					   tclk_psecs) - 1;
		tp2.s.t_rw_op_max = 7;
		tp2.s.trtp = divide_roundup(max(4ull * tclk_psecs, 7500ull),
					    tclk_psecs) - 1;

		debug("TIMING_PARAMS2 : 0x%016llx\n",
		      tp2.u64);
		lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS2(if_num), tp2.u64);

		/*
		 * Workaround Errata 25823 - LMC: Possible DDR4 tWTR_L not met
		 * for Write-to-Read operations to the same Bank Group
		 */
		if (tp1.cn78xx.twtr < (tp2.s.twtr_l - 4)) {
			tp1.cn78xx.twtr = tp2.s.twtr_l - 4;
			debug("ERRATA 25823: NEW: TWTR: %d, TWTR_L: %d\n",
			      tp1.cn78xx.twtr, tp2.s.twtr_l);
			debug("TIMING_PARAMS1 : 0x%016llx\n",
			      tp1.u64);
			lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);
		}
	}
}
3122
/*
 * Program LMC(0..3)_MODEREG_PARAMS0: CAS write latency (CWL), CAS
 * latency (CL), write recovery (WRP) and the static MR0/MR1/MR2 bits.
 *
 * CWL and WRP are derived from the clock period (tclk_psecs) and twr;
 * CL uses the file-scope global `cl`, which the "ddr_cl" environment
 * override may modify here as a side effect.
 */
static void lmc_modereg_params0(struct ddr_priv *priv)
{
	union cvmx_lmcx_modereg_params0 mp0;
	int param;
	char *s;

	mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));

	if (ddr_type == DDR4_DRAM) {
		/* DDR4 CWL encoding, picked by speed bin threshold */
		mp0.s.cwl = 0; /* 1600 (1250ps) */
		if (tclk_psecs < 1250)
			mp0.s.cwl = 1; /* 1866 (1072ps) */
		if (tclk_psecs < 1072)
			mp0.s.cwl = 2; /* 2133 (938ps) */
		if (tclk_psecs < 938)
			mp0.s.cwl = 3; /* 2400 (833ps) */
		if (tclk_psecs < 833)
			mp0.s.cwl = 4; /* 2666 (750ps) */
		if (tclk_psecs < 750)
			mp0.s.cwl = 5; /* 3200 (625ps) */
	} else {
		/*
		 ** CSR CWL CAS write Latency
		 ** === === =================================
		 ** 0 5 ( tCK(avg) >= 2.5 ns)
		 ** 1 6 (2.5 ns > tCK(avg) >= 1.875 ns)
		 ** 2 7 (1.875 ns > tCK(avg) >= 1.5 ns)
		 ** 3 8 (1.5 ns > tCK(avg) >= 1.25 ns)
		 ** 4 9 (1.25 ns > tCK(avg) >= 1.07 ns)
		 ** 5 10 (1.07 ns > tCK(avg) >= 0.935 ns)
		 ** 6 11 (0.935 ns > tCK(avg) >= 0.833 ns)
		 ** 7 12 (0.833 ns > tCK(avg) >= 0.75 ns)
		 */

		mp0.s.cwl = 0;
		if (tclk_psecs < 2500)
			mp0.s.cwl = 1;
		if (tclk_psecs < 1875)
			mp0.s.cwl = 2;
		if (tclk_psecs < 1500)
			mp0.s.cwl = 3;
		if (tclk_psecs < 1250)
			mp0.s.cwl = 4;
		if (tclk_psecs < 1070)
			mp0.s.cwl = 5;
		if (tclk_psecs < 935)
			mp0.s.cwl = 6;
		if (tclk_psecs < 833)
			mp0.s.cwl = 7;
	}

	/* Env override takes the CWL in clocks; CSR stores CWL - 5 */
	s = lookup_env(priv, "ddr_cwl");
	if (s)
		mp0.s.cwl = simple_strtoul(s, NULL, 0) - 5;

	if (ddr_type == DDR4_DRAM) {
		debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
		      mp0.s.cwl + 9
		      + ((mp0.s.cwl > 2) ? (mp0.s.cwl - 3) * 2 : 0), mp0.s.cwl);
	} else {
		debug("%-45s : %d, [0x%x]\n", "CAS Write Latency CWL, [CSR]",
		      mp0.s.cwl + 5, mp0.s.cwl);
	}

	mp0.s.mprloc = 0;
	mp0.s.mpr = 0;
	mp0.s.dll = (ddr_type == DDR4_DRAM); /* 0 for DDR3 and 1 for DDR4 */
	mp0.s.al = 0;
	mp0.s.wlev = 0; /* Read Only */
	/* TDQS is not used on CN70XX or with DDR4 */
	if (octeon_is_cpuid(OCTEON_CN70XX) || ddr_type == DDR4_DRAM)
		mp0.s.tdqs = 0;
	else
		mp0.s.tdqs = 1;
	mp0.s.qoff = 0;

	/* NOTE: this updates the file-scope global `cl` */
	s = lookup_env(priv, "ddr_cl");
	if (s) {
		cl = simple_strtoul(s, NULL, 0);
		debug("CAS Latency : %6d\n",
		      cl);
	}

	if (ddr_type == DDR4_DRAM) {
		/* Map CAS latency in clocks to the DDR4 MR0 encoding */
		mp0.s.cl = 0x0;
		if (cl > 9)
			mp0.s.cl = 0x1;
		if (cl > 10)
			mp0.s.cl = 0x2;
		if (cl > 11)
			mp0.s.cl = 0x3;
		if (cl > 12)
			mp0.s.cl = 0x4;
		if (cl > 13)
			mp0.s.cl = 0x5;
		if (cl > 14)
			mp0.s.cl = 0x6;
		if (cl > 15)
			mp0.s.cl = 0x7;
		if (cl > 16)
			mp0.s.cl = 0x8;
		if (cl > 18)
			mp0.s.cl = 0x9;
		if (cl > 20)
			mp0.s.cl = 0xA;
		if (cl > 24)
			mp0.s.cl = 0xB;
	} else {
		/* DDR3 MR0 CL encoding (low/high bit split per JEDEC) */
		mp0.s.cl = 0x2;
		if (cl > 5)
			mp0.s.cl = 0x4;
		if (cl > 6)
			mp0.s.cl = 0x6;
		if (cl > 7)
			mp0.s.cl = 0x8;
		if (cl > 8)
			mp0.s.cl = 0xA;
		if (cl > 9)
			mp0.s.cl = 0xC;
		if (cl > 10)
			mp0.s.cl = 0xE;
		if (cl > 11)
			mp0.s.cl = 0x1;
		if (cl > 12)
			mp0.s.cl = 0x3;
		if (cl > 13)
			mp0.s.cl = 0x5;
		if (cl > 14)
			mp0.s.cl = 0x7;
		if (cl > 15)
			mp0.s.cl = 0x9;
	}

	mp0.s.rbt = 0; /* Read Only. */
	mp0.s.tm = 0;
	mp0.s.dllr = 0;

	/* Write recovery in clocks, then mapped to the WRP encoding */
	param = divide_roundup(twr, tclk_psecs);

	if (ddr_type == DDR4_DRAM) {	/* DDR4 */
		mp0.s.wrp = 1;
		if (param > 12)
			mp0.s.wrp = 2;
		if (param > 14)
			mp0.s.wrp = 3;
		if (param > 16)
			mp0.s.wrp = 4;
		if (param > 18)
			mp0.s.wrp = 5;
		if (param > 20)
			mp0.s.wrp = 6;
		if (param > 24)	/* RESERVED in DDR4 spec */
			mp0.s.wrp = 7;
	} else {		/* DDR3 */
		mp0.s.wrp = 1;
		if (param > 5)
			mp0.s.wrp = 2;
		if (param > 6)
			mp0.s.wrp = 3;
		if (param > 7)
			mp0.s.wrp = 4;
		if (param > 8)
			mp0.s.wrp = 5;
		if (param > 10)
			mp0.s.wrp = 6;
		if (param > 12)
			mp0.s.wrp = 7;
	}

	mp0.s.ppd = 0;

	s = lookup_env(priv, "ddr_wrp");
	if (s)
		mp0.s.wrp = simple_strtoul(s, NULL, 0);

	debug("%-45s : %d, [0x%x]\n",
	      "Write recovery for auto precharge WRP, [CSR]", param, mp0.s.wrp);

	/* Whole-register override hook */
	s = lookup_env_ull(priv, "ddr_modereg_params0");
	if (s)
		mp0.u64 = simple_strtoull(s, NULL, 0);

	debug("MODEREG_PARAMS0 : 0x%016llx\n",
	      mp0.u64);
	lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);
}
3308
/*
 * Program LMC(0..3)_MODEREG_PARAMS1: per-rank RTT_NOM, RTT_WR and DIC
 * (output driver impedance) settings from the ODT configuration table.
 *
 * Side effects on file-scope state used later by training:
 * - default_rtt_nom[] records the starting RTT_NOM per rank,
 * - dyn_rtt_nom_mask selects which ranks sweep RTT_NOM,
 * - ddr_rtt_nom_auto is cleared by any explicit override.
 * RTT_NOM fields live at bit (i*12+9), DIC at (i*12+7), RTT_WR via the
 * insrt_wr()/extr_wr() helpers (which handle the extension bits).
 */
static void lmc_modereg_params1(struct ddr_priv *priv)
{
	union cvmx_lmcx_modereg_params1 mp1;
	char *s;
	int i;

	mp1.u64 = odt_config[odt_idx].modereg_params1.u64;

	/*
	 * Special request: mismatched DIMM support. Slot 0: 2-Rank,
	 * Slot 1: 1-Rank
	 */
	if (rank_mask == 0x7) { /* 2-Rank, 1-Rank */
		mp1.s.rtt_nom_00 = 0;
		mp1.s.rtt_nom_01 = 3; /* rttnom_40ohm */
		mp1.s.rtt_nom_10 = 3; /* rttnom_40ohm */
		mp1.s.rtt_nom_11 = 0;
		dyn_rtt_nom_mask = 0x6;
	}

	s = lookup_env(priv, "ddr_rtt_nom_mask");
	if (s)
		dyn_rtt_nom_mask = simple_strtoul(s, NULL, 0);

	/*
	 * Save the original rtt_nom settings before sweeping through
	 * settings.
	 */
	default_rtt_nom[0] = mp1.s.rtt_nom_00;
	default_rtt_nom[1] = mp1.s.rtt_nom_01;
	default_rtt_nom[2] = mp1.s.rtt_nom_10;
	default_rtt_nom[3] = mp1.s.rtt_nom_11;

	ddr_rtt_nom_auto = c_cfg->ddr_rtt_nom_auto;

	/* Per-rank RTT_NOM overrides: ddr_rtt_nom_XY / ddrN_rtt_nom_XY */
	for (i = 0; i < 4; ++i) {
		u64 value;

		s = lookup_env(priv, "ddr_rtt_nom_%1d%1d", !!(i & 2),
			       !!(i & 1));
		if (!s)
			s = lookup_env(priv, "ddr%d_rtt_nom_%1d%1d", if_num,
				       !!(i & 2), !!(i & 1));
		if (s) {
			value = simple_strtoul(s, NULL, 0);
			mp1.u64 &= ~((u64)0x7 << (i * 12 + 9));
			mp1.u64 |= ((value & 0x7) << (i * 12 + 9));
			default_rtt_nom[i] = value;
			ddr_rtt_nom_auto = 0;
		}
	}

	/* Single-value RTT_NOM override, applied to ranks in the mask */
	s = lookup_env(priv, "ddr_rtt_nom");
	if (!s)
		s = lookup_env(priv, "ddr%d_rtt_nom", if_num);
	if (s) {
		u64 value;

		value = simple_strtoul(s, NULL, 0);

		if (dyn_rtt_nom_mask & 1) {
			default_rtt_nom[0] = value;
			mp1.s.rtt_nom_00 = value;
		}
		if (dyn_rtt_nom_mask & 2) {
			default_rtt_nom[1] = value;
			mp1.s.rtt_nom_01 = value;
		}
		if (dyn_rtt_nom_mask & 4) {
			default_rtt_nom[2] = value;
			mp1.s.rtt_nom_10 = value;
		}
		if (dyn_rtt_nom_mask & 8) {
			default_rtt_nom[3] = value;
			mp1.s.rtt_nom_11 = value;
		}

		ddr_rtt_nom_auto = 0;
	}

	/* Per-rank RTT_WR overrides */
	for (i = 0; i < 4; ++i) {
		u64 value;

		s = lookup_env(priv, "ddr_rtt_wr_%1d%1d", !!(i & 2), !!(i & 1));
		if (!s)
			s = lookup_env(priv, "ddr%d_rtt_wr_%1d%1d", if_num,
				       !!(i & 2), !!(i & 1));
		if (s) {
			value = simple_strtoul(s, NULL, 0);
			insrt_wr(&mp1.u64, i, value);
		}
	}

	// Make sure 78XX pass 1 has valid RTT_WR settings, because
	// configuration files may be set-up for later chips, and
	// 78XX pass 1 supports no RTT_WR extension bits
	if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
		for (i = 0; i < 4; ++i) {
			// if 80 or undefined
			if (extr_wr(mp1.u64, i) > 3) {
				// FIXME? always insert 120
				insrt_wr(&mp1.u64, i, 1);
				debug("RTT_WR_%d%d set to 120 for CN78XX pass 1\n",
				      !!(i & 2), i & 1);
			}
		}
	}

	/* Single-value DIC override for all ranks */
	s = lookup_env(priv, "ddr_dic");
	if (s) {
		u64 value = simple_strtoul(s, NULL, 0);

		for (i = 0; i < 4; ++i) {
			mp1.u64 &= ~((u64)0x3 << (i * 12 + 7));
			mp1.u64 |= ((value & 0x3) << (i * 12 + 7));
		}
	}

	/* Per-rank DIC overrides */
	for (i = 0; i < 4; ++i) {
		u64 value;

		s = lookup_env(priv, "ddr_dic_%1d%1d", !!(i & 2), !!(i & 1));
		if (s) {
			value = simple_strtoul(s, NULL, 0);
			mp1.u64 &= ~((u64)0x3 << (i * 12 + 7));
			mp1.u64 |= ((value & 0x3) << (i * 12 + 7));
		}
	}

	/* Whole-register override hook */
	s = lookup_env_ull(priv, "ddr_modereg_params1");
	if (s)
		mp1.u64 = simple_strtoull(s, NULL, 0);

	debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
	      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
	      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
	      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
	      imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
	      mp1.s.rtt_nom_11,
	      mp1.s.rtt_nom_10, mp1.s.rtt_nom_01, mp1.s.rtt_nom_00);

	debug("RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
	      imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)],
	      imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)],
	      imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)],
	      imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)],
	      extr_wr(mp1.u64, 3),
	      extr_wr(mp1.u64, 2), extr_wr(mp1.u64, 1), extr_wr(mp1.u64, 0));

	debug("DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
	      imp_val->dic_ohms[mp1.s.dic_11],
	      imp_val->dic_ohms[mp1.s.dic_10],
	      imp_val->dic_ohms[mp1.s.dic_01],
	      imp_val->dic_ohms[mp1.s.dic_00],
	      mp1.s.dic_11, mp1.s.dic_10, mp1.s.dic_01, mp1.s.dic_00);

	debug("MODEREG_PARAMS1 : 0x%016llx\n",
	      mp1.u64);
	lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num), mp1.u64);
}
3469
/*
 * Program LMC(0..3)_MODEREG_PARAMS2 (DDR4 only): per-rank RTT_PARK
 * plus the VREF range/value fields taken from the ODT configuration
 * table. RTT_PARK fields are 3 bits wide at bit offset (i*10).
 * No-op for DDR3.
 */
static void lmc_modereg_params2(struct ddr_priv *priv)
{
	char *s;
	int i;

	if (ddr_type == DDR4_DRAM) {
		union cvmx_lmcx_modereg_params2 mp2;

		mp2.u64 = odt_config[odt_idx].modereg_params2.u64;

		/* Single-value RTT_PARK override, applied to all ranks */
		s = lookup_env(priv, "ddr_rtt_park");
		if (s) {
			u64 value = simple_strtoul(s, NULL, 0);

			for (i = 0; i < 4; ++i) {
				mp2.u64 &= ~((u64)0x7 << (i * 10 + 0));
				mp2.u64 |= ((value & 0x7) << (i * 10 + 0));
			}
		}

		/* Per-rank RTT_PARK overrides */
		for (i = 0; i < 4; ++i) {
			u64 value;

			s = lookup_env(priv, "ddr_rtt_park_%1d%1d", !!(i & 2),
				       !!(i & 1));
			if (s) {
				value = simple_strtoul(s, NULL, 0);
				mp2.u64 &= ~((u64)0x7 << (i * 10 + 0));
				mp2.u64 |= ((value & 0x7) << (i * 10 + 0));
			}
		}

		/* Whole-register override hook */
		s = lookup_env_ull(priv, "ddr_modereg_params2");
		if (s)
			mp2.u64 = simple_strtoull(s, NULL, 0);

		debug("RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
		      imp_val->rtt_nom_ohms[mp2.s.rtt_park_11],
		      imp_val->rtt_nom_ohms[mp2.s.rtt_park_10],
		      imp_val->rtt_nom_ohms[mp2.s.rtt_park_01],
		      imp_val->rtt_nom_ohms[mp2.s.rtt_park_00],
		      mp2.s.rtt_park_11, mp2.s.rtt_park_10, mp2.s.rtt_park_01,
		      mp2.s.rtt_park_00);

		debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_RANGE",
		      mp2.s.vref_range_11,
		      mp2.s.vref_range_10,
		      mp2.s.vref_range_01, mp2.s.vref_range_00);

		debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n", "VREF_VALUE",
		      mp2.s.vref_value_11,
		      mp2.s.vref_value_10,
		      mp2.s.vref_value_01, mp2.s.vref_value_00);

		debug("MODEREG_PARAMS2 : 0x%016llx\n",
		      mp2.u64);
		lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS2(if_num), mp2.u64);
	}
}
3529
3530static void lmc_modereg_params3(struct ddr_priv *priv)
3531{
3532 char *s;
3533
3534 if (ddr_type == DDR4_DRAM) {
3535 union cvmx_lmcx_modereg_params3 mp3;
3536
3537 mp3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num));
3538 /* Disable as workaround to Errata 20547 */
3539 mp3.s.rd_dbi = 0;
3540 mp3.s.tccd_l = max(divide_roundup(ddr4_tccd_lmin, tclk_psecs),
3541 5ull) - 4;
3542
3543 s = lookup_env(priv, "ddr_rd_preamble");
3544 if (s)
3545 mp3.s.rd_preamble = !!simple_strtoul(s, NULL, 0);
3546
3547 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
3548 int delay = 0;
3549
3550 if (lranks_per_prank == 4 && ddr_hertz >= 1000000000)
3551 delay = 1;
3552
3553 mp3.s.xrank_add_tccd_l = delay;
3554 mp3.s.xrank_add_tccd_s = delay;
3555 }
3556
3557 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(if_num), mp3.u64);
3558 debug("MODEREG_PARAMS3 : 0x%016llx\n",
3559 mp3.u64);
3560 }
3561}
3562
3563static void lmc_nxm(struct ddr_priv *priv)
3564{
3565 union cvmx_lmcx_nxm lmc_nxm;
3566 int num_bits = row_lsb + row_bits + lranks_bits - 26;
3567 char *s;
3568
3569 lmc_nxm.u64 = lmc_rd(priv, CVMX_LMCX_NXM(if_num));
3570
3571 /* .cn78xx. */
3572 if (rank_mask & 0x1)
3573 lmc_nxm.cn78xx.mem_msb_d0_r0 = num_bits;
3574 if (rank_mask & 0x2)
3575 lmc_nxm.cn78xx.mem_msb_d0_r1 = num_bits;
3576 if (rank_mask & 0x4)
3577 lmc_nxm.cn78xx.mem_msb_d1_r0 = num_bits;
3578 if (rank_mask & 0x8)
3579 lmc_nxm.cn78xx.mem_msb_d1_r1 = num_bits;
3580
3581 /* Set the mask for non-existent ranks. */
3582 lmc_nxm.cn78xx.cs_mask = ~rank_mask & 0xff;
3583
3584 s = lookup_env_ull(priv, "ddr_nxm");
3585 if (s)
3586 lmc_nxm.u64 = simple_strtoull(s, NULL, 0);
3587
3588 debug("LMC_NXM : 0x%016llx\n",
3589 lmc_nxm.u64);
3590 lmc_wr(priv, CVMX_LMCX_NXM(if_num), lmc_nxm.u64);
3591}
3592
3593static void lmc_wodt_mask(struct ddr_priv *priv)
3594{
3595 union cvmx_lmcx_wodt_mask wodt_mask;
3596 char *s;
3597
3598 wodt_mask.u64 = odt_config[odt_idx].odt_mask;
3599
3600 s = lookup_env_ull(priv, "ddr_wodt_mask");
3601 if (s)
3602 wodt_mask.u64 = simple_strtoull(s, NULL, 0);
3603
3604 debug("WODT_MASK : 0x%016llx\n",
3605 wodt_mask.u64);
3606 lmc_wr(priv, CVMX_LMCX_WODT_MASK(if_num), wodt_mask.u64);
3607}
3608
/*
 * Program LMC(0..3)_RODT_MASK (read ODT) from the ODT configuration
 * table, then derive the file-scope global dyn_rtt_nom_mask: the set
 * of ranks whose RTT_NOM will be swept during read-leveling. For
 * quad-rank DIMMs the ODT1 bit is relocated to the rank-2 position
 * (see the inline comment).
 */
static void lmc_rodt_mask(struct ddr_priv *priv)
{
	union cvmx_lmcx_rodt_mask rodt_mask;
	int rankx;
	char *s;

	rodt_mask.u64 = odt_config[odt_idx].rodt_ctl;

	s = lookup_env_ull(priv, "ddr_rodt_mask");
	if (s)
		rodt_mask.u64 = simple_strtoull(s, NULL, 0);

	debug("%-45s : 0x%016llx\n", "RODT_MASK", rodt_mask.u64);
	lmc_wr(priv, CVMX_LMCX_RODT_MASK(if_num), rodt_mask.u64);

	/* OR together the per-rank bytes for every populated rank */
	dyn_rtt_nom_mask = 0;
	for (rankx = 0; rankx < dimm_count * 4; rankx++) {
		if (!(rank_mask & (1 << rankx)))
			continue;
		dyn_rtt_nom_mask |= ((rodt_mask.u64 >> (8 * rankx)) & 0xff);
	}
	if (num_ranks == 4) {
		/*
		 * Normally ODT1 is wired to rank 1. For quad-ranked DIMMs
		 * ODT1 is wired to the third rank (rank 2). The mask,
		 * dyn_rtt_nom_mask, is used to indicate for which ranks
		 * to sweep RTT_NOM during read-leveling. Shift the bit
		 * from the ODT1 position over to the "ODT2" position so
		 * that the read-leveling analysis comes out right.
		 */
		int odt1_bit = dyn_rtt_nom_mask & 2;

		dyn_rtt_nom_mask &= ~2;
		dyn_rtt_nom_mask |= odt1_bit << 1;
	}
	debug("%-45s : 0x%02x\n", "DYN_RTT_NOM_MASK", dyn_rtt_nom_mask);
}
3646
/*
 * Program LMC(0..3)_COMP_CTL2: DQ/CK/CMD/CTL drive strengths and the
 * read ODT termination (RODT_CTL).
 *
 * Side effects: sets the file-scope globals ddr_rodt_ctl_auto and
 * default_rodt_ctl, consumed later by the RODT sweep during training.
 * Two DDR4 special cases force 26-ohm drive at >= 1 GHz (see inline
 * comments).
 *
 * NOTE(review): this function mixes the .cn70xx and .cn78xx union
 * views for control_ctl and rodt_ctl — presumably the fields alias
 * across chip variants; confirm against the union definitions.
 */
static void lmc_comp_ctl2(struct ddr_priv *priv)
{
	union cvmx_lmcx_comp_ctl2 cc2;
	char *s;

	cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));

	cc2.cn78xx.dqx_ctl = odt_config[odt_idx].odt_ena;
	/* Default 4=34.3 ohm */
	cc2.cn78xx.ck_ctl = (c_cfg->ck_ctl == 0) ? 4 : c_cfg->ck_ctl;
	/* Default 4=34.3 ohm */
	cc2.cn78xx.cmd_ctl = (c_cfg->cmd_ctl == 0) ? 4 : c_cfg->cmd_ctl;
	/* Default 4=34.3 ohm */
	cc2.cn78xx.control_ctl = (c_cfg->ctl_ctl == 0) ? 4 : c_cfg->ctl_ctl;

	ddr_rodt_ctl_auto = c_cfg->ddr_rodt_ctl_auto;
	s = lookup_env(priv, "ddr_rodt_ctl_auto");
	if (s)
		ddr_rodt_ctl_auto = !!simple_strtoul(s, NULL, 0);

	default_rodt_ctl = odt_config[odt_idx].qs_dic;
	s = lookup_env(priv, "ddr_rodt_ctl");
	if (!s)
		s = lookup_env(priv, "ddr%d_rodt_ctl", if_num);
	if (s) {
		default_rodt_ctl = simple_strtoul(s, NULL, 0);
		/* An explicit override disables the automatic RODT sweep */
		ddr_rodt_ctl_auto = 0;
	}

	cc2.cn70xx.rodt_ctl = default_rodt_ctl;

	// if DDR4, force CK_CTL to 26 ohms if it is currently 34 ohms,
	// and DCLK speed is 1 GHz or more...
	if (ddr_type == DDR4_DRAM && cc2.s.ck_ctl == ddr4_driver_34_ohm &&
	    ddr_hertz >= 1000000000) {
		// lowest for DDR4 is 26 ohms
		cc2.s.ck_ctl = ddr4_driver_26_ohm;
		debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CK_CTL] to %d, %d ohms\n",
		      node, if_num, cc2.s.ck_ctl,
		      imp_val->drive_strength[cc2.s.ck_ctl]);
	}

	// if DDR4, 2DPC, UDIMM, force CONTROL_CTL and CMD_CTL to 26 ohms,
	// if DCLK speed is 1 GHz or more...
	if (ddr_type == DDR4_DRAM && dimm_count == 2 &&
	    (spd_dimm_type == 2 || spd_dimm_type == 6) &&
	    ddr_hertz >= 1000000000) {
		// lowest for DDR4 is 26 ohms
		cc2.cn78xx.control_ctl = ddr4_driver_26_ohm;
		// lowest for DDR4 is 26 ohms
		cc2.cn78xx.cmd_ctl = ddr4_driver_26_ohm;
		debug("N%d.LMC%d: Forcing DDR4 COMP_CTL2[CONTROL_CTL,CMD_CTL] to %d, %d ohms\n",
		      node, if_num, ddr4_driver_26_ohm,
		      imp_val->drive_strength[ddr4_driver_26_ohm]);
	}

	/* Per-field environment overrides */
	s = lookup_env(priv, "ddr_ck_ctl");
	if (s)
		cc2.cn78xx.ck_ctl = simple_strtoul(s, NULL, 0);

	s = lookup_env(priv, "ddr_cmd_ctl");
	if (s)
		cc2.cn78xx.cmd_ctl = simple_strtoul(s, NULL, 0);

	s = lookup_env(priv, "ddr_control_ctl");
	if (s)
		cc2.cn70xx.control_ctl = simple_strtoul(s, NULL, 0);

	s = lookup_env(priv, "ddr_dqx_ctl");
	if (s)
		cc2.cn78xx.dqx_ctl = simple_strtoul(s, NULL, 0);

	debug("%-45s : %d, %d ohms\n", "DQX_CTL ", cc2.cn78xx.dqx_ctl,
	      imp_val->drive_strength[cc2.cn78xx.dqx_ctl]);
	debug("%-45s : %d, %d ohms\n", "CK_CTL ", cc2.cn78xx.ck_ctl,
	      imp_val->drive_strength[cc2.cn78xx.ck_ctl]);
	debug("%-45s : %d, %d ohms\n", "CMD_CTL ", cc2.cn78xx.cmd_ctl,
	      imp_val->drive_strength[cc2.cn78xx.cmd_ctl]);
	debug("%-45s : %d, %d ohms\n", "CONTROL_CTL ",
	      cc2.cn78xx.control_ctl,
	      imp_val->drive_strength[cc2.cn78xx.control_ctl]);
	debug("Read ODT_CTL : 0x%x (%d ohms)\n",
	      cc2.cn78xx.rodt_ctl, imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);

	debug("%-45s : 0x%016llx\n", "COMP_CTL2", cc2.u64);
	lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
}
3734
3735static void lmc_phy_ctl(struct ddr_priv *priv)
3736{
3737 union cvmx_lmcx_phy_ctl phy_ctl;
3738
3739 phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
3740 phy_ctl.s.ts_stagger = 0;
3741 // FIXME: are there others TBD?
3742 phy_ctl.s.dsk_dbg_overwrt_ena = 0;
3743
3744 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) {
3745 // C0 is TEN, C1 is A17
3746 phy_ctl.s.c0_sel = 2;
3747 phy_ctl.s.c1_sel = 2;
3748 debug("N%d.LMC%d: 3DS: setting PHY_CTL[cx_csel] = %d\n",
3749 node, if_num, phy_ctl.s.c1_sel);
3750 }
3751
3752 debug("PHY_CTL : 0x%016llx\n",
3753 phy_ctl.u64);
3754 lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);
3755}
3756
3757static void lmc_ext_config(struct ddr_priv *priv)
3758{
3759 union cvmx_lmcx_ext_config ext_cfg;
3760 char *s;
3761
3762 ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
3763 ext_cfg.s.vrefint_seq_deskew = 0;
3764 ext_cfg.s.read_ena_bprch = 1;
3765 ext_cfg.s.read_ena_fprch = 1;
3766 ext_cfg.s.drive_ena_fprch = 1;
3767 ext_cfg.s.drive_ena_bprch = 1;
3768 // make sure this is OFF for all current chips
3769 ext_cfg.s.invert_data = 0;
3770
3771 s = lookup_env(priv, "ddr_read_fprch");
3772 if (s)
3773 ext_cfg.s.read_ena_fprch = strtoul(s, NULL, 0);
3774
3775 s = lookup_env(priv, "ddr_read_bprch");
3776 if (s)
3777 ext_cfg.s.read_ena_bprch = strtoul(s, NULL, 0);
3778
3779 s = lookup_env(priv, "ddr_drive_fprch");
3780 if (s)
3781 ext_cfg.s.drive_ena_fprch = strtoul(s, NULL, 0);
3782
3783 s = lookup_env(priv, "ddr_drive_bprch");
3784 if (s)
3785 ext_cfg.s.drive_ena_bprch = strtoul(s, NULL, 0);
3786
3787 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) && lranks_per_prank > 1) {
3788 ext_cfg.s.dimm0_cid = lranks_bits;
3789 ext_cfg.s.dimm1_cid = lranks_bits;
3790 debug("N%d.LMC%d: 3DS: setting EXT_CONFIG[dimmx_cid] = %d\n",
3791 node, if_num, ext_cfg.s.dimm0_cid);
3792 }
3793
3794 lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64);
3795 debug("%-45s : 0x%016llx\n", "EXT_CONFIG", ext_cfg.u64);
3796}
3797
3798static void lmc_ext_config2(struct ddr_priv *priv)
3799{
3800 char *s;
3801
3802 // NOTE: all chips have this register, but not necessarily the
3803 // fields we modify...
3804 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) &&
3805 !octeon_is_cpuid(OCTEON_CN73XX)) {
3806 union cvmx_lmcx_ext_config2 ext_cfg2;
3807 int value = 1; // default to 1
3808
3809 ext_cfg2.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG2(if_num));
3810
3811 s = lookup_env(priv, "ddr_ext2_delay_unload");
3812 if (s)
3813 value = !!simple_strtoul(s, NULL, 0);
3814
3815 ext_cfg2.s.delay_unload_r0 = value;
3816 ext_cfg2.s.delay_unload_r1 = value;
3817 ext_cfg2.s.delay_unload_r2 = value;
3818 ext_cfg2.s.delay_unload_r3 = value;
3819
3820 lmc_wr(priv, CVMX_LMCX_EXT_CONFIG2(if_num), ext_cfg2.u64);
3821 debug("%-45s : 0x%016llx\n", "EXT_CONFIG2", ext_cfg2.u64);
3822 }
3823}
3824
3825static void lmc_dimm01_params_loop(struct ddr_priv *priv)
3826{
3827 union cvmx_lmcx_dimmx_params dimm_p;
3828 int dimmx = didx;
3829 char *s;
3830 int rc;
3831 int i;
3832
3833 dimm_p.u64 = lmc_rd(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num));
3834
3835 if (ddr_type == DDR4_DRAM) {
3836 union cvmx_lmcx_dimmx_ddr4_params0 ddr4_p0;
3837 union cvmx_lmcx_dimmx_ddr4_params1 ddr4_p1;
3838 union cvmx_lmcx_ddr4_dimm_ctl ddr4_ctl;
3839
3840 dimm_p.s.rc0 = 0;
3841 dimm_p.s.rc1 = 0;
3842 dimm_p.s.rc2 = 0;
3843
3844 rc = read_spd(&dimm_config_table[didx], 0,
3845 DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CTL);
3846 dimm_p.s.rc3 = (rc >> 4) & 0xf;
3847 dimm_p.s.rc4 = ((rc >> 0) & 0x3) << 2;
3848 dimm_p.s.rc4 |= ((rc >> 2) & 0x3) << 0;
3849
3850 rc = read_spd(&dimm_config_table[didx], 0,
3851 DDR4_SPD_RDIMM_REGISTER_DRIVE_STRENGTH_CK);
3852 dimm_p.s.rc5 = ((rc >> 0) & 0x3) << 2;
3853 dimm_p.s.rc5 |= ((rc >> 2) & 0x3) << 0;
3854
3855 dimm_p.s.rc6 = 0;
3856 dimm_p.s.rc7 = 0;
3857 dimm_p.s.rc8 = 0;
3858 dimm_p.s.rc9 = 0;
3859
3860 /*
3861 * rc10 DDR4 RDIMM Operating Speed
3862 * === ===================================================
3863 * 0 tclk_psecs >= 1250 psec DDR4-1600 (1250 ps)
3864 * 1 1250 psec > tclk_psecs >= 1071 psec DDR4-1866 (1071 ps)
3865 * 2 1071 psec > tclk_psecs >= 938 psec DDR4-2133 ( 938 ps)
3866 * 3 938 psec > tclk_psecs >= 833 psec DDR4-2400 ( 833 ps)
3867 * 4 833 psec > tclk_psecs >= 750 psec DDR4-2666 ( 750 ps)
3868 * 5 750 psec > tclk_psecs >= 625 psec DDR4-3200 ( 625 ps)
3869 */
3870 dimm_p.s.rc10 = 0;
3871 if (tclk_psecs < 1250)
3872 dimm_p.s.rc10 = 1;
3873 if (tclk_psecs < 1071)
3874 dimm_p.s.rc10 = 2;
3875 if (tclk_psecs < 938)
3876 dimm_p.s.rc10 = 3;
3877 if (tclk_psecs < 833)
3878 dimm_p.s.rc10 = 4;
3879 if (tclk_psecs < 750)
3880 dimm_p.s.rc10 = 5;
3881
3882 dimm_p.s.rc11 = 0;
3883 dimm_p.s.rc12 = 0;
3884 /* 0=LRDIMM, 1=RDIMM */
3885 dimm_p.s.rc13 = (spd_dimm_type == 4) ? 0 : 4;
3886 dimm_p.s.rc13 |= (ddr_type == DDR4_DRAM) ?
3887 (spd_addr_mirror << 3) : 0;
3888 dimm_p.s.rc14 = 0;
3889 dimm_p.s.rc15 = 0; /* 1 nCK latency adder */
3890
3891 ddr4_p0.u64 = 0;
3892
3893 ddr4_p0.s.rc8x = 0;
3894 ddr4_p0.s.rc7x = 0;
3895 ddr4_p0.s.rc6x = 0;
3896 ddr4_p0.s.rc5x = 0;
3897 ddr4_p0.s.rc4x = 0;
3898
3899 ddr4_p0.s.rc3x = compute_rc3x(tclk_psecs);
3900
3901 ddr4_p0.s.rc2x = 0;
3902 ddr4_p0.s.rc1x = 0;
3903
3904 ddr4_p1.u64 = 0;
3905
3906 ddr4_p1.s.rcbx = 0;
3907 ddr4_p1.s.rcax = 0;
3908 ddr4_p1.s.rc9x = 0;
3909
3910 ddr4_ctl.u64 = 0;
3911 ddr4_ctl.cn70xx.ddr4_dimm0_wmask = 0x004;
3912 ddr4_ctl.cn70xx.ddr4_dimm1_wmask =
3913 (dimm_count > 1) ? 0x004 : 0x0000;
3914
3915 /*
3916 * Handle any overrides from envvars here...
3917 */
3918 s = lookup_env(priv, "ddr_ddr4_params0");
3919 if (s)
3920 ddr4_p0.u64 = simple_strtoul(s, NULL, 0);
3921
3922 s = lookup_env(priv, "ddr_ddr4_params1");
3923 if (s)
3924 ddr4_p1.u64 = simple_strtoul(s, NULL, 0);
3925
3926 s = lookup_env(priv, "ddr_ddr4_dimm_ctl");
3927 if (s)
3928 ddr4_ctl.u64 = simple_strtoul(s, NULL, 0);
3929
3930 for (i = 0; i < 11; ++i) {
3931 u64 value;
3932
3933 s = lookup_env(priv, "ddr_ddr4_rc%1xx", i + 1);
3934 if (s) {
3935 value = simple_strtoul(s, NULL, 0);
3936 if (i < 8) {
3937 ddr4_p0.u64 &= ~((u64)0xff << (i * 8));
3938 ddr4_p0.u64 |= (value << (i * 8));
3939 } else {
3940 ddr4_p1.u64 &=
3941 ~((u64)0xff << ((i - 8) * 8));
3942 ddr4_p1.u64 |= (value << ((i - 8) * 8));
3943 }
3944 }
3945 }
3946
3947 /*
3948 * write the final CSR values
3949 */
3950 lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS0(dimmx, if_num),
3951 ddr4_p0.u64);
3952
3953 lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), ddr4_ctl.u64);
3954
3955 lmc_wr(priv, CVMX_LMCX_DIMMX_DDR4_PARAMS1(dimmx, if_num),
3956 ddr4_p1.u64);
3957
3958 debug("DIMM%d Register Control Words RCBx:RC1x : %x %x %x %x %x %x %x %x %x %x %x\n",
3959 dimmx, ddr4_p1.s.rcbx, ddr4_p1.s.rcax,
3960 ddr4_p1.s.rc9x, ddr4_p0.s.rc8x,
3961 ddr4_p0.s.rc7x, ddr4_p0.s.rc6x,
3962 ddr4_p0.s.rc5x, ddr4_p0.s.rc4x,
3963 ddr4_p0.s.rc3x, ddr4_p0.s.rc2x, ddr4_p0.s.rc1x);
3964
3965 } else {
3966 rc = read_spd(&dimm_config_table[didx], 0, 69);
3967 dimm_p.s.rc0 = (rc >> 0) & 0xf;
3968 dimm_p.s.rc1 = (rc >> 4) & 0xf;
3969
3970 rc = read_spd(&dimm_config_table[didx], 0, 70);
3971 dimm_p.s.rc2 = (rc >> 0) & 0xf;
3972 dimm_p.s.rc3 = (rc >> 4) & 0xf;
3973
3974 rc = read_spd(&dimm_config_table[didx], 0, 71);
3975 dimm_p.s.rc4 = (rc >> 0) & 0xf;
3976 dimm_p.s.rc5 = (rc >> 4) & 0xf;
3977
3978 rc = read_spd(&dimm_config_table[didx], 0, 72);
3979 dimm_p.s.rc6 = (rc >> 0) & 0xf;
3980 dimm_p.s.rc7 = (rc >> 4) & 0xf;
3981
3982 rc = read_spd(&dimm_config_table[didx], 0, 73);
3983 dimm_p.s.rc8 = (rc >> 0) & 0xf;
3984 dimm_p.s.rc9 = (rc >> 4) & 0xf;
3985
3986 rc = read_spd(&dimm_config_table[didx], 0, 74);
3987 dimm_p.s.rc10 = (rc >> 0) & 0xf;
3988 dimm_p.s.rc11 = (rc >> 4) & 0xf;
3989
3990 rc = read_spd(&dimm_config_table[didx], 0, 75);
3991 dimm_p.s.rc12 = (rc >> 0) & 0xf;
3992 dimm_p.s.rc13 = (rc >> 4) & 0xf;
3993
3994 rc = read_spd(&dimm_config_table[didx], 0, 76);
3995 dimm_p.s.rc14 = (rc >> 0) & 0xf;
3996 dimm_p.s.rc15 = (rc >> 4) & 0xf;
3997
3998 s = ddr_getenv_debug(priv, "ddr_clk_drive");
3999 if (s) {
4000 if (strcmp(s, "light") == 0)
4001 dimm_p.s.rc5 = 0x0; /* Light Drive */
4002 if (strcmp(s, "moderate") == 0)
4003 dimm_p.s.rc5 = 0x5; /* Moderate Drive */
4004 if (strcmp(s, "strong") == 0)
4005 dimm_p.s.rc5 = 0xA; /* Strong Drive */
4006 printf("Parameter found in environment. ddr_clk_drive = %s\n",
4007 s);
4008 }
4009
4010 s = ddr_getenv_debug(priv, "ddr_cmd_drive");
4011 if (s) {
4012 if (strcmp(s, "light") == 0)
4013 dimm_p.s.rc3 = 0x0; /* Light Drive */
4014 if (strcmp(s, "moderate") == 0)
4015 dimm_p.s.rc3 = 0x5; /* Moderate Drive */
4016 if (strcmp(s, "strong") == 0)
4017 dimm_p.s.rc3 = 0xA; /* Strong Drive */
4018 printf("Parameter found in environment. ddr_cmd_drive = %s\n",
4019 s);
4020 }
4021
4022 s = ddr_getenv_debug(priv, "ddr_ctl_drive");
4023 if (s) {
4024 if (strcmp(s, "light") == 0)
4025 dimm_p.s.rc4 = 0x0; /* Light Drive */
4026 if (strcmp(s, "moderate") == 0)
4027 dimm_p.s.rc4 = 0x5; /* Moderate Drive */
4028 printf("Parameter found in environment. ddr_ctl_drive = %s\n",
4029 s);
4030 }
4031
4032 /*
4033 * rc10 DDR3 RDIMM Operating Speed
4034 * == =====================================================
4035 * 0 tclk_psecs >= 2500 psec DDR3/DDR3L-800 def
4036 * 1 2500 psec > tclk_psecs >= 1875 psec DDR3/DDR3L-1066
4037 * 2 1875 psec > tclk_psecs >= 1500 psec DDR3/DDR3L-1333
4038 * 3 1500 psec > tclk_psecs >= 1250 psec DDR3/DDR3L-1600
4039 * 4 1250 psec > tclk_psecs >= 1071 psec DDR3-1866
4040 */
4041 dimm_p.s.rc10 = 0;
4042 if (tclk_psecs < 2500)
4043 dimm_p.s.rc10 = 1;
4044 if (tclk_psecs < 1875)
4045 dimm_p.s.rc10 = 2;
4046 if (tclk_psecs < 1500)
4047 dimm_p.s.rc10 = 3;
4048 if (tclk_psecs < 1250)
4049 dimm_p.s.rc10 = 4;
4050 }
4051
4052 s = lookup_env(priv, "ddr_dimmx_params", i);
4053 if (s)
4054 dimm_p.u64 = simple_strtoul(s, NULL, 0);
4055
4056 for (i = 0; i < 16; ++i) {
4057 u64 value;
4058
4059 s = lookup_env(priv, "ddr_rc%d", i);
4060 if (s) {
4061 value = simple_strtoul(s, NULL, 0);
4062 dimm_p.u64 &= ~((u64)0xf << (i * 4));
4063 dimm_p.u64 |= (value << (i * 4));
4064 }
4065 }
4066
4067 lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(dimmx, if_num), dimm_p.u64);
4068
4069 debug("DIMM%d Register Control Words RC15:RC0 : %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x\n",
4070 dimmx, dimm_p.s.rc15, dimm_p.s.rc14, dimm_p.s.rc13,
4071 dimm_p.s.rc12, dimm_p.s.rc11, dimm_p.s.rc10,
4072 dimm_p.s.rc9, dimm_p.s.rc8, dimm_p.s.rc7,
4073 dimm_p.s.rc6, dimm_p.s.rc5, dimm_p.s.rc4,
4074 dimm_p.s.rc3, dimm_p.s.rc2, dimm_p.s.rc1, dimm_p.s.rc0);
4075
4076 // FIXME: recognize a DDR3 RDIMM with 4 ranks and 2 registers,
4077 // and treat it specially
4078 if (ddr_type == DDR3_DRAM && num_ranks == 4 &&
4079 spd_rdimm_registers == 2 && dimmx == 0) {
4080 debug("DDR3: Copying DIMM0_PARAMS to DIMM1_PARAMS for pseudo-DIMM #1...\n");
4081 lmc_wr(priv, CVMX_LMCX_DIMMX_PARAMS(1, if_num), dimm_p.u64);
4082 }
4083}
4084
4085static void lmc_dimm01_params(struct ddr_priv *priv)
4086{
4087 union cvmx_lmcx_dimm_ctl dimm_ctl;
4088 char *s;
4089
4090 if (spd_rdimm) {
4091 for (didx = 0; didx < (unsigned int)dimm_count; ++didx)
4092 lmc_dimm01_params_loop(priv);
4093
4094 if (ddr_type == DDR4_DRAM) {
4095 /* LMC0_DIMM_CTL */
4096 dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
4097 dimm_ctl.s.dimm0_wmask = 0xdf3f;
4098 dimm_ctl.s.dimm1_wmask =
4099 (dimm_count > 1) ? 0xdf3f : 0x0000;
4100 dimm_ctl.s.tcws = 0x4e0;
4101 dimm_ctl.s.parity = c_cfg->parity;
4102
4103 s = lookup_env(priv, "ddr_dimm0_wmask");
4104 if (s) {
4105 dimm_ctl.s.dimm0_wmask =
4106 simple_strtoul(s, NULL, 0);
4107 }
4108
4109 s = lookup_env(priv, "ddr_dimm1_wmask");
4110 if (s) {
4111 dimm_ctl.s.dimm1_wmask =
4112 simple_strtoul(s, NULL, 0);
4113 }
4114
4115 s = lookup_env(priv, "ddr_dimm_ctl_parity");
4116 if (s)
4117 dimm_ctl.s.parity = simple_strtoul(s, NULL, 0);
4118
4119 s = lookup_env(priv, "ddr_dimm_ctl_tcws");
4120 if (s)
4121 dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0);
4122
4123 debug("LMC DIMM_CTL : 0x%016llx\n",
4124 dimm_ctl.u64);
4125 lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
4126
4127 /* Init RCW */
4128 oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
4129
4130 /* Write RC0D last */
4131 dimm_ctl.s.dimm0_wmask = 0x2000;
4132 dimm_ctl.s.dimm1_wmask = (dimm_count > 1) ?
4133 0x2000 : 0x0000;
4134 debug("LMC DIMM_CTL : 0x%016llx\n",
4135 dimm_ctl.u64);
4136 lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
4137
4138 /*
4139 * Don't write any extended registers the second time
4140 */
4141 lmc_wr(priv, CVMX_LMCX_DDR4_DIMM_CTL(if_num), 0);
4142
4143 /* Init RCW */
4144 oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
4145 } else {
4146 /* LMC0_DIMM_CTL */
4147 dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
4148 dimm_ctl.s.dimm0_wmask = 0xffff;
4149 // FIXME: recognize a DDR3 RDIMM with 4 ranks and 2
4150 // registers, and treat it specially
4151 if (num_ranks == 4 && spd_rdimm_registers == 2) {
4152 debug("DDR3: Activating DIMM_CTL[dimm1_mask] bits...\n");
4153 dimm_ctl.s.dimm1_wmask = 0xffff;
4154 } else {
4155 dimm_ctl.s.dimm1_wmask =
4156 (dimm_count > 1) ? 0xffff : 0x0000;
4157 }
4158 dimm_ctl.s.tcws = 0x4e0;
4159 dimm_ctl.s.parity = c_cfg->parity;
4160
4161 s = lookup_env(priv, "ddr_dimm0_wmask");
4162 if (s) {
4163 dimm_ctl.s.dimm0_wmask =
4164 simple_strtoul(s, NULL, 0);
4165 }
4166
4167 s = lookup_env(priv, "ddr_dimm1_wmask");
4168 if (s) {
4169 dimm_ctl.s.dimm1_wmask =
4170 simple_strtoul(s, NULL, 0);
4171 }
4172
4173 s = lookup_env(priv, "ddr_dimm_ctl_parity");
4174 if (s)
4175 dimm_ctl.s.parity = simple_strtoul(s, NULL, 0);
4176
4177 s = lookup_env(priv, "ddr_dimm_ctl_tcws");
4178 if (s)
4179 dimm_ctl.s.tcws = simple_strtoul(s, NULL, 0);
4180
4181 debug("LMC DIMM_CTL : 0x%016llx\n",
4182 dimm_ctl.u64);
4183 lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
4184
4185 /* Init RCW */
4186 oct3_ddr3_seq(priv, rank_mask, if_num, 0x7);
4187 }
4188
4189 } else {
4190 /* Disable register control writes for unbuffered */
4191 union cvmx_lmcx_dimm_ctl dimm_ctl;
4192
4193 dimm_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DIMM_CTL(if_num));
4194 dimm_ctl.s.dimm0_wmask = 0;
4195 dimm_ctl.s.dimm1_wmask = 0;
4196 lmc_wr(priv, CVMX_LMCX_DIMM_CTL(if_num), dimm_ctl.u64);
4197 }
4198}
4199
/*
 * Run the LMC rank initialization sequence: self-refresh exit (when
 * memory contents are preserved), DLL-reset init sequence, internal
 * VREF (DAC) training with optional multi-sample averaging and env
 * overrides, then deskew training with retry/restart handling.
 *
 * When enable_by_rank_init is set, the whole sequence is repeated for
 * each rank in rank_mask (ranks 3..0, via the start/end labels below),
 * recording per-rank DAC and deskew results that are merged at the end.
 *
 * Returns 0 on success, or -EAGAIN to request a full LMC init restart
 * when deskew training remains incomplete after all retries (and
 * restart_if_dsk_incomplete is set).
 */
static int lmc_rank_init(struct ddr_priv *priv)
{
	char *s;

	if (enable_by_rank_init) {
		by_rank = 3;
		saved_rank_mask = rank_mask;
	}

	/* Per-rank loop head; end_by_rank_init decrements by_rank back here */
start_by_rank_init:

	if (enable_by_rank_init) {
		rank_mask = (1 << by_rank);
		if (!(rank_mask & saved_rank_mask))
			goto end_by_rank_init;
		/* last iteration (rank 0) runs with the full original mask */
		if (by_rank == 0)
			rank_mask = saved_rank_mask;

		debug("\n>>>>> BY_RANK: starting rank %d with mask 0x%02x\n\n",
		      by_rank, rank_mask);
	}

	/*
	 * Comments (steps 3 through 5) continue in oct3_ddr3_seq()
	 */
	union cvmx_lmcx_modereg_params0 mp0;

	if (ddr_memory_preserved(priv)) {
		/*
		 * Contents are being preserved. Take DRAM out of self-refresh
		 * first. Then init steps can procede normally
		 */
		/* self-refresh exit */
		oct3_ddr3_seq(priv, rank_mask, if_num, 3);
	}

	mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
	mp0.s.dllr = 1; /* Set during first init sequence */
	lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);

	ddr_init_seq(priv, rank_mask, if_num);

	mp0.s.dllr = 0; /* Clear for normal operation */
	lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num), mp0.u64);

	if (spd_rdimm && ddr_type == DDR4_DRAM &&
	    octeon_is_cpuid(OCTEON_CN7XXX)) {
		debug("Running init sequence 1\n");
		change_rdimm_mpr_pattern(priv, rank_mask, if_num, dimm_count);
	}

	memset(lanes, 0, sizeof(lanes));
	for (lane = 0; lane < last_lane; lane++) {
		// init all lanes to reset value
		dac_settings[lane] = 127;
	}

	// FIXME: disable internal VREF if deskew is disabled?
	if (disable_deskew_training) {
		debug("N%d.LMC%d: internal VREF Training disabled, leaving them in RESET.\n",
		      node, if_num);
		num_samples = 0;
	} else if (ddr_type == DDR4_DRAM &&
		   !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
		num_samples = DEFAULT_DAC_SAMPLES;
	} else {
		// if DDR3 or no ability to write DAC values
		num_samples = 1;
	}

	/* Retry target when deskew training below leaves nibble errors */
perform_internal_vref_training:

	total_dac_eval_retries = 0;
	dac_eval_exhausted = 0;

	for (sample = 0; sample < num_samples; sample++) {
		dac_eval_retries = 0;

		// make offset and internal vref training repeatable
		do {
			/*
			 * 6.9.8 LMC Offset Training
			 * LMC requires input-receiver offset training.
			 */
			perform_offset_training(priv, rank_mask, if_num);

			/*
			 * 6.9.9 LMC Internal vref Training
			 * LMC requires input-reference-voltage training.
			 */
			perform_internal_vref_training(priv, rank_mask, if_num);

			// read and maybe display the DAC values for a sample
			read_dac_dbi_settings(priv, if_num, /*DAC*/ 1,
					      dac_settings);
			if (num_samples == 1 || ddr_verbose(priv)) {
				display_dac_dbi_settings(if_num, /*DAC*/ 1,
							 use_ecc, dac_settings,
							 "Internal VREF");
			}

			// for DDR4, evaluate the DAC settings and retry
			// if any issues
			if (ddr_type == DDR4_DRAM) {
				if (evaluate_dac_settings
				    (if_64b, use_ecc, dac_settings)) {
					dac_eval_retries += 1;
					if (dac_eval_retries >
					    DAC_RETRIES_LIMIT) {
						debug("N%d.LMC%d: DDR4 internal VREF DAC settings: retries exhausted; continuing...\n",
						      node, if_num);
						dac_eval_exhausted += 1;
					} else {
						debug("N%d.LMC%d: DDR4 internal VREF DAC settings inconsistent; retrying....\n",
						      node, if_num);
						total_dac_eval_retries += 1;
						// try another sample
						continue;
					}
				}

				// taking multiple samples, otherwise do nothing
				if (num_samples > 1) {
					// good sample or exhausted retries,
					// record it
					for (lane = 0; lane < last_lane;
					     lane++) {
						lanes[lane].bytes[sample] =
							dac_settings[lane];
					}
				}
			}
			// done if DDR3, or good sample, or exhausted retries
			break;
		} while (1);
	}

	if (ddr_type == DDR4_DRAM && dac_eval_exhausted > 0) {
		debug("N%d.LMC%d: DDR internal VREF DAC settings: total retries %d, exhausted %d\n",
		      node, if_num, total_dac_eval_retries, dac_eval_exhausted);
	}

	if (num_samples > 1) {
		debug("N%d.LMC%d: DDR4 internal VREF DAC settings: processing multiple samples...\n",
		      node, if_num);

		for (lane = 0; lane < last_lane; lane++) {
			dac_settings[lane] =
				process_samples_average(&lanes[lane].bytes[0],
							num_samples, if_num, lane);
		}
		display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc,
					 dac_settings, "Averaged VREF");

		// finally, write the final DAC values
		for (lane = 0; lane < last_lane; lane++) {
			load_dac_override(priv, if_num, dac_settings[lane],
					  lane);
		}
	}

	// allow override of any byte-lane internal VREF
	int overrode_vref_dac = 0;

	for (lane = 0; lane < last_lane; lane++) {
		s = lookup_env(priv, "ddr%d_vref_dac_byte%d", if_num, lane);
		if (s) {
			dac_settings[lane] = simple_strtoul(s, NULL, 0);
			overrode_vref_dac = 1;
			// finally, write the new DAC value
			load_dac_override(priv, if_num, dac_settings[lane],
					  lane);
		}
	}
	if (overrode_vref_dac) {
		display_dac_dbi_settings(if_num, /*DAC*/ 1, use_ecc,
					 dac_settings, "Override VREF");
	}

	// as a second step, after internal VREF training, before starting
	// deskew training:
	// for DDR3 and OCTEON3 not O78 pass 1.x, override the DAC setting
	// to 127
	if (ddr_type == DDR3_DRAM && !octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) &&
	    !disable_deskew_training) {
		load_dac_override(priv, if_num, 127, /* all */ 0x0A);
		debug("N%d.LMC%d: Overriding DDR3 internal VREF DAC settings to 127.\n",
		      node, if_num);
	}

	/*
	 * 4.8.8 LMC Deskew Training
	 *
	 * LMC requires input-read-data deskew training.
	 */
	if (!disable_deskew_training) {
		deskew_training_errors =
			perform_deskew_training(priv, rank_mask, if_num,
						spd_rawcard_aorb);

		// All the Deskew lock and saturation retries (may) have
		// been done, but we ended up with nibble errors; so,
		// as a last ditch effort, try the Internal vref
		// Training again...
		if (deskew_training_errors) {
			if (internal_retries <
			    DEFAULT_INTERNAL_VREF_TRAINING_LIMIT) {
				internal_retries++;
				debug("N%d.LMC%d: Deskew training results still unsettled - retrying internal vref training (%d)\n",
				      node, if_num, internal_retries);
				goto perform_internal_vref_training;
			} else {
				if (restart_if_dsk_incomplete) {
					debug("N%d.LMC%d: INFO: Deskew training incomplete - %d retries exhausted, Restarting LMC init...\n",
					      node, if_num, internal_retries);
					return -EAGAIN;
				}
				debug("N%d.LMC%d: Deskew training incomplete - %d retries exhausted, but continuing...\n",
				      node, if_num, internal_retries);
			}
		} /* if (deskew_training_errors) */

		// FIXME: treat this as the final DSK print from now on,
		// and print if VBL_NORM or above also, save the results
		// of the original training in case we want them later
		validate_deskew_training(priv, rank_mask, if_num,
					 &deskew_training_results, 1);
	} else { /* if (! disable_deskew_training) */
		debug("N%d.LMC%d: Deskew Training disabled, printing settings before HWL.\n",
		      node, if_num);
		validate_deskew_training(priv, rank_mask, if_num,
					 &deskew_training_results, 1);
	} /* if (! disable_deskew_training) */

	if (enable_by_rank_init) {
		/* Record this rank's DAC and deskew results for later merge */
		read_dac_dbi_settings(priv, if_num, /*dac */ 1,
				      &rank_dac[by_rank].bytes[0]);
		get_deskew_settings(priv, if_num, &rank_dsk[by_rank]);
		debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank);
	}

end_by_rank_init:

	if (enable_by_rank_init) {
		//debug("\n>>>>> BY_RANK: ending rank %d\n\n", by_rank);

		by_rank--;
		if (by_rank >= 0)
			goto start_by_rank_init;

		/* All ranks done: restore mask, re-init, merge results */
		rank_mask = saved_rank_mask;
		ddr_init_seq(priv, rank_mask, if_num);

		process_by_rank_dac(priv, if_num, rank_mask, rank_dac);
		process_by_rank_dsk(priv, if_num, rank_mask, rank_dsk);

		// FIXME: set this to prevent later checking!!!
		disable_deskew_training = 1;

		debug("\n>>>>> BY_RANK: FINISHED!!\n\n");
	}

	return 0;
}
4464
4465static void lmc_config_2(struct ddr_priv *priv)
4466{
4467 union cvmx_lmcx_config lmc_config;
4468 int save_ref_zqcs_int;
4469 u64 temp_delay_usecs;
4470
4471 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
4472
4473 /*
4474 * Temporarily select the minimum ZQCS interval and wait
4475 * long enough for a few ZQCS calibrations to occur. This
4476 * should ensure that the calibration circuitry is
4477 * stabilized before read/write leveling occurs.
4478 */
4479 if (octeon_is_cpuid(OCTEON_CN7XXX)) {
4480 save_ref_zqcs_int = lmc_config.cn78xx.ref_zqcs_int;
4481 /* set smallest interval */
4482 lmc_config.cn78xx.ref_zqcs_int = 1 | (32 << 7);
4483 } else {
4484 save_ref_zqcs_int = lmc_config.cn63xx.ref_zqcs_int;
4485 /* set smallest interval */
4486 lmc_config.cn63xx.ref_zqcs_int = 1 | (32 << 7);
4487 }
4488 lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
4489 lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
4490
4491 /*
4492 * Compute an appropriate delay based on the current ZQCS
4493 * interval. The delay should be long enough for the
4494 * current ZQCS delay counter to expire plus ten of the
4495 * minimum intarvals to ensure that some calibrations
4496 * occur.
4497 */
4498 temp_delay_usecs = (((u64)save_ref_zqcs_int >> 7) * tclk_psecs *
4499 100 * 512 * 128) / (10000 * 10000) + 10 *
4500 ((u64)32 * tclk_psecs * 100 * 512 * 128) / (10000 * 10000);
4501
4502 debug("Waiting %lld usecs for ZQCS calibrations to start\n",
4503 temp_delay_usecs);
4504 udelay(temp_delay_usecs);
4505
4506 if (octeon_is_cpuid(OCTEON_CN7XXX)) {
4507 /* Restore computed interval */
4508 lmc_config.cn78xx.ref_zqcs_int = save_ref_zqcs_int;
4509 } else {
4510 /* Restore computed interval */
4511 lmc_config.cn63xx.ref_zqcs_int = save_ref_zqcs_int;
4512 }
4513
4514 lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), lmc_config.u64);
4515 lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
4516}
4517
/*
 * File-scope write-leveling state shared by lmc_write_leveling() and
 * lmc_write_leveling_loop(). Placed explicitly in .data
 * (NOTE(review): presumably because this runs before BSS is
 * available/cleared - confirm against the board init flow).
 */
static union cvmx_lmcx_wlevel_ctl wl_ctl __section(".data");
static union cvmx_lmcx_wlevel_rankx wl_rank __section(".data");
static union cvmx_lmcx_modereg_params1 mp1 __section(".data");

static int wl_mask[9] __section(".data");	/* per-byte HW WL bitmasks */
static int byte_idx __section(".data");
static int ecc_ena __section(".data");		/* from LMC_CONFIG[ecc_ena] */
static int wl_roundup __section(".data");	/* round up odd bitmask delays */
static int save_mode32b __section(".data");	/* saved LMC_CONFIG[mode32b] */
static int disable_hwl_validity __section(".data");
static int default_wl_rtt_nom __section(".data");
static int wl_pbm_pump __section(".data");	/* weight for perfect-bitmask samples */
4530
/*
 * Run hardware write-leveling for one rank.
 *
 * Takes up to wl_loops samples; each sample clears the rank's WLEVEL
 * delays, runs the HW write-leveling sequence (seq 6) on all byte
 * lanes at once, reads back the per-byte bitmasks and optionally
 * validates them. Samples with bitmask/validity errors are retried up
 * to WLOOP_RETRIES_DEFAULT times. With multiple samples, the final
 * per-byte delay is chosen by majority vote (perfect-bitmask samples
 * are weighted by wl_pbm_pump), and alternates are recorded in
 * hwl_alts[] for later software write-leveling attempts.
 */
static void lmc_write_leveling_loop(struct ddr_priv *priv, int rankx)
{
	int wloop = 0;
	// retries per sample for HW-related issues with bitmasks or values
	int wloop_retries = 0;
	int wloop_retries_total = 0;
	int wloop_retries_exhausted = 0;
#define WLOOP_RETRIES_DEFAULT 5
	int wl_val_err;
	int wl_mask_err_rank = 0;
	int wl_val_err_rank = 0;
	// array to collect counts of byte-lane values
	// assume low-order 3 bits and even, so really only 2-bit values
	struct wlevel_bitcnt wl_bytes[9], wl_bytes_extra[9];
	int extra_bumps, extra_mask;
	int rank_nom = 0;

	/* nothing to do for a rank with no attached DRAM */
	if (!(rank_mask & (1 << rankx)))
		return;

	if (match_wl_rtt_nom) {
		if (rankx == 0)
			rank_nom = mp1.s.rtt_nom_00;
		if (rankx == 1)
			rank_nom = mp1.s.rtt_nom_01;
		if (rankx == 2)
			rank_nom = mp1.s.rtt_nom_10;
		if (rankx == 3)
			rank_nom = mp1.s.rtt_nom_11;

		debug("N%d.LMC%d.R%d: Setting WLEVEL_CTL[rtt_nom] to %d (%d)\n",
		      node, if_num, rankx, rank_nom,
		      imp_val->rtt_nom_ohms[rank_nom]);
	}

	memset(wl_bytes, 0, sizeof(wl_bytes));
	memset(wl_bytes_extra, 0, sizeof(wl_bytes_extra));

	// restructure the looping so we can keep trying until we get the
	// samples we want
	while (wloop < wl_loops) {
		wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num));

		wl_ctl.cn78xx.rtt_nom =
			(default_wl_rtt_nom > 0) ? (default_wl_rtt_nom - 1) : 7;

		if (match_wl_rtt_nom) {
			wl_ctl.cn78xx.rtt_nom =
				(rank_nom > 0) ? (rank_nom - 1) : 7;
		}

		/* Clear write-level delays */
		lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), 0);

		wl_mask_err = 0; /* Reset error counters */
		wl_val_err = 0;

		for (byte_idx = 0; byte_idx < 9; ++byte_idx)
			wl_mask[byte_idx] = 0; /* Reset bitmasks */

		// do all the byte-lanes at the same time
		wl_ctl.cn78xx.lanemask = 0x1ff;

		lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64);

		/*
		 * Read and write values back in order to update the
		 * status field. This insures that we read the updated
		 * values after write-leveling has completed.
		 */
		lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
		       lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num)));

		/* write-leveling */
		oct3_ddr3_seq(priv, 1 << rankx, if_num, 6);

		/* busy-wait for the HW sequence to report completion */
		do {
			wl_rank.u64 = lmc_rd(priv,
					     CVMX_LMCX_WLEVEL_RANKX(rankx,
								    if_num));
		} while (wl_rank.cn78xx.status != 3);

		wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx,
								  if_num));

		for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
			wl_mask[byte_idx] = lmc_ddr3_wl_dbg_read(priv,
								 if_num,
								 byte_idx);
			if (wl_mask[byte_idx] == 0)
				++wl_mask_err;
		}

		// check validity only if no bitmask errors
		if (wl_mask_err == 0) {
			if ((spd_dimm_type == 1 || spd_dimm_type == 2) &&
			    dram_width != 16 && if_64b &&
			    !disable_hwl_validity) {
				// bypass if [mini|SO]-[RU]DIMM or x16 or
				// 32-bit
				wl_val_err =
					validate_hw_wl_settings(if_num,
								&wl_rank,
								spd_rdimm, ecc_ena);
				wl_val_err_rank += (wl_val_err != 0);
			}
		} else {
			wl_mask_err_rank++;
		}

		// before we print, if we had bitmask or validity errors,
		// do a retry...
		if (wl_mask_err != 0 || wl_val_err != 0) {
			if (wloop_retries < WLOOP_RETRIES_DEFAULT) {
				wloop_retries++;
				wloop_retries_total++;
				// this printout is per-retry: only when VBL
				// is high enough (DEV?)
				// FIXME: do we want to show the bad bitmaps
				// or delays here also?
				debug("N%d.LMC%d.R%d: H/W Write-Leveling had %s errors - retrying...\n",
				      node, if_num, rankx,
				      (wl_mask_err) ? "Bitmask" : "Validity");
				// this takes us back to the top without
				// counting a sample
				// NOTE(review): this is a 'return', not a
				// 'continue' - it exits the function and
				// abandons the remaining samples for this
				// rank; confirm the caller re-invokes,
				// otherwise 'continue' may have been intended
				return;
			}

			// retries exhausted, do not print at normal VBL
			debug("N%d.LMC%d.R%d: H/W Write-Leveling issues: %s errors\n",
			      node, if_num, rankx,
			      (wl_mask_err) ? "Bitmask" : "Validity");
			wloop_retries_exhausted++;
		}
		// no errors or exhausted retries, use this sample
		wloop_retries = 0; //reset for next sample

		// when only 1 sample or forced, print the bitmasks then
		// current HW WL
		if (wl_loops == 1 || wl_print) {
			if (wl_print > 1)
				display_wl_bm(if_num, rankx, wl_mask);
			display_wl(if_num, wl_rank, rankx);
		}

		if (wl_roundup) { /* Round up odd bitmask delays */
			for (byte_idx = 0; byte_idx < (8 + ecc_ena);
			     ++byte_idx) {
				// NOTE(review): 'return' here exits the whole
				// function on the first byte lane not in
				// if_bytemask; 'continue' looks intended -
				// confirm
				if (!(if_bytemask & (1 << byte_idx)))
					return;
				upd_wl_rank(&wl_rank, byte_idx,
					    roundup_ddr3_wlevel_bitmask
					    (wl_mask[byte_idx]));
			}
			lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
			       wl_rank.u64);
			display_wl(if_num, wl_rank, rankx);
		}

		// OK, we have a decent sample, no bitmask or validity errors
		extra_bumps = 0;
		extra_mask = 0;
		for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
			int ix;

			// NOTE(review): same concern as the round-up loop -
			// 'continue' looks intended here, confirm
			if (!(if_bytemask & (1 << byte_idx)))
				return;

			// increment count of byte-lane value
			// only 4 values
			ix = (get_wl_rank(&wl_rank, byte_idx) >> 1) & 3;
			wl_bytes[byte_idx].bitcnt[ix]++;
			wl_bytes_extra[byte_idx].bitcnt[ix]++;
			// if perfect...
			if (__builtin_popcount(wl_mask[byte_idx]) == 4) {
				wl_bytes_extra[byte_idx].bitcnt[ix] +=
					wl_pbm_pump;
				extra_bumps++;
				extra_mask |= 1 << byte_idx;
			}
		}

		if (extra_bumps) {
			if (wl_print > 1) {
				debug("N%d.LMC%d.R%d: HWL sample had %d bumps (0x%02x).\n",
				      node, if_num, rankx, extra_bumps,
				      extra_mask);
			}
		}

		// if we get here, we have taken a decent sample
		wloop++;

	} /* while (wloop < wl_loops) */

	// if we did sample more than once, try to pick a majority vote
	if (wl_loops > 1) {
		// look for the majority in each byte-lane
		for (byte_idx = 0; byte_idx < (8 + ecc_ena); ++byte_idx) {
			int mx, mc, xc, cc;
			int ix, alts;
			int maj, xmaj, xmx, xmc, xxc, xcc;

			// NOTE(review): same concern as the round-up loop -
			// 'continue' looks intended here, confirm
			if (!(if_bytemask & (1 << byte_idx)))
				return;
			maj = find_wl_majority(&wl_bytes[byte_idx], &mx,
					       &mc, &xc, &cc);
			xmaj = find_wl_majority(&wl_bytes_extra[byte_idx],
						&xmx, &xmc, &xxc, &xcc);
			if (maj != xmaj) {
				if (wl_print) {
					debug("N%d.LMC%d.R%d: Byte %d: HWL maj %d(%d), USING xmaj %d(%d)\n",
					      node, if_num, rankx,
					      byte_idx, maj, xc, xmaj, xxc);
				}
				// prefer the pump-weighted (extra) majority
				mx = xmx;
				mc = xmc;
				xc = xxc;
				cc = xcc;
			}

			// see if there was an alternate
			// take out the majority choice
			alts = (mc & ~(1 << mx));
			if (alts != 0) {
				for (ix = 0; ix < 4; ix++) {
					// FIXME: could be done multiple times?
					// bad if so
					if (alts & (1 << ix)) {
						// set the mask
						hwl_alts[rankx].hwl_alt_mask |=
							(1 << byte_idx);
						// record the value
						hwl_alts[rankx].hwl_alt_delay[byte_idx] =
							ix << 1;
						if (wl_print > 1) {
							debug("N%d.LMC%d.R%d: SWL_TRY_HWL_ALT: Byte %d maj %d (%d) alt %d (%d).\n",
							      node,
							      if_num,
							      rankx,
							      byte_idx,
							      mx << 1,
							      xc,
							      ix << 1,
							      wl_bytes
							      [byte_idx].bitcnt
							      [ix]);
						}
					}
				}
			}

			if (cc > 2) { // unlikely, but...
				// assume: counts for 3 indices are all 1
				// possiblities are: 0/2/4, 2/4/6, 0/4/6, 0/2/6
				// and the desired?: 2 , 4 , 6, 0
				// we choose the middle, assuming one of the
				// outliers is bad
				// NOTE: this is an ugly hack at the moment;
				// there must be a better way
				switch (mc) {
				case 0x7:
					mx = 1;
					break; // was 0/2/4, choose 2
				case 0xb:
					mx = 0;
					break; // was 0/2/6, choose 0
				case 0xd:
					mx = 3;
					break; // was 0/4/6, choose 6
				case 0xe:
					mx = 2;
					break; // was 2/4/6, choose 4
				default:
				case 0xf:
					mx = 1;
					break; // was 0/2/4/6, choose 2?
				}
				printf("N%d.LMC%d.R%d: HW WL MAJORITY: bad byte-lane %d (0x%x), using %d.\n",
				       node, if_num, rankx, byte_idx, mc,
				       mx << 1);
			}
			upd_wl_rank(&wl_rank, byte_idx, mx << 1);
		}

		lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
		       wl_rank.u64);
		display_wl_with_final(if_num, wl_rank, rankx);

		// FIXME: does this help make the output a little easier
		// to focus?
		if (wl_print > 0)
			debug("-----------\n");

	} /* if (wl_loops > 1) */

	// maybe print an error summary for the rank
	if (wl_mask_err_rank != 0 || wl_val_err_rank != 0) {
		debug("N%d.LMC%d.R%d: H/W Write-Leveling errors - %d bitmask, %d validity, %d retries, %d exhausted\n",
		      node, if_num, rankx, wl_mask_err_rank,
		      wl_val_err_rank, wloop_retries_total,
		      wloop_retries_exhausted);
	}
}
4835
4836static void lmc_write_leveling(struct ddr_priv *priv)
4837{
4838 union cvmx_lmcx_config cfg;
4839 int rankx;
4840 char *s;
4841
4842 /*
4843 * 4.8.9 LMC Write Leveling
4844 *
4845 * LMC supports an automatic write leveling like that described in the
4846 * JEDEC DDR3 specifications separately per byte-lane.
4847 *
4848 * All of DDR PLL, LMC CK, LMC DRESET, and early LMC initializations
4849 * must be completed prior to starting this LMC write-leveling sequence.
4850 *
4851 * There are many possible procedures that will write-level all the
4852 * attached DDR3 DRAM parts. One possibility is for software to simply
4853 * write the desired values into LMC(0)_WLEVEL_RANK(0..3). This section
4854 * describes one possible sequence that uses LMC's autowrite-leveling
4855 * capabilities.
4856 *
4857 * 1. If the DQS/DQ delays on the board may be more than the ADD/CMD
4858 * delays, then ensure that LMC(0)_CONFIG[EARLY_DQX] is set at this
4859 * point.
4860 *
4861 * Do the remaining steps 2-7 separately for each rank i with attached
4862 * DRAM.
4863 *
4864 * 2. Write LMC(0)_WLEVEL_RANKi = 0.
4865 *
4866 * 3. For x8 parts:
4867 *
4868 * Without changing any other fields in LMC(0)_WLEVEL_CTL, write
4869 * LMC(0)_WLEVEL_CTL[LANEMASK] to select all byte lanes with attached
4870 * DRAM.
4871 *
4872 * For x16 parts:
4873 *
4874 * Without changing any other fields in LMC(0)_WLEVEL_CTL, write
4875 * LMC(0)_WLEVEL_CTL[LANEMASK] to select all even byte lanes with
4876 * attached DRAM.
4877 *
4878 * 4. Without changing any other fields in LMC(0)_CONFIG,
4879 *
4880 * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select write-leveling
4881 *
4882 * o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
4883 *
4884 * o write LMC(0)_SEQ_CTL[INIT_START] = 1
4885 *
4886 * LMC will initiate write-leveling at this point. Assuming
4887 * LMC(0)_WLEVEL_CTL [SSET] = 0, LMC first enables write-leveling on
4888 * the selected DRAM rank via a DDR3 MR1 write, then sequences
4889 * through
4890 * and accumulates write-leveling results for eight different delay
4891 * settings twice, starting at a delay of zero in this case since
4892 * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] = 0, increasing by 1/8 CK each
4893 * setting, covering a total distance of one CK, then disables the
4894 * write-leveling via another DDR3 MR1 write.
4895 *
4896 * After the sequence through 16 delay settings is complete:
4897 *
4898 * o LMC sets LMC(0)_WLEVEL_RANKi[STATUS] = 3
4899 *
4900 * o LMC sets LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] (for all ranks selected
4901 * by LMC(0)_WLEVEL_CTL[LANEMASK]) to indicate the first write
4902 * leveling result of 1 that followed result of 0 during the
4903 * sequence, except that the LMC always writes
4904 * LMC(0)_WLEVEL_RANKi[BYTE*<0>]=0.
4905 *
4906 * o Software can read the eight write-leveling results from the
4907 * first pass through the delay settings by reading
4908 * LMC(0)_WLEVEL_DBG[BITMASK] (after writing
4909 * LMC(0)_WLEVEL_DBG[BYTE]). (LMC does not retain the writeleveling
4910 * results from the second pass through the eight delay
4911 * settings. They should often be identical to the
4912 * LMC(0)_WLEVEL_DBG[BITMASK] results, though.)
4913 *
4914 * 5. Wait until LMC(0)_WLEVEL_RANKi[STATUS] != 2.
4915 *
4916 * LMC will have updated LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] for all byte
4917 * lanes selected by LMC(0)_WLEVEL_CTL[LANEMASK] at this point.
4918 * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] will still be the value that
4919 * software wrote in substep 2 above, which is 0.
4920 *
4921 * 6. For x16 parts:
4922 *
4923 * Without changing any other fields in LMC(0)_WLEVEL_CTL, write
4924 * LMC(0)_WLEVEL_CTL[LANEMASK] to select all odd byte lanes with
4925 * attached DRAM.
4926 *
4927 * Repeat substeps 4 and 5 with this new LMC(0)_WLEVEL_CTL[LANEMASK]
4928 * setting. Skip to substep 7 if this has already been done.
4929 *
4930 * For x8 parts:
4931 *
4932 * Skip this substep. Go to substep 7.
4933 *
4934 * 7. Calculate LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings for all byte
4935 * lanes on all ranks with attached DRAM.
4936 *
4937 * At this point, all byte lanes on rank i with attached DRAM should
4938 * have been write-leveled, and LMC(0)_WLEVEL_RANKi[BYTE*<2:0>] has
4939 * the result for each byte lane.
4940 *
4941 * But note that the DDR3 write-leveling sequence will only determine
4942 * the delay modulo the CK cycle time, and cannot determine how many
4943 * additional CK cycles of delay are present. Software must calculate
4944 * the number of CK cycles, or equivalently, the
4945 * LMC(0)_WLEVEL_RANKi[BYTE*<4:3>] settings.
4946 *
4947 * This BYTE*<4:3> calculation is system/board specific.
4948 *
4949 * Many techniques can be used to calculate write-leveling BYTE*<4:3>
4950 * values, including:
4951 *
4952 * o Known values for some byte lanes.
4953 *
4954 * o Relative values for some byte lanes relative to others.
4955 *
4956 * For example, suppose lane X is likely to require a larger
4957 * write-leveling delay than lane Y. A BYTEX<2:0> value that is much
4958 * smaller than the BYTEY<2:0> value may then indicate that the
4959 * required lane X delay wrapped into the next CK, so BYTEX<4:3>
4960 * should be set to BYTEY<4:3>+1.
4961 *
4962 * When ECC DRAM is not present (i.e. when DRAM is not attached to
4963 * the DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the
4964 * DDR_DQS_<4>_* and DDR_DQ<35:32> chip signals), write
4965 * LMC(0)_WLEVEL_RANK*[BYTE8] = LMC(0)_WLEVEL_RANK*[BYTE0],
4966 * using the final calculated BYTE0 value.
4967 * Write LMC(0)_WLEVEL_RANK*[BYTE4] = LMC(0)_WLEVEL_RANK*[BYTE0],
4968 * using the final calculated BYTE0 value.
4969 *
4970 * 8. Initialize LMC(0)_WLEVEL_RANK* values for all unused ranks.
4971 *
4972 * Let rank i be a rank with attached DRAM.
4973 *
4974 * For all ranks j that do not have attached DRAM, set
4975 * LMC(0)_WLEVEL_RANKj = LMC(0)_WLEVEL_RANKi.
4976 */
4977
4978 rankx = 0;
4979 wl_roundup = 0;
4980 disable_hwl_validity = 0;
4981
4982 // wl_pbm_pump: weight for write-leveling PBMs...
4983 // 0 causes original behavior
4984 // 1 allows a minority of 2 pbms to outscore a majority of 3 non-pbms
4985 // 4 would allow a minority of 1 pbm to outscore a majority of 4
4986 // non-pbms
4987 wl_pbm_pump = 4; // FIXME: is 4 too much?
4988
4989 if (wl_loops) {
4990 debug("N%d.LMC%d: Performing Hardware Write-Leveling\n", node,
4991 if_num);
4992 } else {
4993 /* Force software write-leveling to run */
4994 wl_mask_err = 1;
4995 debug("N%d.LMC%d: Forcing software Write-Leveling\n", node,
4996 if_num);
4997 }
4998
4999 default_wl_rtt_nom = (ddr_type == DDR3_DRAM) ?
5000 rttnom_20ohm : ddr4_rttnom_40ohm;
5001
5002 cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
5003 ecc_ena = cfg.s.ecc_ena;
5004 save_mode32b = cfg.cn78xx.mode32b;
5005 cfg.cn78xx.mode32b = (!if_64b);
5006 lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
5007 debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
5008
5009 s = lookup_env(priv, "ddr_wlevel_roundup");
5010 if (s)
5011 wl_roundup = simple_strtoul(s, NULL, 0);
5012
5013 s = lookup_env(priv, "ddr_wlevel_printall");
5014 if (s)
5015 wl_print = strtoul(s, NULL, 0);
5016
5017 s = lookup_env(priv, "ddr_wlevel_pbm_bump");
5018 if (s)
5019 wl_pbm_pump = strtoul(s, NULL, 0);
5020
5021 // default to disable when RL sequential delay check is disabled
5022 disable_hwl_validity = disable_sequential_delay_check;
5023 s = lookup_env(priv, "ddr_disable_hwl_validity");
5024 if (s)
5025 disable_hwl_validity = !!strtoul(s, NULL, 0);
5026
5027 s = lookup_env(priv, "ddr_wl_rtt_nom");
5028 if (s)
5029 default_wl_rtt_nom = simple_strtoul(s, NULL, 0);
5030
5031 s = lookup_env(priv, "ddr_match_wl_rtt_nom");
5032 if (s)
5033 match_wl_rtt_nom = !!simple_strtoul(s, NULL, 0);
5034
5035 if (match_wl_rtt_nom)
5036 mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
5037
5038 // For DDR3, we do not touch WLEVEL_CTL fields OR_DIS or BITMASK
5039 // For DDR4, we touch WLEVEL_CTL fields OR_DIS or BITMASK here
5040 if (ddr_type == DDR4_DRAM) {
5041 int default_or_dis = 1;
5042 int default_bitmask = 0xff;
5043
5044 // when x4, use only the lower nibble
5045 if (dram_width == 4) {
5046 default_bitmask = 0x0f;
5047 if (wl_print) {
5048 debug("N%d.LMC%d: WLEVEL_CTL: default bitmask is 0x%02x for DDR4 x4\n",
5049 node, if_num, default_bitmask);
5050 }
5051 }
5052
5053 wl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_CTL(if_num));
5054 wl_ctl.s.or_dis = default_or_dis;
5055 wl_ctl.s.bitmask = default_bitmask;
5056
5057 // allow overrides
5058 s = lookup_env(priv, "ddr_wlevel_ctl_or_dis");
5059 if (s)
5060 wl_ctl.s.or_dis = !!strtoul(s, NULL, 0);
5061
5062 s = lookup_env(priv, "ddr_wlevel_ctl_bitmask");
5063 if (s)
5064 wl_ctl.s.bitmask = simple_strtoul(s, NULL, 0);
5065
5066 // print only if not defaults
5067 if (wl_ctl.s.or_dis != default_or_dis ||
5068 wl_ctl.s.bitmask != default_bitmask) {
5069 debug("N%d.LMC%d: WLEVEL_CTL: or_dis=%d, bitmask=0x%02x\n",
5070 node, if_num, wl_ctl.s.or_dis, wl_ctl.s.bitmask);
5071 }
5072
5073 // always write
5074 lmc_wr(priv, CVMX_LMCX_WLEVEL_CTL(if_num), wl_ctl.u64);
5075 }
5076
5077 // Start the hardware write-leveling loop per rank
5078 for (rankx = 0; rankx < dimm_count * 4; rankx++)
5079 lmc_write_leveling_loop(priv, rankx);
5080
5081 cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
5082 cfg.cn78xx.mode32b = save_mode32b;
5083 lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
5084 debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
5085
5086 // At the end of HW Write Leveling, check on some DESKEW things...
5087 if (!disable_deskew_training) {
5088 struct deskew_counts dsk_counts;
5089 int retry_count = 0;
5090
5091 debug("N%d.LMC%d: Check Deskew Settings before Read-Leveling.\n",
5092 node, if_num);
5093
5094 do {
5095 validate_deskew_training(priv, rank_mask, if_num,
5096 &dsk_counts, 1);
5097
5098 // only RAWCARD A or B will not benefit from
5099 // retraining if there's only saturation
5100 // or any rawcard if there is a nibble error
5101 if ((!spd_rawcard_aorb && dsk_counts.saturated > 0) ||
5102 (dsk_counts.nibrng_errs != 0 ||
5103 dsk_counts.nibunl_errs != 0)) {
5104 retry_count++;
5105 debug("N%d.LMC%d: Deskew Status indicates saturation or nibble errors - retry %d Training.\n",
5106 node, if_num, retry_count);
5107 perform_deskew_training(priv, rank_mask, if_num,
5108 spd_rawcard_aorb);
5109 } else {
5110 break;
5111 }
5112 } while (retry_count < 5);
5113 }
5114}
5115
/*
 * Apply post-write-leveling LMC workarounds for specific OCTEON chip
 * revisions, then re-validate deskew settings one last time.
 *
 * Relies on file-scope training state (if_num, dimm_count, rank_mask,
 * if_bytemask, disable_deskew_training) set up by the caller.
 */
static void lmc_workaround(struct ddr_priv *priv)
{
	/* Workaround Trcd overflow by using Additive latency. */
	if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
		union cvmx_lmcx_modereg_params0 mp0;
		union cvmx_lmcx_timing_params1 tp1;
		union cvmx_lmcx_control ctrl;
		int rankx;

		tp1.u64 = lmc_rd(priv, CVMX_LMCX_TIMING_PARAMS1(if_num));
		mp0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
		ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));

		/*
		 * TRCD == 0 indicates the computed value overflowed the
		 * field; compensate with posted-CAS additive latency.
		 */
		if (tp1.cn78xx.trcd == 0) {
			debug("Workaround Trcd overflow by using Additive latency.\n");
			/* Hard code this to 12 and enable additive latency */
			tp1.cn78xx.trcd = 12;
			mp0.s.al = 2; /* CL-2 */
			ctrl.s.pocas = 1;

			debug("MODEREG_PARAMS0 : 0x%016llx\n",
			      mp0.u64);
			lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
			       mp0.u64);
			debug("TIMING_PARAMS1 : 0x%016llx\n",
			      tp1.u64);
			lmc_wr(priv, CVMX_LMCX_TIMING_PARAMS1(if_num), tp1.u64);

			debug("LMC_CONTROL : 0x%016llx\n",
			      ctrl.u64);
			lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);

			/*
			 * Push the new additive-latency setting out to the
			 * DRAMs themselves (MR1) on every populated rank.
			 */
			for (rankx = 0; rankx < dimm_count * 4; rankx++) {
				if (!(rank_mask & (1 << rankx)))
					continue;

				/* MR1 */
				ddr4_mrw(priv, if_num, rankx, -1, 1, 0);
			}
		}
	}

	// this is here just for output, to allow check of the Deskew
	// settings one last time...
	if (!disable_deskew_training) {
		struct deskew_counts dsk_counts;

		debug("N%d.LMC%d: Check Deskew Settings before software Write-Leveling.\n",
		      node, if_num);
		validate_deskew_training(priv, rank_mask, if_num, &dsk_counts,
					 3);
	}

	/*
	 * Workaround Errata 26304 (T88@2.0, O75@1.x, O78@2.x)
	 *
	 * When the CSRs LMCX_DLL_CTL3[WR_DESKEW_ENA] = 1 AND
	 * LMCX_PHY_CTL2[DQS[0..8]_DSK_ADJ] > 4, set
	 * LMCX_EXT_CONFIG[DRIVE_ENA_BPRCH] = 1.
	 */
	if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
	    octeon_is_cpuid(OCTEON_CNF75XX_PASS1_X)) {
		union cvmx_lmcx_dll_ctl3 dll_ctl3;
		union cvmx_lmcx_phy_ctl2 phy_ctl2;
		union cvmx_lmcx_ext_config ext_cfg;
		int increased_dsk_adj = 0;
		int byte;

		phy_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL2(if_num));
		ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));
		dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));

		/*
		 * Scan the 3-bit DSK_ADJ field of every active byte lane;
		 * any lane with an adjustment > 4 triggers the workaround.
		 */
		for (byte = 0; byte < 8; ++byte) {
			if (!(if_bytemask & (1 << byte)))
				continue;
			increased_dsk_adj |=
			    (((phy_ctl2.u64 >> (byte * 3)) & 0x7) > 4);
		}

		if (dll_ctl3.s.wr_deskew_ena == 1 && increased_dsk_adj) {
			ext_cfg.s.drive_ena_bprch = 1;
			lmc_wr(priv, CVMX_LMCX_EXT_CONFIG(if_num), ext_cfg.u64);
			debug("LMC%d: Forcing DRIVE_ENA_BPRCH for Workaround Errata 26304.\n",
			      if_num);
		}
	}
}
5203
5204// Software Write-Leveling block
5205
5206#define VREF_RANGE1_LIMIT 0x33 // range1 is valid for 0x00 - 0x32
5207#define VREF_RANGE2_LIMIT 0x18 // range2 is valid for 0x00 - 0x17
5208// full window is valid for 0x00 to 0x4A
5209// let 0x00 - 0x17 be range2, 0x18 - 0x4a be range 1
5210#define VREF_LIMIT (VREF_RANGE1_LIMIT + VREF_RANGE2_LIMIT)
5211#define VREF_FINAL (VREF_LIMIT - 1)
5212
/* Per-byte-lane outcome of write-leveling; see byte_test_status[] below */
enum sw_wl_status {
	WL_ESTIMATED = 0, /* HW/SW wleveling failed. Result estimated */
	WL_HARDWARE = 1, /* H/W wleveling succeeded */
	WL_SOFTWARE = 2, /* S/W wleveling passed 2 contiguous settings */
	WL_SOFTWARE1 = 3, /* S/W wleveling passed 1 marginal setting */
};
5219
5220static u64 rank_addr __section(".data");
5221static int vref_val __section(".data");
5222static int final_vref_val __section(".data");
5223static int final_vref_range __section(".data");
5224static int start_vref_val __section(".data");
5225static int computed_final_vref_val __section(".data");
5226static char best_vref_val_count __section(".data");
5227static char vref_val_count __section(".data");
5228static char best_vref_val_start __section(".data");
5229static char vref_val_start __section(".data");
5230static int bytes_failed __section(".data");
5231static enum sw_wl_status byte_test_status[9] __section(".data");
5232static enum sw_wl_status sw_wl_rank_status __section(".data");
5233static int sw_wl_failed __section(".data");
5234static int sw_wl_hw __section(".data");
5235static int measured_vref_flag __section(".data");
5236
/*
 * One iteration of the DDR4 VrefDQ training sweep for rank @rankx.
 *
 * While vref_val < VREF_FINAL this just programs the next candidate
 * vref setting; on the final iteration it selects and programs the
 * winning value (computed, measured, or the rank's existing default).
 *
 * Vref encoding: the sweep index vref_val covers both MR6 ranges as one
 * linear scale. Values below VREF_RANGE2_LIMIT map to range 2 (set_vref
 * range argument 1); values at/above map to range 1 (argument 0) after
 * subtracting the limit. Debug output prints ranges as 1/2 (i.e.
 * range-argument + 1). -- NOTE(review): mapping inferred from the
 * symmetric encode/decode below; confirm against set_vref().
 */
static void ddr4_vref_loop(struct ddr_priv *priv, int rankx)
{
	char *s;

	if (vref_val < VREF_FINAL) {
		int vrange, vvalue;

		/* Split the linear sweep index into MR6 range + value */
		if (vref_val < VREF_RANGE2_LIMIT) {
			vrange = 1;
			vvalue = vref_val;
		} else {
			vrange = 0;
			vvalue = vref_val - VREF_RANGE2_LIMIT;
		}

		set_vref(priv, if_num, rankx, vrange, vvalue);
	} else { /* if (vref_val < VREF_FINAL) */
		/* Print the final vref value first. */

		/* Always print the computed first if its valid */
		if (computed_final_vref_val >= 0) {
			debug("N%d.LMC%d.R%d: vref Computed Summary : %2d (0x%02x)\n",
			      node, if_num, rankx,
			      computed_final_vref_val, computed_final_vref_val);
		}

		if (!measured_vref_flag) { // setup to use the computed
			best_vref_val_count = 1;
			final_vref_val = computed_final_vref_val;
		} else { // setup to use the measured
			if (best_vref_val_count > 0) {
				/*
				 * Use the midpoint of the widest passing
				 * window found during the sweep.
				 */
				best_vref_val_count =
					max(best_vref_val_count, (char)2);
				final_vref_val = best_vref_val_start +
					divide_nint(best_vref_val_count - 1, 2);

				if (final_vref_val < VREF_RANGE2_LIMIT) {
					final_vref_range = 1;
				} else {
					final_vref_range = 0;
					final_vref_val -= VREF_RANGE2_LIMIT;
				}

				/*
				 * Decode the window edges for display only;
				 * vrlo/vrhi hold the human-readable range
				 * number (2 or 1), not the set_vref argument.
				 */
				int vvlo = best_vref_val_start;
				int vrlo;
				int vvhi = best_vref_val_start +
					best_vref_val_count - 1;
				int vrhi;

				if (vvlo < VREF_RANGE2_LIMIT) {
					vrlo = 2;
				} else {
					vrlo = 1;
					vvlo -= VREF_RANGE2_LIMIT;
				}

				if (vvhi < VREF_RANGE2_LIMIT) {
					vrhi = 2;
				} else {
					vrhi = 1;
					vvhi -= VREF_RANGE2_LIMIT;
				}
				debug("N%d.LMC%d.R%d: vref Training Summary : 0x%02x/%1d <----- 0x%02x/%1d -----> 0x%02x/%1d, range: %2d\n",
				      node, if_num, rankx, vvlo, vrlo,
				      final_vref_val,
				      final_vref_range + 1, vvhi, vrhi,
				      best_vref_val_count - 1);

			} else {
				/*
				 * If nothing passed use the default vref
				 * value for this rank
				 */
				union cvmx_lmcx_modereg_params2 mp2;

				/*
				 * Each rank's vref occupies a 10-bit field
				 * in MODEREG_PARAMS2: value at bit 3,
				 * range at bit 9.
				 */
				mp2.u64 =
					lmc_rd(priv,
					       CVMX_LMCX_MODEREG_PARAMS2(if_num));
				final_vref_val = (mp2.u64 >>
						  (rankx * 10 + 3)) & 0x3f;
				final_vref_range = (mp2.u64 >>
						    (rankx * 10 + 9)) & 0x01;

				debug("N%d.LMC%d.R%d: vref Using Default : %2d <----- %2d (0x%02x) -----> %2d, range%1d\n",
				      node, if_num, rankx, final_vref_val,
				      final_vref_val, final_vref_val,
				      final_vref_val, final_vref_range + 1);
			}
		}

		// allow override
		s = lookup_env(priv, "ddr%d_vref_val_%1d%1d",
			       if_num, !!(rankx & 2), !!(rankx & 1));
		if (s)
			final_vref_val = strtoul(s, NULL, 0);

		set_vref(priv, if_num, rankx, final_vref_range, final_vref_val);
	}
}
5336
5337#define WL_MIN_NO_ERRORS_COUNT 3 // FIXME? three passes without errors
5338
5339static int errors __section(".data");
5340static int byte_delay[9] __section(".data");
5341static u64 bytemask __section(".data");
5342static int bytes_todo __section(".data");
5343static int no_errors_count __section(".data");
5344static u64 bad_bits[2] __section(".data");
5345static u64 sum_dram_dclk __section(".data");
5346static u64 sum_dram_ops __section(".data");
5347static u64 start_dram_dclk __section(".data");
5348static u64 stop_dram_dclk __section(".data");
5349static u64 start_dram_ops __section(".data");
5350static u64 stop_dram_ops __section(".data");
5351
/*
 * One iteration of the software write-leveling delay search for rank
 * @rankx: program the current per-byte delays, run a memory test, and
 * advance the delay (in steps of 8, the CK high-order increment) for
 * every byte lane that still shows errors.
 *
 * Operates on file-scope state shared with the caller's do-while loop:
 * wl_rank, byte_delay[], bytemask, bytes_todo, errors, no_errors_count,
 * bad_bits[], byte_test_status[], and the dclk/ops counters. The caller
 * repeats this until WL_MIN_NO_ERRORS_COUNT consecutive clean passes.
 */
static void lmc_sw_write_leveling_loop(struct ddr_priv *priv, int rankx)
{
	int delay;
	int b;

	// write the current set of WL delays
	lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num), wl_rank.u64);
	wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num));

	// do the test
	if (sw_wl_hw) {
		errors = run_best_hw_patterns(priv, if_num, rank_addr,
					      DBTRAIN_TEST, bad_bits);
		errors &= bytes_todo; // keep only the ones we are still doing
	} else {
		/* S/W test path: also accumulate DCLK/OPS utilization stats */
		start_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num));
		start_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num));
		errors = test_dram_byte64(priv, if_num, rank_addr, bytemask,
					  bad_bits);

		stop_dram_dclk = lmc_rd(priv, CVMX_LMCX_DCLK_CNT(if_num));
		stop_dram_ops = lmc_rd(priv, CVMX_LMCX_OPS_CNT(if_num));
		sum_dram_dclk += stop_dram_dclk - start_dram_dclk;
		sum_dram_ops += stop_dram_ops - start_dram_ops;
	}

	debug("WL pass1: test_dram_byte returned 0x%x\n", errors);

	// remember, errors will not be returned for byte-lanes that have
	// maxxed out...
	if (errors == 0) {
		no_errors_count++; // bump
		// bypass check/update completely
		if (no_errors_count > 1)
			return; // to end of do-while
	} else {
		no_errors_count = 0; // reset
	}

	// check errors by byte
	for (b = 0; b < 9; ++b) {
		if (!(bytes_todo & (1 << b)))
			continue;

		delay = byte_delay[b];
		// yes, an error in this byte lane
		if (errors & (1 << b)) {
			debug(" byte %d delay %2d Errors\n", b, delay);
			// since this byte had an error, we move to the next
			// delay value, unless done with it
			delay += 8; // incr by 8 to do delay high-order bits
			if (delay < 32) {
				upd_wl_rank(&wl_rank, b, delay);
				debug(" byte %d delay %2d New\n",
				      b, delay);
				byte_delay[b] = delay;
			} else {
				// reached max delay, maybe really done with
				// this byte
				// consider an alt only for computed VREF and
				if (!measured_vref_flag &&
				    (hwl_alts[rankx].hwl_alt_mask & (1 << b))) {
					// if an alt exists...
					// just orig low-3 bits
					// NOTE(review): mask is 0x6, not 0x7;
					// WL delays are even so bit 0 is
					// presumably always 0 -- confirm
					int bad_delay = delay & 0x6;

					// yes, use it
					delay = hwl_alts[rankx].hwl_alt_delay[b];
					// clear that flag
					hwl_alts[rankx].hwl_alt_mask &=
						~(1 << b);
					upd_wl_rank(&wl_rank, b, delay);
					byte_delay[b] = delay;
					debug(" byte %d delay %2d ALTERNATE\n",
					      b, delay);
					debug("N%d.LMC%d.R%d: SWL: Byte %d: %d FAIL, trying ALTERNATE %d\n",
					      node, if_num,
					      rankx, b, bad_delay, delay);

				} else {
					unsigned int bits_bad;

					/* Capture which bits failed, then
					 * retire the lane from testing.
					 */
					if (b < 8) {
						// test no longer, remove from
						// byte mask
						bytemask &=
							~(0xffULL << (8 * b));
						bits_bad = (unsigned int)
							((bad_bits[0] >>
							  (8 * b)) & 0xffUL);
					} else {
						bits_bad = (unsigned int)
						    (bad_bits[1] & 0xffUL);
					}

					// remove from bytes to do
					bytes_todo &= ~(1 << b);
					// make sure this is set for this case
					byte_test_status[b] = WL_ESTIMATED;
					debug(" byte %d delay %2d Exhausted\n",
					      b, delay);
					if (!measured_vref_flag) {
						// this is too noisy when doing
						// measured VREF
						debug("N%d.LMC%d.R%d: SWL: Byte %d (0x%02x): delay %d EXHAUSTED\n",
						      node, if_num, rankx,
						      b, bits_bad, delay);
					}
				}
			}
		} else {
			// no error, stay with current delay, but keep testing
			// it...
			debug(" byte %d delay %2d Passed\n", b, delay);
			byte_test_status[b] = WL_HARDWARE; // change status
		}
	} /* for (b = 0; b < 9; ++b) */
}
5470
5471static void sw_write_lvl_use_ecc(struct ddr_priv *priv, int rankx)
5472{
5473 int save_byte8 = wl_rank.s.byte8;
5474
5475 byte_test_status[8] = WL_HARDWARE; /* H/W delay value */
5476
5477 if (save_byte8 != wl_rank.s.byte3 &&
5478 save_byte8 != wl_rank.s.byte4) {
5479 int test_byte8 = save_byte8;
5480 int test_byte8_error;
5481 int byte8_error = 0x1f;
5482 int adder;
5483 int avg_bytes = divide_nint(wl_rank.s.byte3 + wl_rank.s.byte4,
5484 2);
5485
5486 for (adder = 0; adder <= 32; adder += 8) {
5487 test_byte8_error = abs((adder + save_byte8) -
5488 avg_bytes);
5489 if (test_byte8_error < byte8_error) {
5490 byte8_error = test_byte8_error;
5491 test_byte8 = save_byte8 + adder;
5492 }
5493 }
5494
5495 // only do the check if we are not using measured VREF
5496 if (!measured_vref_flag) {
5497 /* Use only even settings, rounding down... */
5498 test_byte8 &= ~1;
5499
5500 // do validity check on the calculated ECC delay value
5501 // this depends on the DIMM type
5502 if (spd_rdimm) { // RDIMM
5503 // but not mini-RDIMM
5504 if (spd_dimm_type != 5) {
5505 // it can be > byte4, but should never
5506 // be > byte3
5507 if (test_byte8 > wl_rank.s.byte3) {
5508 /* say it is still estimated */
5509 byte_test_status[8] =
5510 WL_ESTIMATED;
5511 }
5512 }
5513 } else { // UDIMM
5514 if (test_byte8 < wl_rank.s.byte3 ||
5515 test_byte8 > wl_rank.s.byte4) {
5516 // should never be outside the
5517 // byte 3-4 range
5518 /* say it is still estimated */
5519 byte_test_status[8] = WL_ESTIMATED;
5520 }
5521 }
5522 /*
5523 * Report whenever the calculation appears bad.
5524 * This happens if some of the original values were off,
5525 * or unexpected geometry from DIMM type, or custom
5526 * circuitry (NIC225E, I am looking at you!).
5527 * We will trust the calculated value, and depend on
5528 * later testing to catch any instances when that
5529 * value is truly bad.
5530 */
5531 // ESTIMATED means there may be an issue
5532 if (byte_test_status[8] == WL_ESTIMATED) {
5533 debug("N%d.LMC%d.R%d: SWL: (%cDIMM): calculated ECC delay unexpected (%d/%d/%d)\n",
5534 node, if_num, rankx,
5535 (spd_rdimm ? 'R' : 'U'), wl_rank.s.byte4,
5536 test_byte8, wl_rank.s.byte3);
5537 byte_test_status[8] = WL_HARDWARE;
5538 }
5539 }
5540 /* Use only even settings */
5541 wl_rank.s.byte8 = test_byte8 & ~1;
5542 }
5543
5544 if (wl_rank.s.byte8 != save_byte8) {
5545 /* Change the status if s/w adjusted the delay */
5546 byte_test_status[8] = WL_SOFTWARE; /* Estimated delay */
5547 }
5548}
5549
/*
 * Pure-software fallback write-leveling search for rank @rankx.
 *
 * Runs up to two passes (wl_offset = 1 then 0): in each pass every
 * still-untrained byte lane sweeps its delay from 0 upward in steps of
 * 2, and a lane "fully passes" after (1 + wl_offset) consecutive clean
 * tests -- so the first pass demands two adjacent working settings
 * (WL_SOFTWARE) and the second accepts a single marginal one
 * (WL_SOFTWARE1). Lanes that never pass fall back to an estimate
 * derived from the read-leveling results.
 */
static __maybe_unused void parallel_wl_block_delay(struct ddr_priv *priv,
						   int rankx)
{
	int errors;
	int byte_delay[8];
	int byte_passed[8];
	u64 bytemask;
	u64 bitmask;
	int wl_offset;
	int bytes_todo;
	int sw_wl_offset = 1;
	int delay;
	int b;

	for (b = 0; b < 8; ++b)
		byte_passed[b] = 0;

	bytes_todo = if_bytemask;

	for (wl_offset = sw_wl_offset; wl_offset >= 0; --wl_offset) {
		debug("Starting wl_offset for-loop: %d\n", wl_offset);

		bytemask = 0;

		for (b = 0; b < 8; ++b) {
			byte_delay[b] = 0;
			// this does not contain fully passed bytes
			if (!(bytes_todo & (1 << b)))
				continue;

			// reset across passes if not fully passed
			byte_passed[b] = 0;
			upd_wl_rank(&wl_rank, b, 0); // all delays start at 0
			/* In 32-bit mode lane 4 carries only a nibble */
			bitmask = ((!if_64b) && (b == 4)) ? 0x0f : 0xff;
			// set the bytes bits in the bytemask
			bytemask |= bitmask << (8 * b);
		} /* for (b = 0; b < 8; ++b) */

		// start a pass if there is any byte lane to test
		while (bytemask != 0) {
			debug("Starting bytemask while-loop: 0x%llx\n",
			      bytemask);

			// write this set of WL delays
			lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
			       wl_rank.u64);
			wl_rank.u64 = lmc_rd(priv,
					     CVMX_LMCX_WLEVEL_RANKX(rankx,
								    if_num));

			// do the test
			if (sw_wl_hw) {
				errors = run_best_hw_patterns(priv, if_num,
							      rank_addr,
							      DBTRAIN_TEST,
							      NULL) & 0xff;
			} else {
				errors = test_dram_byte64(priv, if_num,
							  rank_addr, bytemask,
							  NULL);
			}

			debug("test_dram_byte returned 0x%x\n", errors);

			// check errors by byte
			for (b = 0; b < 8; ++b) {
				if (!(bytes_todo & (1 << b)))
					continue;

				delay = byte_delay[b];
				if (errors & (1 << b)) { // yes, an error
					debug(" byte %d delay %2d Errors\n",
					      b, delay);
					byte_passed[b] = 0;
				} else { // no error
					byte_passed[b] += 1;
					// Look for consecutive working settings
					if (byte_passed[b] == (1 + wl_offset)) {
						debug(" byte %d delay %2d FULLY Passed\n",
						      b, delay);
						if (wl_offset == 1) {
							byte_test_status[b] =
								WL_SOFTWARE;
						} else if (wl_offset == 0) {
							byte_test_status[b] =
								WL_SOFTWARE1;
						}

						// test no longer, remove
						// from byte mask this pass
						bytemask &= ~(0xffULL <<
							      (8 * b));
						// remove completely from
						// concern
						bytes_todo &= ~(1 << b);
						// on to the next byte, bypass
						// delay updating!!
						continue;
					} else {
						debug(" byte %d delay %2d Passed\n",
						      b, delay);
					}
				}

				// error or no, here we move to the next delay
				// value for this byte, unless done all delays
				// only a byte that has "fully passed" will
				// bypass around this,
				delay += 2;
				if (delay < 32) {
					upd_wl_rank(&wl_rank, b, delay);
					debug(" byte %d delay %2d New\n",
					      b, delay);
					byte_delay[b] = delay;
				} else {
					// reached max delay, done with this
					// byte
					debug(" byte %d delay %2d Exhausted\n",
					      b, delay);
					// test no longer, remove from byte
					// mask this pass
					bytemask &= ~(0xffULL << (8 * b));
				}
			} /* for (b = 0; b < 8; ++b) */
			debug("End of for-loop: bytemask 0x%llx\n", bytemask);
		} /* while (bytemask != 0) */
	}

	for (b = 0; b < 8; ++b) {
		// any bytes left in bytes_todo did not pass
		if (bytes_todo & (1 << b)) {
			union cvmx_lmcx_rlevel_rankx lmc_rlevel_rank;

			/*
			 * Last resort. Use Rlevel settings to estimate
			 * Wlevel if software write-leveling fails
			 */
			debug("Using RLEVEL as WLEVEL estimate for byte %d\n",
			      b);
			lmc_rlevel_rank.u64 =
			    lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
								if_num));
			rlevel_to_wlevel(&lmc_rlevel_rank, &wl_rank, b);
		}
	} /* for (b = 0; b < 8; ++b) */
}
5696
5697static int lmc_sw_write_leveling(struct ddr_priv *priv)
5698{
5699 /* Try to determine/optimize write-level delays experimentally. */
5700 union cvmx_lmcx_wlevel_rankx wl_rank_hw_res;
5701 union cvmx_lmcx_config cfg;
5702 int rankx;
5703 int byte;
5704 char *s;
5705 int i;
5706
5707 int active_rank;
5708 int sw_wl_enable = 1; /* FIX... Should be customizable. */
5709 int interfaces;
5710
5711 static const char * const wl_status_strings[] = {
5712 "(e)",
5713 " ",
5714 " ",
5715 "(1)"
5716 };
5717
5718 // FIXME: make HW-assist the default now?
5719 int sw_wl_hw_default = SW_WLEVEL_HW_DEFAULT;
5720 int dram_connection = c_cfg->dram_connection;
5721
5722 s = lookup_env(priv, "ddr_sw_wlevel_hw");
5723 if (s)
5724 sw_wl_hw_default = !!strtoul(s, NULL, 0);
5725 if (!if_64b) // must use SW algo if 32-bit mode
5726 sw_wl_hw_default = 0;
5727
5728 // can never use hw-assist
5729 if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
5730 sw_wl_hw_default = 0;
5731
5732 s = lookup_env(priv, "ddr_software_wlevel");
5733 if (s)
5734 sw_wl_enable = strtoul(s, NULL, 0);
5735
5736 s = lookup_env(priv, "ddr%d_dram_connection", if_num);
5737 if (s)
5738 dram_connection = !!strtoul(s, NULL, 0);
5739
5740 cvmx_rng_enable();
5741
5742 /*
5743 * Get the measured_vref setting from the config, check for an
5744 * override...
5745 */
5746 /* NOTE: measured_vref=1 (ON) means force use of MEASURED vref... */
5747 // NOTE: measured VREF can only be done for DDR4
5748 if (ddr_type == DDR4_DRAM) {
5749 measured_vref_flag = c_cfg->measured_vref;
5750 s = lookup_env(priv, "ddr_measured_vref");
5751 if (s)
5752 measured_vref_flag = !!strtoul(s, NULL, 0);
5753 } else {
5754 measured_vref_flag = 0; // OFF for DDR3
5755 }
5756
5757 /*
5758 * Ensure disabled ECC for DRAM tests using the SW algo, else leave
5759 * it untouched
5760 */
5761 if (!sw_wl_hw_default) {
5762 cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
5763 cfg.cn78xx.ecc_ena = 0;
5764 lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
5765 }
5766
5767 /*
5768 * We need to track absolute rank number, as well as how many
5769 * active ranks we have. Two single rank DIMMs show up as
5770 * ranks 0 and 2, but only 2 ranks are active.
5771 */
5772 active_rank = 0;
5773
5774 interfaces = __builtin_popcount(if_mask);
5775
5776 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
5777 final_vref_range = 0;
5778 start_vref_val = 0;
5779 computed_final_vref_val = -1;
5780 sw_wl_rank_status = WL_HARDWARE;
5781 sw_wl_failed = 0;
5782 sw_wl_hw = sw_wl_hw_default;
5783
5784 if (!sw_wl_enable)
5785 break;
5786
5787 if (!(rank_mask & (1 << rankx)))
5788 continue;
5789
5790 debug("N%d.LMC%d.R%d: Performing Software Write-Leveling %s\n",
5791 node, if_num, rankx,
5792 (sw_wl_hw) ? "with H/W assist" :
5793 "with S/W algorithm");
5794
5795 if (ddr_type == DDR4_DRAM && num_ranks != 4) {
5796 // always compute when we can...
5797 computed_final_vref_val =
5798 compute_vref_val(priv, if_num, rankx, dimm_count,
5799 num_ranks, imp_val,
5800 is_stacked_die, dram_connection);
5801
5802 // but only use it if allowed
5803 if (!measured_vref_flag) {
5804 // skip all the measured vref processing,
5805 // just the final setting
5806 start_vref_val = VREF_FINAL;
5807 }
5808 }
5809
5810 /* Save off the h/w wl results */
5811 wl_rank_hw_res.u64 = lmc_rd(priv,
5812 CVMX_LMCX_WLEVEL_RANKX(rankx,
5813 if_num));
5814
5815 vref_val_count = 0;
5816 vref_val_start = 0;
5817 best_vref_val_count = 0;
5818 best_vref_val_start = 0;
5819
5820 /* Loop one extra time using the Final vref value. */
5821 for (vref_val = start_vref_val; vref_val < VREF_LIMIT;
5822 ++vref_val) {
5823 if (ddr_type == DDR4_DRAM)
5824 ddr4_vref_loop(priv, rankx);
5825
5826 /* Restore the saved value */
5827 wl_rank.u64 = wl_rank_hw_res.u64;
5828
5829 for (byte = 0; byte < 9; ++byte)
5830 byte_test_status[byte] = WL_ESTIMATED;
5831
5832 if (wl_mask_err == 0) {
5833 /*
5834 * Determine address of DRAM to test for
5835 * pass 1 of software write leveling.
5836 */
5837 rank_addr = active_rank *
5838 (1ull << (pbank_lsb - bunk_enable +
5839 (interfaces / 2)));
5840
5841 /*
5842 * Adjust address for boot bus hole in memory
5843 * map.
5844 */
5845 if (rank_addr > 0x10000000)
5846 rank_addr += 0x10000000;
5847
5848 debug("N%d.LMC%d.R%d: Active Rank %d Address: 0x%llx\n",
5849 node, if_num, rankx, active_rank,
5850 rank_addr);
5851
5852 // start parallel write-leveling block for
5853 // delay high-order bits
5854 errors = 0;
5855 no_errors_count = 0;
5856 sum_dram_dclk = 0;
5857 sum_dram_ops = 0;
5858
5859 if (if_64b) {
5860 bytes_todo = (sw_wl_hw) ?
5861 if_bytemask : 0xFF;
5862 bytemask = ~0ULL;
5863 } else {
5864 // 32-bit, must be using SW algo,
5865 // only data bytes
5866 bytes_todo = 0x0f;
5867 bytemask = 0x00000000ffffffffULL;
5868 }
5869
5870 for (byte = 0; byte < 9; ++byte) {
5871 if (!(bytes_todo & (1 << byte))) {
5872 byte_delay[byte] = 0;
5873 } else {
5874 byte_delay[byte] =
5875 get_wl_rank(&wl_rank, byte);
5876 }
5877 } /* for (byte = 0; byte < 9; ++byte) */
5878
5879 do {
5880 lmc_sw_write_leveling_loop(priv, rankx);
5881 } while (no_errors_count <
5882 WL_MIN_NO_ERRORS_COUNT);
5883
5884 if (!sw_wl_hw) {
5885 u64 percent_x10;
5886
5887 if (sum_dram_dclk == 0)
5888 sum_dram_dclk = 1;
5889 percent_x10 = sum_dram_ops * 1000 /
5890 sum_dram_dclk;
5891 debug("N%d.LMC%d.R%d: ops %llu, cycles %llu, used %llu.%llu%%\n",
5892 node, if_num, rankx, sum_dram_ops,
5893 sum_dram_dclk, percent_x10 / 10,
5894 percent_x10 % 10);
5895 }
5896 if (errors) {
5897 debug("End WLEV_64 while loop: vref_val %d(0x%x), errors 0x%02x\n",
5898 vref_val, vref_val, errors);
5899 }
5900 // end parallel write-leveling block for
5901 // delay high-order bits
5902
5903 // if we used HW-assist, we did the ECC byte
5904 // when approp.
5905 if (sw_wl_hw) {
5906 if (wl_print) {
5907 debug("N%d.LMC%d.R%d: HW-assisted SWL - ECC estimate not needed.\n",
5908 node, if_num, rankx);
5909 }
5910 goto no_ecc_estimate;
5911 }
5912
5913 if ((if_bytemask & 0xff) == 0xff) {
5914 if (use_ecc) {
5915 sw_write_lvl_use_ecc(priv,
5916 rankx);
5917 } else {
5918 /* H/W delay value */
5919 byte_test_status[8] =
5920 WL_HARDWARE;
5921 /* ECC is not used */
5922 wl_rank.s.byte8 =
5923 wl_rank.s.byte0;
5924 }
5925 } else {
5926 if (use_ecc) {
5927 /* Estimate the ECC byte dly */
5928 // add hi-order to b4
5929 wl_rank.s.byte4 |=
5930 (wl_rank.s.byte3 &
5931 0x38);
5932 if ((wl_rank.s.byte4 & 0x06) <
5933 (wl_rank.s.byte3 & 0x06)) {
5934 // must be next clock
5935 wl_rank.s.byte4 += 8;
5936 }
5937 } else {
5938 /* ECC is not used */
5939 wl_rank.s.byte4 =
5940 wl_rank.s.byte0;
5941 }
5942
5943 /*
5944 * Change the status if s/w adjusted
5945 * the delay
5946 */
5947 /* Estimated delay */
5948 byte_test_status[4] = WL_SOFTWARE;
5949 } /* if ((if_bytemask & 0xff) == 0xff) */
5950 } /* if (wl_mask_err == 0) */
5951
5952no_ecc_estimate:
5953
5954 bytes_failed = 0;
5955 for (byte = 0; byte < 9; ++byte) {
5956 /* Don't accumulate errors for untested bytes */
5957 if (!(if_bytemask & (1 << byte)))
5958 continue;
5959 bytes_failed +=
5960 (byte_test_status[byte] == WL_ESTIMATED);
5961 }
5962
5963 /* vref training loop is only used for DDR4 */
5964 if (ddr_type != DDR4_DRAM)
5965 break;
5966
5967 if (bytes_failed == 0) {
5968 if (vref_val_count == 0)
5969 vref_val_start = vref_val;
5970
5971 ++vref_val_count;
5972 if (vref_val_count > best_vref_val_count) {
5973 best_vref_val_count = vref_val_count;
5974 best_vref_val_start = vref_val_start;
5975 debug("N%d.LMC%d.R%d: vref Training (%2d) : 0x%02x <----- ???? -----> 0x%02x\n",
5976 node, if_num, rankx, vref_val,
5977 best_vref_val_start,
5978 best_vref_val_start +
5979 best_vref_val_count - 1);
5980 }
5981 } else {
5982 vref_val_count = 0;
5983 debug("N%d.LMC%d.R%d: vref Training (%2d) : failed\n",
5984 node, if_num, rankx, vref_val);
5985 }
5986 }
5987
5988 /*
5989 * Determine address of DRAM to test for software write
5990 * leveling.
5991 */
5992 rank_addr = active_rank * (1ull << (pbank_lsb - bunk_enable +
5993 (interfaces / 2)));
5994 /* Adjust address for boot bus hole in memory map. */
5995 if (rank_addr > 0x10000000)
5996 rank_addr += 0x10000000;
5997
5998 debug("Rank Address: 0x%llx\n", rank_addr);
5999
6000 if (bytes_failed) {
6001 // FIXME? the big hammer, did not even try SW WL pass2,
6002 // assume only chip reset will help
6003 debug("N%d.LMC%d.R%d: S/W write-leveling pass 1 failed\n",
6004 node, if_num, rankx);
6005 sw_wl_failed = 1;
6006 } else { /* if (bytes_failed) */
6007 // SW WL pass 1 was OK, write the settings
6008 lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
6009 wl_rank.u64);
6010 wl_rank.u64 = lmc_rd(priv,
6011 CVMX_LMCX_WLEVEL_RANKX(rankx,
6012 if_num));
6013
6014 // do validity check on the delay values by running
6015 // the test 1 more time...
6016 // FIXME: we really need to check the ECC byte setting
6017 // here as well, so we need to enable ECC for this test!
6018 // if there are any errors, claim SW WL failure
6019 u64 datamask = (if_64b) ? 0xffffffffffffffffULL :
6020 0x00000000ffffffffULL;
6021 int errors;
6022
6023 // do the test
6024 if (sw_wl_hw) {
6025 errors = run_best_hw_patterns(priv, if_num,
6026 rank_addr,
6027 DBTRAIN_TEST,
6028 NULL) & 0xff;
6029 } else {
6030 errors = test_dram_byte64(priv, if_num,
6031 rank_addr, datamask,
6032 NULL);
6033 }
6034
6035 if (errors) {
6036 debug("N%d.LMC%d.R%d: Wlevel Rank Final Test errors 0x%03x\n",
6037 node, if_num, rankx, errors);
6038 sw_wl_failed = 1;
6039 }
6040 } /* if (bytes_failed) */
6041
6042 // FIXME? dump the WL settings, so we get more of a clue
6043 // as to what happened where
6044 debug("N%d.LMC%d.R%d: Wlevel Rank %#4x, 0x%016llX : %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %2d%3s %s\n",
6045 node, if_num, rankx, wl_rank.s.status, wl_rank.u64,
6046 wl_rank.s.byte8, wl_status_strings[byte_test_status[8]],
6047 wl_rank.s.byte7, wl_status_strings[byte_test_status[7]],
6048 wl_rank.s.byte6, wl_status_strings[byte_test_status[6]],
6049 wl_rank.s.byte5, wl_status_strings[byte_test_status[5]],
6050 wl_rank.s.byte4, wl_status_strings[byte_test_status[4]],
6051 wl_rank.s.byte3, wl_status_strings[byte_test_status[3]],
6052 wl_rank.s.byte2, wl_status_strings[byte_test_status[2]],
6053 wl_rank.s.byte1, wl_status_strings[byte_test_status[1]],
6054 wl_rank.s.byte0, wl_status_strings[byte_test_status[0]],
6055 (sw_wl_rank_status == WL_HARDWARE) ? "" : "(s)");
6056
6057 // finally, check for fatal conditions: either chip reset
6058 // right here, or return error flag
6059 if ((ddr_type == DDR4_DRAM && best_vref_val_count == 0) ||
6060 sw_wl_failed) {
6061 if (!ddr_disable_chip_reset) { // do chip RESET
6062 printf("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Resetting node...\n",
6063 node, if_num, rankx);
6064 mdelay(500);
6065 do_reset(NULL, 0, 0, NULL);
6066 } else {
6067 // return error flag so LMC init can be retried.
6068 debug("N%d.LMC%d.R%d: INFO: Short memory test indicates a retry is needed. Restarting LMC init...\n",
6069 node, if_num, rankx);
6070 return -EAGAIN; // 0 indicates restart possible.
6071 }
6072 }
6073 active_rank++;
6074 }
6075
6076 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
6077 int parameter_set = 0;
6078 u64 value;
6079
6080 if (!(rank_mask & (1 << rankx)))
6081 continue;
6082
6083 wl_rank.u64 = lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx,
6084 if_num));
6085
6086 for (i = 0; i < 9; ++i) {
6087 s = lookup_env(priv, "ddr%d_wlevel_rank%d_byte%d",
6088 if_num, rankx, i);
6089 if (s) {
6090 parameter_set |= 1;
6091 value = strtoul(s, NULL, 0);
6092
6093 upd_wl_rank(&wl_rank, i, value);
6094 }
6095 }
6096
6097 s = lookup_env_ull(priv, "ddr%d_wlevel_rank%d", if_num, rankx);
6098 if (s) {
6099 parameter_set |= 1;
6100 value = strtoull(s, NULL, 0);
6101 wl_rank.u64 = value;
6102 }
6103
6104 if (parameter_set) {
6105 lmc_wr(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num),
6106 wl_rank.u64);
6107 wl_rank.u64 =
6108 lmc_rd(priv, CVMX_LMCX_WLEVEL_RANKX(rankx, if_num));
6109 display_wl(if_num, wl_rank, rankx);
6110 }
6111 // if there are unused entries to be filled
6112 if ((rank_mask & 0x0F) != 0x0F) {
6113 if (rankx < 3) {
6114 debug("N%d.LMC%d.R%d: checking for WLEVEL_RANK unused entries.\n",
6115 node, if_num, rankx);
6116
6117 // if rank 0, write ranks 1 and 2 here if empty
6118 if (rankx == 0) {
6119 // check that rank 1 is empty
6120 if (!(rank_mask & (1 << 1))) {
6121 debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
6122 node, if_num, rankx, 1);
6123 lmc_wr(priv,
6124 CVMX_LMCX_WLEVEL_RANKX(1,
6125 if_num),
6126 wl_rank.u64);
6127 }
6128
6129 // check that rank 2 is empty
6130 if (!(rank_mask & (1 << 2))) {
6131 debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
6132 node, if_num, rankx, 2);
6133 lmc_wr(priv,
6134 CVMX_LMCX_WLEVEL_RANKX(2,
6135 if_num),
6136 wl_rank.u64);
6137 }
6138 }
6139
6140 // if rank 0, 1 or 2, write rank 3 here if empty
6141 // check that rank 3 is empty
6142 if (!(rank_mask & (1 << 3))) {
6143 debug("N%d.LMC%d.R%d: writing WLEVEL_RANK unused entry R%d.\n",
6144 node, if_num, rankx, 3);
6145 lmc_wr(priv,
6146 CVMX_LMCX_WLEVEL_RANKX(3,
6147 if_num),
6148 wl_rank.u64);
6149 }
6150 }
6151 }
6152 }
6153
6154 /* Enable 32-bit mode if required. */
6155 cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
6156 cfg.cn78xx.mode32b = (!if_64b);
6157 debug("%-45s : %d\n", "MODE32B", cfg.cn78xx.mode32b);
6158
6159 /* Restore the ECC configuration */
6160 if (!sw_wl_hw_default)
6161 cfg.cn78xx.ecc_ena = use_ecc;
6162
6163 lmc_wr(priv, CVMX_LMCX_CONFIG(if_num), cfg.u64);
6164
6165 return 0;
6166}
6167
6168static void lmc_dll(struct ddr_priv *priv)
6169{
6170 union cvmx_lmcx_dll_ctl3 ddr_dll_ctl3;
6171 int setting[9];
6172 int i;
6173
6174 ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
6175
6176 for (i = 0; i < 9; ++i) {
6177 SET_DDR_DLL_CTL3(dll90_byte_sel, ENCODE_DLL90_BYTE_SEL(i));
6178 lmc_wr(priv, CVMX_LMCX_DLL_CTL3(if_num), ddr_dll_ctl3.u64);
6179 lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
6180 ddr_dll_ctl3.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL3(if_num));
6181 setting[i] = GET_DDR_DLL_CTL3(dll90_setting);
6182 debug("%d. LMC%d_DLL_CTL3[%d] = %016llx %d\n", i, if_num,
6183 GET_DDR_DLL_CTL3(dll90_byte_sel), ddr_dll_ctl3.u64,
6184 setting[i]);
6185 }
6186
6187 debug("N%d.LMC%d: %-36s : %5d %5d %5d %5d %5d %5d %5d %5d %5d\n",
6188 node, if_num, "DLL90 Setting 8:0",
6189 setting[8], setting[7], setting[6], setting[5], setting[4],
6190 setting[3], setting[2], setting[1], setting[0]);
6191
6192 process_custom_dll_offsets(priv, if_num, "ddr_dll_write_offset",
6193 c_cfg->dll_write_offset,
6194 "ddr%d_dll_write_offset_byte%d", 1);
6195 process_custom_dll_offsets(priv, if_num, "ddr_dll_read_offset",
6196 c_cfg->dll_read_offset,
6197 "ddr%d_dll_read_offset_byte%d", 2);
6198}
6199
/*
 * Saturating increment for a 6-bit SLOT_CTL delay field: add 'incr' to
 * csr.chip.field but clamp the result at the field maximum of 63.
 * Multiple-evaluation hazard: 'csr', 'chip', 'field' and 'incr' are each
 * expanded more than once -- pass only simple lvalues/constants.
 */
#define SLOT_CTL_INCR(csr, chip, field, incr) \
	csr.chip.field = (csr.chip.field < (64 - incr)) ? \
		(csr.chip.field + incr) : 63

/*
 * NOTE(review): INCR is a byte-identical duplicate of SLOT_CTL_INCR and
 * appears unused in this file -- candidate for removal; confirm no other
 * translation unit relies on it before deleting.
 */
#define INCR(csr, chip, field, incr) \
	csr.chip.field = (csr.chip.field < (64 - incr)) ? \
		(csr.chip.field + incr) : 63
6207
/*
 * Apply chip-errata workarounds to the LMC SLOT_CTL minimum-gap CSRs.
 *
 * Errata 21063 (CN78xx, CN70xx pass 1.x): bump the hardware-computed
 * read-to-write gaps by 1 when READ_ENA_BPRCH is set, and the
 * cross-rank/cross-DIMM write-to-read gaps by 2 unconditionally.
 * Errata 21216 (CN78xx pass 1.x, CN70xx pass 1.x): enforce a floor of
 * 10 on the cross-rank and cross-DIMM write-to-write init gaps.
 *
 * All updates are read-modify-write with saturation at 63 via
 * SLOT_CTL_INCR, so hardware-derived minimums are never reduced.
 */
static void lmc_workaround_2(struct ddr_priv *priv)
{
	/* Workaround Errata 21063 */
	if (octeon_is_cpuid(OCTEON_CN78XX) ||
	    octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) {
		union cvmx_lmcx_slot_ctl0 slot_ctl0;
		union cvmx_lmcx_slot_ctl1 slot_ctl1;
		union cvmx_lmcx_slot_ctl2 slot_ctl2;
		union cvmx_lmcx_ext_config ext_cfg;

		slot_ctl0.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL0(if_num));
		slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num));
		slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num));

		ext_cfg.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(if_num));

		/* When ext_cfg.s.read_ena_bprch is set add 1 */
		if (ext_cfg.s.read_ena_bprch) {
			SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_init, 1);
			SLOT_CTL_INCR(slot_ctl0, cn78xx, r2w_l_init, 1);
			SLOT_CTL_INCR(slot_ctl1, cn78xx, r2w_xrank_init, 1);
			SLOT_CTL_INCR(slot_ctl2, cn78xx, r2w_xdimm_init, 1);
		}

		/* Always add 2 */
		SLOT_CTL_INCR(slot_ctl1, cn78xx, w2r_xrank_init, 2);
		SLOT_CTL_INCR(slot_ctl2, cn78xx, w2r_xdimm_init, 2);

		lmc_wr(priv, CVMX_LMCX_SLOT_CTL0(if_num), slot_ctl0.u64);
		lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64);
		lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64);
	}

	/* Workaround Errata 21216 */
	if (octeon_is_cpuid(OCTEON_CN78XX_PASS1_X) ||
	    octeon_is_cpuid(OCTEON_CN70XX_PASS1_X)) {
		union cvmx_lmcx_slot_ctl1 slot_ctl1;
		union cvmx_lmcx_slot_ctl2 slot_ctl2;

		/* Raise cross-rank W2W init gap to at least 10 */
		slot_ctl1.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL1(if_num));
		slot_ctl1.cn78xx.w2w_xrank_init =
			max(10, (int)slot_ctl1.cn78xx.w2w_xrank_init);
		lmc_wr(priv, CVMX_LMCX_SLOT_CTL1(if_num), slot_ctl1.u64);

		/* Raise cross-DIMM W2W init gap to at least 10 */
		slot_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_SLOT_CTL2(if_num));
		slot_ctl2.cn78xx.w2w_xdimm_init =
			max(10, (int)slot_ctl2.cn78xx.w2w_xdimm_init);
		lmc_wr(priv, CVMX_LMCX_SLOT_CTL2(if_num), slot_ctl2.u64);
	}
}
6258
static void lmc_final(struct ddr_priv *priv)
{
	/*
	 * 4.8.11 Final LMC Initialization
	 *
	 * Early LMC initialization, LMC write-leveling, and LMC read-leveling
	 * must be completed prior to starting this final LMC initialization.
	 *
	 * LMC hardware updates the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1,
	 * LMC(0)_SLOT_CTL2 CSRs with minimum values based on the selected
	 * readleveling and write-leveling settings. Software should not write
	 * the final LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and LMC(0)_SLOT_CTL2
	 * values until after the final read-leveling and write-leveling
	 * settings are written.
	 *
	 * Software must ensure the LMC(0)_SLOT_CTL0, LMC(0)_SLOT_CTL1, and
	 * LMC(0)_SLOT_CTL2 CSR values are appropriate for this step. These CSRs
	 * select the minimum gaps between read operations and write operations
	 * of various types.
	 *
	 * Software must not reduce the values in these CSR fields below the
	 * values previously selected by the LMC hardware (during write-leveling
	 * and read-leveling steps above).
	 *
	 * All sections in this chapter may be used to derive proper settings
	 * for these registers.
	 *
	 * For minimal read latency, L2C_CTL[EF_ENA,EF_CNT] should be programmed
	 * properly. This should be done prior to the first read.
	 */

	/* Clear any residual ECC errors */
	int num_tads = 1;
	int tad;
	int num_mcis = 1;
	int mci;

	/* TAD/MCI counts vary per SoC model */
	if (octeon_is_cpuid(OCTEON_CN78XX)) {
		num_tads = 8;
		num_mcis = 4;
	} else if (octeon_is_cpuid(OCTEON_CN70XX)) {
		num_tads = 1;
		num_mcis = 1;
	} else if (octeon_is_cpuid(OCTEON_CN73XX) ||
		   octeon_is_cpuid(OCTEON_CNF75XX)) {
		num_tads = 4;
		num_mcis = 3;
	}

	/* Clear all latched LMC interrupt bits, then read back to flush */
	lmc_wr(priv, CVMX_LMCX_INT(if_num), -1ULL);
	lmc_rd(priv, CVMX_LMCX_INT(if_num));

	/*
	 * Clear latched L2C TAD interrupt bits by writing back the value
	 * just read (NOTE(review): presumably W1C semantics per the HRM --
	 * confirm), then print what remains latched.
	 */
	for (tad = 0; tad < num_tads; tad++) {
		l2c_wr(priv, CVMX_L2C_TADX_INT_REL(tad),
		       l2c_rd(priv, CVMX_L2C_TADX_INT_REL(tad)));
		debug("%-45s : (%d) 0x%08llx\n", "CVMX_L2C_TAD_INT", tad,
		      l2c_rd(priv, CVMX_L2C_TADX_INT_REL(tad)));
	}

	/* Same write-back clear for each L2C MCI interrupt CSR */
	for (mci = 0; mci < num_mcis; mci++) {
		l2c_wr(priv, CVMX_L2C_MCIX_INT_REL(mci),
		       l2c_rd(priv, CVMX_L2C_MCIX_INT_REL(mci)));
		debug("%-45s : (%d) 0x%08llx\n", "L2C_MCI_INT", mci,
		      l2c_rd(priv, CVMX_L2C_MCIX_INT_REL(mci)));
	}

	debug("%-45s : 0x%08llx\n", "LMC_INT",
	      lmc_rd(priv, CVMX_LMCX_INT(if_num)));
}
6328
6329static void lmc_scrambling(struct ddr_priv *priv)
6330{
6331 // Make sure scrambling is disabled during init...
6332 union cvmx_lmcx_control ctrl;
6333 union cvmx_lmcx_scramble_cfg0 lmc_scramble_cfg0;
6334 union cvmx_lmcx_scramble_cfg1 lmc_scramble_cfg1;
6335 union cvmx_lmcx_scramble_cfg2 lmc_scramble_cfg2;
6336 union cvmx_lmcx_ns_ctl lmc_ns_ctl;
6337 int use_scramble = 0; // default OFF
6338 char *s;
6339
6340 ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
6341 lmc_scramble_cfg0.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num));
6342 lmc_scramble_cfg1.u64 = lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num));
6343 lmc_scramble_cfg2.u64 = 0; // quiet compiler
6344 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
6345 lmc_scramble_cfg2.u64 =
6346 lmc_rd(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num));
6347 }
6348 lmc_ns_ctl.u64 = lmc_rd(priv, CVMX_LMCX_NS_CTL(if_num));
6349
6350 s = lookup_env_ull(priv, "ddr_use_scramble");
6351 if (s)
6352 use_scramble = simple_strtoull(s, NULL, 0);
6353
6354 /* Generate random values if scrambling is needed */
6355 if (use_scramble) {
6356 lmc_scramble_cfg0.u64 = cvmx_rng_get_random64();
6357 lmc_scramble_cfg1.u64 = cvmx_rng_get_random64();
6358 lmc_scramble_cfg2.u64 = cvmx_rng_get_random64();
6359 lmc_ns_ctl.s.ns_scramble_dis = 0;
6360 lmc_ns_ctl.s.adr_offset = 0;
6361 ctrl.s.scramble_ena = 1;
6362 }
6363
6364 s = lookup_env_ull(priv, "ddr_scramble_cfg0");
6365 if (s) {
6366 lmc_scramble_cfg0.u64 = simple_strtoull(s, NULL, 0);
6367 ctrl.s.scramble_ena = 1;
6368 }
6369 debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG0",
6370 lmc_scramble_cfg0.u64);
6371
6372 lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), lmc_scramble_cfg0.u64);
6373
6374 s = lookup_env_ull(priv, "ddr_scramble_cfg1");
6375 if (s) {
6376 lmc_scramble_cfg1.u64 = simple_strtoull(s, NULL, 0);
6377 ctrl.s.scramble_ena = 1;
6378 }
6379 debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG1",
6380 lmc_scramble_cfg1.u64);
6381 lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), lmc_scramble_cfg1.u64);
6382
6383 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X)) {
6384 s = lookup_env_ull(priv, "ddr_scramble_cfg2");
6385 if (s) {
6386 lmc_scramble_cfg2.u64 = simple_strtoull(s, NULL, 0);
6387 ctrl.s.scramble_ena = 1;
6388 }
6389 debug("%-45s : 0x%016llx\n", "LMC_SCRAMBLE_CFG2",
6390 lmc_scramble_cfg1.u64);
6391 lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num),
6392 lmc_scramble_cfg2.u64);
6393 }
6394
6395 s = lookup_env_ull(priv, "ddr_ns_ctl");
6396 if (s)
6397 lmc_ns_ctl.u64 = simple_strtoull(s, NULL, 0);
6398 debug("%-45s : 0x%016llx\n", "LMC_NS_CTL", lmc_ns_ctl.u64);
6399 lmc_wr(priv, CVMX_LMCX_NS_CTL(if_num), lmc_ns_ctl.u64);
6400
6401 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
6402}
6403
/*
 * Result of one read-leveling trial: the raw LMC(x)_RLEVEL_RANKx CSR
 * value and the score assigned to it (lower score == better).
 */
struct rl_score {
	u64 setting;
	int score;
};

/*
 * Read-leveling working state. Kept in file-scope statics placed in
 * .data (NOTE(review): presumably so they are usable at this early
 * stage of the U-Boot boot flow before BSS is ready -- confirm against
 * the platform's linker/boot setup).
 */
static union cvmx_lmcx_rlevel_rankx rl_rank __section(".data");
static union cvmx_lmcx_rlevel_ctl rl_ctl __section(".data");
static unsigned char rodt_ctl __section(".data");

static int rl_rodt_err __section(".data");
static unsigned char rtt_nom __section(".data");
static unsigned char rtt_idx __section(".data");
static char min_rtt_nom_idx __section(".data");
static char max_rtt_nom_idx __section(".data");
static char min_rodt_ctl __section(".data");
static char max_rodt_ctl __section(".data");
static int rl_dbg_loops __section(".data");
static unsigned char save_ddr2t __section(".data");
static int rl_samples __section(".data");
static char rl_compute __section(".data");
static char saved_ddr__ptune __section(".data");
static char saved_ddr__ntune __section(".data");
static char rl_comp_offs __section(".data");
static char saved_int_zqcs_dis __section(".data");
static int max_adj_rl_del_inc __section(".data");
static int print_nom_ohms __section(".data");
static int rl_print __section(".data");

#ifdef ENABLE_HARDCODED_RLEVEL
static char part_number[21] __section(".data");
#endif /* ENABLE_HARDCODED_RLEVEL */

/*
 * Histogram of "perfect" (error-free) read-leveling bitmask results:
 * how often each delay value was seen per byte lane, plus a per-lane
 * bitmask of which delay values were ever perfect.
 */
struct perfect_counts {
	u16 count[9][32]; // 8 data bytes + ECC, by 32 possible delay values
	u32 mask[9]; // 8+ECC, bitmask of perfect delays
};

static struct perfect_counts rank_perf[4] __section(".data");
static struct perfect_counts rodt_perfect_counts __section(".data");
static int pbm_lowsum_limit __section(".data");
// FIXME: PBM skip for RODT 240 and 34
static u32 pbm_rodt_skip __section(".data");

// control rank majority processing
static int disable_rank_majority __section(".data");

// default to mask 11b ODDs for DDR4 (except 73xx), else DISABLE
// for DDR3
static int enable_rldelay_bump __section(".data");
static int rldelay_bump_incr __section(".data");
static int disable_rlv_bump_this_byte __section(".data");
static u64 value_mask __section(".data");

/* Per-sample scratch state used by rodt_loop() below */
static struct rlevel_byte_data rl_byte[9] __section(".data");
static int sample_loops __section(".data");
static int max_samples __section(".data");
static int rl_rank_errors __section(".data");
static int rl_mask_err __section(".data");
static int rl_nonseq_err __section(".data");
static struct rlevel_bitmask rl_mask[9] __section(".data");
static int rl_best_rank_score __section(".data");

static int rodt_row_skip_mask __section(".data");
6467
6468static void rodt_loop(struct ddr_priv *priv, int rankx, struct rl_score
6469 rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4])
6470{
6471 union cvmx_lmcx_comp_ctl2 cc2;
6472 const int rl_separate_ab = 1;
6473 int i;
6474
6475 rl_best_rank_score = DEFAULT_BEST_RANK_SCORE;
6476 rl_rodt_err = 0;
6477 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
6478 cc2.cn78xx.rodt_ctl = rodt_ctl;
6479 lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
6480 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
6481 udelay(1); /* Give it a little time to take affect */
6482 if (rl_print > 1) {
6483 debug("Read ODT_CTL : 0x%x (%d ohms)\n",
6484 cc2.cn78xx.rodt_ctl,
6485 imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
6486 }
6487
6488 memset(rl_byte, 0, sizeof(rl_byte));
6489 memset(&rodt_perfect_counts, 0, sizeof(rodt_perfect_counts));
6490
6491 // when iter RODT is the target RODT, take more samples...
6492 max_samples = rl_samples;
6493 if (rodt_ctl == default_rodt_ctl)
6494 max_samples += rl_samples + 1;
6495
6496 for (sample_loops = 0; sample_loops < max_samples; sample_loops++) {
6497 int redoing_nonseq_errs = 0;
6498
6499 rl_mask_err = 0;
6500
6501 if (!(rl_separate_ab && spd_rdimm &&
6502 ddr_type == DDR4_DRAM)) {
6503 /* Clear read-level delays */
6504 lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
6505
6506 /* read-leveling */
6507 oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
6508
6509 do {
6510 rl_rank.u64 =
6511 lmc_rd(priv,
6512 CVMX_LMCX_RLEVEL_RANKX(rankx,
6513 if_num));
6514 } while (rl_rank.cn78xx.status != 3);
6515 }
6516
6517 rl_rank.u64 =
6518 lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
6519
6520 // start bitmask interpretation block
6521
6522 memset(rl_mask, 0, sizeof(rl_mask));
6523
6524 if (rl_separate_ab && spd_rdimm && ddr_type == DDR4_DRAM) {
6525 union cvmx_lmcx_rlevel_rankx rl_rank_aside;
6526 union cvmx_lmcx_modereg_params0 mp0;
6527
6528 /* A-side */
6529 mp0.u64 =
6530 lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
6531 mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */
6532 lmc_wr(priv,
6533 CVMX_LMCX_MODEREG_PARAMS0(if_num),
6534 mp0.u64);
6535
6536 /* Clear read-level delays */
6537 lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
6538
6539 /* read-leveling */
6540 oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
6541
6542 do {
6543 rl_rank.u64 =
6544 lmc_rd(priv,
6545 CVMX_LMCX_RLEVEL_RANKX(rankx,
6546 if_num));
6547 } while (rl_rank.cn78xx.status != 3);
6548
6549 rl_rank.u64 =
6550 lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
6551 if_num));
6552
6553 rl_rank_aside.u64 = rl_rank.u64;
6554
6555 rl_mask[0].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 0);
6556 rl_mask[1].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 1);
6557 rl_mask[2].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 2);
6558 rl_mask[3].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 3);
6559 rl_mask[8].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 8);
6560 /* A-side complete */
6561
6562 /* B-side */
6563 mp0.u64 =
6564 lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
6565 mp0.s.mprloc = 3; /* MPR Page 0 Location 3 */
6566 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
6567 mp0.u64);
6568
6569 /* Clear read-level delays */
6570 lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), 0);
6571
6572 /* read-leveling */
6573 oct3_ddr3_seq(priv, 1 << rankx, if_num, 1);
6574
6575 do {
6576 rl_rank.u64 =
6577 lmc_rd(priv,
6578 CVMX_LMCX_RLEVEL_RANKX(rankx,
6579 if_num));
6580 } while (rl_rank.cn78xx.status != 3);
6581
6582 rl_rank.u64 =
6583 lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
6584 if_num));
6585
6586 rl_mask[4].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 4);
6587 rl_mask[5].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 5);
6588 rl_mask[6].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 6);
6589 rl_mask[7].bm = lmc_ddr3_rl_dbg_read(priv, if_num, 7);
6590 /* B-side complete */
6591
6592 upd_rl_rank(&rl_rank, 0, rl_rank_aside.s.byte0);
6593 upd_rl_rank(&rl_rank, 1, rl_rank_aside.s.byte1);
6594 upd_rl_rank(&rl_rank, 2, rl_rank_aside.s.byte2);
6595 upd_rl_rank(&rl_rank, 3, rl_rank_aside.s.byte3);
6596 /* ECC A-side */
6597 upd_rl_rank(&rl_rank, 8, rl_rank_aside.s.byte8);
6598
6599 mp0.u64 =
6600 lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num));
6601 mp0.s.mprloc = 0; /* MPR Page 0 Location 0 */
6602 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(if_num),
6603 mp0.u64);
6604 }
6605
6606 /*
6607 * Evaluate the quality of the read-leveling delays from the
6608 * bitmasks. Also save off a software computed read-leveling
6609 * mask that may be used later to qualify the delay results
6610 * from Octeon.
6611 */
6612 for (i = 0; i < (8 + ecc_ena); ++i) {
6613 int bmerr;
6614
6615 if (!(if_bytemask & (1 << i)))
6616 continue;
6617 if (!(rl_separate_ab && spd_rdimm &&
6618 ddr_type == DDR4_DRAM)) {
6619 rl_mask[i].bm =
6620 lmc_ddr3_rl_dbg_read(priv, if_num, i);
6621 }
6622 bmerr = validate_ddr3_rlevel_bitmask(&rl_mask[i],
6623 ddr_type);
6624 rl_mask[i].errs = bmerr;
6625 rl_mask_err += bmerr;
6626 // count only the "perfect" bitmasks
6627 if (ddr_type == DDR4_DRAM && !bmerr) {
6628 int delay;
6629 // FIXME: for now, simple filtering:
6630 // do NOT count PBMs for RODTs in skip mask
6631 if ((1U << rodt_ctl) & pbm_rodt_skip)
6632 continue;
6633 // FIXME: could optimize this a bit?
6634 delay = get_rl_rank(&rl_rank, i);
6635 rank_perf[rankx].count[i][delay] += 1;
6636 rank_perf[rankx].mask[i] |=
6637 (1ULL << delay);
6638 rodt_perfect_counts.count[i][delay] += 1;
6639 rodt_perfect_counts.mask[i] |= (1ULL << delay);
6640 }
6641 }
6642
6643 /* Set delays for unused bytes to match byte 0. */
6644 for (i = 0; i < 9; ++i) {
6645 if (if_bytemask & (1 << i))
6646 continue;
6647 upd_rl_rank(&rl_rank, i, rl_rank.s.byte0);
6648 }
6649
6650 /*
6651 * Save a copy of the byte delays in physical
6652 * order for sequential evaluation.
6653 */
6654 unpack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, rl_rank);
6655
6656 redo_nonseq_errs:
6657
6658 rl_nonseq_err = 0;
6659 if (!disable_sequential_delay_check) {
6660 for (i = 0; i < 9; ++i)
6661 rl_byte[i].sqerrs = 0;
6662
6663 if ((if_bytemask & 0xff) == 0xff) {
6664 /*
6665 * Evaluate delay sequence across the whole
6666 * range of bytes for standard dimms.
6667 */
6668 /* 1=RDIMM, 5=Mini-RDIMM */
6669 if (spd_dimm_type == 1 || spd_dimm_type == 5) {
6670 int reg_adj_del = abs(rl_byte[4].delay -
6671 rl_byte[5].delay);
6672
6673 /*
6674 * Registered dimm topology routes
6675 * from the center.
6676 */
6677 rl_nonseq_err +=
6678 nonseq_del(rl_byte, 0,
6679 3 + ecc_ena,
6680 max_adj_rl_del_inc);
6681 rl_nonseq_err +=
6682 nonseq_del(rl_byte, 5,
6683 7 + ecc_ena,
6684 max_adj_rl_del_inc);
6685 // byte 5 sqerrs never gets cleared
6686 // for RDIMMs
6687 rl_byte[5].sqerrs = 0;
6688 if (reg_adj_del > 1) {
6689 /*
6690 * Assess proximity of bytes on
6691 * opposite sides of register
6692 */
6693 rl_nonseq_err += (reg_adj_del -
6694 1) *
6695 RLEVEL_ADJACENT_DELAY_ERROR;
6696 // update byte 5 error
6697 rl_byte[5].sqerrs +=
6698 (reg_adj_del - 1) *
6699 RLEVEL_ADJACENT_DELAY_ERROR;
6700 }
6701 }
6702
6703 /* 2=UDIMM, 6=Mini-UDIMM */
6704 if (spd_dimm_type == 2 || spd_dimm_type == 6) {
6705 /*
6706 * Unbuffered dimm topology routes
6707 * from end to end.
6708 */
6709 rl_nonseq_err += nonseq_del(rl_byte, 0,
6710 7 + ecc_ena,
6711 max_adj_rl_del_inc);
6712 }
6713 } else {
6714 rl_nonseq_err += nonseq_del(rl_byte, 0,
6715 3 + ecc_ena,
6716 max_adj_rl_del_inc);
6717 }
6718 } /* if (! disable_sequential_delay_check) */
6719
6720 rl_rank_errors = rl_mask_err + rl_nonseq_err;
6721
6722 // print original sample here only if we are not really
6723 // averaging or picking best
6724 // also do not print if we were redoing the NONSEQ score
6725 // for using COMPUTED
6726 if (!redoing_nonseq_errs && rl_samples < 2) {
6727 if (rl_print > 1) {
6728 display_rl_bm(if_num, rankx, rl_mask, ecc_ena);
6729 display_rl_bm_scores(if_num, rankx, rl_mask,
6730 ecc_ena);
6731 display_rl_seq_scores(if_num, rankx, rl_byte,
6732 ecc_ena);
6733 }
6734 display_rl_with_score(if_num, rl_rank, rankx,
6735 rl_rank_errors);
6736 }
6737
6738 if (rl_compute) {
6739 if (!redoing_nonseq_errs) {
6740 /* Recompute the delays based on the bitmask */
6741 for (i = 0; i < (8 + ecc_ena); ++i) {
6742 if (!(if_bytemask & (1 << i)))
6743 continue;
6744
6745 upd_rl_rank(&rl_rank, i,
6746 compute_ddr3_rlevel_delay(
6747 rl_mask[i].mstart,
6748 rl_mask[i].width,
6749 rl_ctl));
6750 }
6751
6752 /*
6753 * Override the copy of byte delays with the
6754 * computed results.
6755 */
6756 unpack_rlevel_settings(if_bytemask, ecc_ena,
6757 rl_byte, rl_rank);
6758
6759 redoing_nonseq_errs = 1;
6760 goto redo_nonseq_errs;
6761
6762 } else {
6763 /*
6764 * now print this if already printed the
6765 * original sample
6766 */
6767 if (rl_samples < 2 || rl_print) {
6768 display_rl_with_computed(if_num,
6769 rl_rank, rankx,
6770 rl_rank_errors);
6771 }
6772 }
6773 } /* if (rl_compute) */
6774
6775 // end bitmask interpretation block
6776
6777 // if it is a better (lower) score, then keep it
6778 if (rl_rank_errors < rl_best_rank_score) {
6779 rl_best_rank_score = rl_rank_errors;
6780
6781 // save the new best delays and best errors
6782 for (i = 0; i < (8 + ecc_ena); ++i) {
6783 rl_byte[i].best = rl_byte[i].delay;
6784 rl_byte[i].bestsq = rl_byte[i].sqerrs;
6785 // save bitmasks and their scores as well
6786 // xlate UNPACKED index to PACKED index to
6787 // get from rl_mask
6788 rl_byte[i].bm = rl_mask[XUP(i, !!ecc_ena)].bm;
6789 rl_byte[i].bmerrs =
6790 rl_mask[XUP(i, !!ecc_ena)].errs;
6791 }
6792 }
6793
6794 rl_rodt_err += rl_rank_errors;
6795 }
6796
6797 /* We recorded the best score across the averaging loops */
6798 rl_score[rtt_nom][rodt_ctl][rankx].score = rl_best_rank_score;
6799
6800 /*
6801 * Restore the delays from the best fields that go with the best
6802 * score
6803 */
6804 for (i = 0; i < 9; ++i) {
6805 rl_byte[i].delay = rl_byte[i].best;
6806 rl_byte[i].sqerrs = rl_byte[i].bestsq;
6807 }
6808
6809 rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
6810
6811 pack_rlevel_settings(if_bytemask, ecc_ena, rl_byte, &rl_rank);
6812
6813 if (rl_samples > 1) {
6814 // restore the "best" bitmasks and their scores for printing
6815 for (i = 0; i < 9; ++i) {
6816 if ((if_bytemask & (1 << i)) == 0)
6817 continue;
6818 // xlate PACKED index to UNPACKED index to get from
6819 // rl_byte
6820 rl_mask[i].bm = rl_byte[XPU(i, !!ecc_ena)].bm;
6821 rl_mask[i].errs = rl_byte[XPU(i, !!ecc_ena)].bmerrs;
6822 }
6823
6824 // maybe print bitmasks/scores here
6825 if (rl_print > 1) {
6826 display_rl_bm(if_num, rankx, rl_mask, ecc_ena);
6827 display_rl_bm_scores(if_num, rankx, rl_mask, ecc_ena);
6828 display_rl_seq_scores(if_num, rankx, rl_byte, ecc_ena);
6829
6830 display_rl_with_rodt(if_num, rl_rank, rankx,
6831 rl_score[rtt_nom][rodt_ctl][rankx].score,
6832 print_nom_ohms,
6833 imp_val->rodt_ohms[rodt_ctl],
6834 WITH_RODT_BESTSCORE);
6835
6836 debug("-----------\n");
6837 }
6838 }
6839
6840 rl_score[rtt_nom][rodt_ctl][rankx].setting = rl_rank.u64;
6841
6842 // print out the PBMs for the current RODT
6843 if (ddr_type == DDR4_DRAM && rl_print > 1) { // verbosity?
6844 // FIXME: change verbosity level after debug complete...
6845
6846 for (i = 0; i < 9; i++) {
6847 u64 temp_mask;
6848 int num_values;
6849
6850 // FIXME: PBM skip for RODTs in mask
6851 if ((1U << rodt_ctl) & pbm_rodt_skip)
6852 continue;
6853
6854 temp_mask = rodt_perfect_counts.mask[i];
6855 num_values = __builtin_popcountll(temp_mask);
6856 i = __builtin_ffsll(temp_mask) - 1;
6857
6858 debug("N%d.LMC%d.R%d: PERFECT: RODT %3d: Byte %d: mask 0x%02llx (%d): ",
6859 node, if_num, rankx,
6860 imp_val->rodt_ohms[rodt_ctl],
6861 i, temp_mask >> i, num_values);
6862
6863 while (temp_mask != 0) {
6864 i = __builtin_ffsll(temp_mask) - 1;
6865 debug("%2d(%2d) ", i,
6866 rodt_perfect_counts.count[i][i]);
6867 temp_mask &= ~(1UL << i);
6868 } /* while (temp_mask != 0) */
6869 debug("\n");
6870 }
6871 }
6872}
6873
6874static void rank_major_loop(struct ddr_priv *priv, int rankx, struct rl_score
6875 rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4])
6876{
6877 /* Start with an arbitrarily high score */
6878 int best_rank_score = DEFAULT_BEST_RANK_SCORE;
6879 int best_rank_rtt_nom = 0;
6880 int best_rank_ctl = 0;
6881 int best_rank_ohms = 0;
6882 int best_rankx = 0;
6883 int dimm_rank_mask;
6884 int max_rank_score;
6885 union cvmx_lmcx_rlevel_rankx saved_rl_rank;
6886 int next_ohms;
6887 int orankx;
6888 int next_score = 0;
6889 int best_byte, new_byte, temp_byte, orig_best_byte;
6890 int rank_best_bytes[9];
6891 int byte_sh;
6892 int avg_byte;
6893 int avg_diff;
6894 int i;
6895
6896 if (!(rank_mask & (1 << rankx)))
6897 return;
6898
6899 // some of the rank-related loops below need to operate only on
6900 // the ranks of a single DIMM,
6901 // so create a mask for their use here
6902 if (num_ranks == 4) {
6903 dimm_rank_mask = rank_mask; // should be 1111
6904 } else {
6905 dimm_rank_mask = rank_mask & 3; // should be 01 or 11
6906 if (rankx >= 2) {
6907 // doing a rank on the second DIMM, should be
6908 // 0100 or 1100
6909 dimm_rank_mask <<= 2;
6910 }
6911 }
6912 debug("DIMM rank mask: 0x%x, rank mask: 0x%x, rankx: %d\n",
6913 dimm_rank_mask, rank_mask, rankx);
6914
6915 // this is the start of the BEST ROW SCORE LOOP
6916
6917 for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
6918 rtt_nom = imp_val->rtt_nom_table[rtt_idx];
6919
6920 debug("N%d.LMC%d.R%d: starting RTT_NOM %d (%d)\n",
6921 node, if_num, rankx, rtt_nom,
6922 imp_val->rtt_nom_ohms[rtt_nom]);
6923
6924 for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
6925 --rodt_ctl) {
6926 next_ohms = imp_val->rodt_ohms[rodt_ctl];
6927
6928 // skip RODT rows in mask, but *NOT* rows with too
6929 // high a score;
6930 // we will not use the skipped ones for printing or
6931 // evaluating, but we need to allow all the
6932 // non-skipped ones to be candidates for "best"
6933 if (((1 << rodt_ctl) & rodt_row_skip_mask) != 0) {
6934 debug("N%d.LMC%d.R%d: SKIPPING rodt:%d (%d) with rank_score:%d\n",
6935 node, if_num, rankx, rodt_ctl,
6936 next_ohms, next_score);
6937 continue;
6938 }
6939
6940 // this is ROFFIX-0528
6941 for (orankx = 0; orankx < dimm_count * 4; orankx++) {
6942 // stay on the same DIMM
6943 if (!(dimm_rank_mask & (1 << orankx)))
6944 continue;
6945
6946 next_score = rl_score[rtt_nom][rodt_ctl][orankx].score;
6947
6948 // always skip a higher score
6949 if (next_score > best_rank_score)
6950 continue;
6951
6952 // if scores are equal
6953 if (next_score == best_rank_score) {
6954 // always skip lower ohms
6955 if (next_ohms < best_rank_ohms)
6956 continue;
6957
6958 // if same ohms
6959 if (next_ohms == best_rank_ohms) {
6960 // always skip the other rank(s)
6961 if (orankx != rankx)
6962 continue;
6963 }
6964 // else next_ohms are greater,
6965 // always choose it
6966 }
6967 // else next_score is less than current best,
6968 // so always choose it
6969 debug("N%d.LMC%d.R%d: new best score: rank %d, rodt %d(%3d), new best %d, previous best %d(%d)\n",
6970 node, if_num, rankx, orankx, rodt_ctl, next_ohms, next_score,
6971 best_rank_score, best_rank_ohms);
6972 best_rank_score = next_score;
6973 best_rank_rtt_nom = rtt_nom;
6974 //best_rank_nom_ohms = rtt_nom_ohms;
6975 best_rank_ctl = rodt_ctl;
6976 best_rank_ohms = next_ohms;
6977 best_rankx = orankx;
6978 rl_rank.u64 =
6979 rl_score[rtt_nom][rodt_ctl][orankx].setting;
6980 }
6981 }
6982 }
6983
6984 // this is the end of the BEST ROW SCORE LOOP
6985
6986 // DANGER, Will Robinson!! Abort now if we did not find a best
6987 // score at all...
6988 if (best_rank_score == DEFAULT_BEST_RANK_SCORE) {
6989 printf("N%d.LMC%d.R%d: WARNING: no best rank score found - resetting node...\n",
6990 node, if_num, rankx);
6991 mdelay(500);
6992 do_reset(NULL, 0, 0, NULL);
6993 }
6994
6995 // FIXME: relative now, but still arbitrary...
6996 max_rank_score = best_rank_score;
6997 if (ddr_type == DDR4_DRAM) {
6998 // halve the range if 2 DIMMs unless they are single rank...
6999 max_rank_score += (MAX_RANK_SCORE_LIMIT / ((num_ranks > 1) ?
7000 dimm_count : 1));
7001 } else {
7002 // Since DDR3 typically has a wider score range,
7003 // keep more of them always
7004 max_rank_score += MAX_RANK_SCORE_LIMIT;
7005 }
7006
7007 if (!ecc_ena) {
7008 /* ECC is not used */
7009 rl_rank.s.byte8 = rl_rank.s.byte0;
7010 }
7011
7012 // at the end, write the best row settings to the current rank
7013 lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num), rl_rank.u64);
7014 rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
7015
7016 saved_rl_rank.u64 = rl_rank.u64;
7017
7018 // this is the start of the PRINT LOOP
7019 int pass;
7020
7021 // for pass==0, print current rank, pass==1 print other rank(s)
7022 // this is done because we want to show each ranks RODT values
7023 // together, not interlaced
7024 // keep separates for ranks - pass=0 target rank, pass=1 other
7025 // rank on DIMM
7026 int mask_skipped[2] = {0, 0};
7027 int score_skipped[2] = {0, 0};
7028 int selected_rows[2] = {0, 0};
7029 int zero_scores[2] = {0, 0};
7030 for (pass = 0; pass < 2; pass++) {
7031 for (orankx = 0; orankx < dimm_count * 4; orankx++) {
7032 // stay on the same DIMM
7033 if (!(dimm_rank_mask & (1 << orankx)))
7034 continue;
7035
7036 if ((pass == 0 && orankx != rankx) ||
7037 (pass != 0 && orankx == rankx))
7038 continue;
7039
7040 for (rtt_idx = min_rtt_nom_idx;
7041 rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
7042 rtt_nom = imp_val->rtt_nom_table[rtt_idx];
7043 if (dyn_rtt_nom_mask == 0) {
7044 print_nom_ohms = -1;
7045 } else {
7046 print_nom_ohms =
7047 imp_val->rtt_nom_ohms[rtt_nom];
7048 }
7049
7050 // cycle through all the RODT values...
7051 for (rodt_ctl = max_rodt_ctl;
7052 rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
7053 union cvmx_lmcx_rlevel_rankx
7054 temp_rl_rank;
7055 int temp_score =
7056 rl_score[rtt_nom][rodt_ctl][orankx].score;
7057 int skip_row;
7058
7059 temp_rl_rank.u64 =
7060 rl_score[rtt_nom][rodt_ctl][orankx].setting;
7061
7062 // skip RODT rows in mask, or rows
7063 // with too high a score;
7064 // we will not use them for printing
7065 // or evaluating...
7066 if ((1 << rodt_ctl) &
7067 rodt_row_skip_mask) {
7068 skip_row = WITH_RODT_SKIPPING;
7069 ++mask_skipped[pass];
7070 } else if (temp_score >
7071 max_rank_score) {
7072 skip_row = WITH_RODT_SKIPPING;
7073 ++score_skipped[pass];
7074 } else {
7075 skip_row = WITH_RODT_BLANK;
7076 ++selected_rows[pass];
7077 if (temp_score == 0)
7078 ++zero_scores[pass];
7079 }
7080
7081 // identify and print the BEST ROW
7082 // when it comes up
7083 if (skip_row == WITH_RODT_BLANK &&
7084 best_rankx == orankx &&
7085 best_rank_rtt_nom == rtt_nom &&
7086 best_rank_ctl == rodt_ctl)
7087 skip_row = WITH_RODT_BESTROW;
7088
7089 if (rl_print) {
7090 display_rl_with_rodt(if_num,
7091 temp_rl_rank, orankx, temp_score,
7092 print_nom_ohms,
7093 imp_val->rodt_ohms[rodt_ctl],
7094 skip_row);
7095 }
7096 }
7097 }
7098 }
7099 }
7100 debug("N%d.LMC%d.R%d: RLROWS: selected %d+%d, zero_scores %d+%d, mask_skipped %d+%d, score_skipped %d+%d\n",
7101 node, if_num, rankx, selected_rows[0], selected_rows[1],
7102 zero_scores[0], zero_scores[1], mask_skipped[0], mask_skipped[1],
7103 score_skipped[0], score_skipped[1]);
7104 // this is the end of the PRINT LOOP
7105
7106 // now evaluate which bytes need adjusting
7107 // collect the new byte values; first init with current best for
7108 // neighbor use
7109 for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) {
7110 rank_best_bytes[i] = (int)(rl_rank.u64 >> byte_sh) &
7111 RLEVEL_BYTE_MSK;
7112 }
7113
7114 // this is the start of the BEST BYTE LOOP
7115
7116 for (i = 0, byte_sh = 0; i < 8 + ecc_ena; i++, byte_sh += 6) {
7117 int sum = 0, count = 0;
7118 int count_less = 0, count_same = 0, count_more = 0;
7119 int count_byte; // save the value we counted around
7120 // for rank majority use
7121 int rank_less = 0, rank_same = 0, rank_more = 0;
7122 int neighbor;
7123 int neigh_byte;
7124
7125 best_byte = rank_best_bytes[i];
7126 orig_best_byte = rank_best_bytes[i];
7127
7128 // this is the start of the BEST BYTE AVERAGING LOOP
7129
7130 // validate the initial "best" byte by looking at the
7131 // average of the unskipped byte-column entries
7132 // we want to do this before we go further, so we can
7133 // try to start with a better initial value
7134 // this is the so-called "BESTBUY" patch set
7135
7136 for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
7137 ++rtt_idx) {
7138 rtt_nom = imp_val->rtt_nom_table[rtt_idx];
7139
7140 for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
7141 --rodt_ctl) {
7142 union cvmx_lmcx_rlevel_rankx temp_rl_rank;
7143 int temp_score;
7144
7145 // average over all the ranks
7146 for (orankx = 0; orankx < dimm_count * 4;
7147 orankx++) {
7148 // stay on the same DIMM
7149 if (!(dimm_rank_mask & (1 << orankx)))
7150 continue;
7151
7152 temp_score =
7153 rl_score[rtt_nom][rodt_ctl][orankx].score;
7154 // skip RODT rows in mask, or rows with
7155 // too high a score;
7156 // we will not use them for printing or
7157 // evaluating...
7158
7159 if (!((1 << rodt_ctl) &
7160 rodt_row_skip_mask) &&
7161 temp_score <= max_rank_score) {
7162 temp_rl_rank.u64 =
7163 rl_score[rtt_nom][rodt_ctl][orankx].setting;
7164 temp_byte =
7165 (int)(temp_rl_rank.u64 >> byte_sh) &
7166 RLEVEL_BYTE_MSK;
7167 sum += temp_byte;
7168 count++;
7169 }
7170 }
7171 }
7172 }
7173
7174 // this is the end of the BEST BYTE AVERAGING LOOP
7175
7176 // FIXME: validate count and sum??
7177 avg_byte = (int)divide_nint(sum, count);
7178 avg_diff = best_byte - avg_byte;
7179 new_byte = best_byte;
7180 if (avg_diff != 0) {
7181 // bump best up/dn by 1, not necessarily all the
7182 // way to avg
7183 new_byte = best_byte + ((avg_diff > 0) ? -1 : 1);
7184 }
7185
7186 if (rl_print) {
7187 debug("N%d.LMC%d.R%d: START: Byte %d: best %d is different by %d from average %d, using %d.\n",
7188 node, if_num, rankx,
7189 i, best_byte, avg_diff, avg_byte, new_byte);
7190 }
7191 best_byte = new_byte;
7192 count_byte = new_byte; // save the value we will count around
7193
7194 // At this point best_byte is either:
7195 // 1. the original byte-column value from the best scoring
7196 // RODT row, OR
7197 // 2. that value bumped toward the average of all the
7198 // byte-column values
7199 //
7200 // best_byte will not change from here on...
7201
7202 // this is the start of the BEST BYTE COUNTING LOOP
7203
7204 // NOTE: we do this next loop separately from above, because
7205 // we count relative to "best_byte"
7206 // which may have been modified by the above averaging
7207 // operation...
7208
7209 for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
7210 ++rtt_idx) {
7211 rtt_nom = imp_val->rtt_nom_table[rtt_idx];
7212
7213 for (rodt_ctl = max_rodt_ctl; rodt_ctl >= min_rodt_ctl;
7214 --rodt_ctl) {
7215 union cvmx_lmcx_rlevel_rankx temp_rl_rank;
7216 int temp_score;
7217
7218 for (orankx = 0; orankx < dimm_count * 4;
7219 orankx++) { // count over all the ranks
7220 // stay on the same DIMM
7221 if (!(dimm_rank_mask & (1 << orankx)))
7222 continue;
7223
7224 temp_score =
7225 rl_score[rtt_nom][rodt_ctl][orankx].score;
7226 // skip RODT rows in mask, or rows
7227 // with too high a score;
7228 // we will not use them for printing
7229 // or evaluating...
7230 if (((1 << rodt_ctl) &
7231 rodt_row_skip_mask) ||
7232 temp_score > max_rank_score)
7233 continue;
7234
7235 temp_rl_rank.u64 =
7236 rl_score[rtt_nom][rodt_ctl][orankx].setting;
7237 temp_byte = (temp_rl_rank.u64 >>
7238 byte_sh) & RLEVEL_BYTE_MSK;
7239
7240 if (temp_byte == 0)
7241 ; // do not count it if illegal
7242 else if (temp_byte == best_byte)
7243 count_same++;
7244 else if (temp_byte == best_byte - 1)
7245 count_less++;
7246 else if (temp_byte == best_byte + 1)
7247 count_more++;
7248 // else do not count anything more
7249 // than 1 away from the best
7250
7251 // no rank counting if disabled
7252 if (disable_rank_majority)
7253 continue;
7254
7255 // FIXME? count is relative to
7256 // best_byte; should it be rank-based?
7257 // rank counts only on main rank
7258 if (orankx != rankx)
7259 continue;
7260 else if (temp_byte == best_byte)
7261 rank_same++;
7262 else if (temp_byte == best_byte - 1)
7263 rank_less++;
7264 else if (temp_byte == best_byte + 1)
7265 rank_more++;
7266 }
7267 }
7268 }
7269
7270 if (rl_print) {
7271 debug("N%d.LMC%d.R%d: COUNT: Byte %d: orig %d now %d, more %d same %d less %d (%d/%d/%d)\n",
7272 node, if_num, rankx,
7273 i, orig_best_byte, best_byte,
7274 count_more, count_same, count_less,
7275 rank_more, rank_same, rank_less);
7276 }
7277
7278 // this is the end of the BEST BYTE COUNTING LOOP
7279
7280 // choose the new byte value
7281 // we need to check that there is no gap greater than 2
7282 // between adjacent bytes (adjacency depends on DIMM type)
7283 // use the neighbor value to help decide
7284 // initially, the rank_best_bytes[] will contain values from
7285 // the chosen lowest score rank
7286 new_byte = 0;
7287
7288 // neighbor is index-1 unless we are index 0 or index 8 (ECC)
7289 neighbor = (i == 8) ? 3 : ((i == 0) ? 1 : i - 1);
7290 neigh_byte = rank_best_bytes[neighbor];
7291
7292 // can go up or down or stay the same, so look at a numeric
7293 // average to help
7294 new_byte = (int)divide_nint(((count_more * (best_byte + 1)) +
7295 (count_same * (best_byte + 0)) +
7296 (count_less * (best_byte - 1))),
7297 max(1, (count_more + count_same +
7298 count_less)));
7299
7300 // use neighbor to help choose with average
7301 if (i > 0 && (abs(neigh_byte - new_byte) > 2) &&
7302 !disable_sequential_delay_check) {
7303 // but not for byte 0
7304 int avg_pick = new_byte;
7305
7306 if ((new_byte - best_byte) != 0) {
7307 // back to best, average did not get better
7308 new_byte = best_byte;
7309 } else {
7310 // avg was the same, still too far, now move
7311 // it towards the neighbor
7312 new_byte += (neigh_byte > new_byte) ? 1 : -1;
7313 }
7314
7315 if (rl_print) {
7316 debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: neighbor %d too different %d from average %d, picking %d.\n",
7317 node, if_num, rankx,
7318 i, neighbor, neigh_byte, avg_pick,
7319 new_byte);
7320 }
7321 } else {
7322 // NOTE:
7323 // For now, we let the neighbor processing above trump
7324 // the new simple majority processing here.
7325 // This is mostly because we have seen no smoking gun
7326 // for a neighbor bad choice (yet?).
7327 // Also note that we will ALWAYS be using byte 0
7328 // majority, because of the if clause above.
7329
7330 // majority is dependent on the counts, which are
7331 // relative to best_byte, so start there
7332 int maj_byte = best_byte;
7333 int rank_maj;
7334 int rank_sum;
7335
7336 if (count_more > count_same &&
7337 count_more > count_less) {
7338 maj_byte++;
7339 } else if (count_less > count_same &&
7340 count_less > count_more) {
7341 maj_byte--;
7342 }
7343
7344 if (maj_byte != new_byte) {
7345 // print only when majority choice is
7346 // different from average
7347 if (rl_print) {
7348 debug("N%d.LMC%d.R%d: MAJORTY: Byte %d: picking majority of %d over average %d.\n",
7349 node, if_num, rankx, i, maj_byte,
7350 new_byte);
7351 }
7352 new_byte = maj_byte;
7353 } else {
7354 if (rl_print) {
7355 debug("N%d.LMC%d.R%d: AVERAGE: Byte %d: picking average of %d.\n",
7356 node, if_num, rankx, i, new_byte);
7357 }
7358 }
7359
7360 if (!disable_rank_majority) {
7361 // rank majority is dependent on the rank
7362 // counts, which are relative to best_byte,
7363 // so start there, and adjust according to the
7364 // rank counts majority
7365 rank_maj = best_byte;
7366 if (rank_more > rank_same &&
7367 rank_more > rank_less) {
7368 rank_maj++;
7369 } else if (rank_less > rank_same &&
7370 rank_less > rank_more) {
7371 rank_maj--;
7372 }
7373 rank_sum = rank_more + rank_same + rank_less;
7374
7375 // now, let rank majority possibly rule over
7376 // the current new_byte however we got it
7377 if (rank_maj != new_byte) { // only if different
7378 // Here is where we decide whether to
7379 // completely apply RANK_MAJORITY or not
7380 // ignore if less than
7381 if (rank_maj < new_byte) {
7382 if (rl_print) {
7383 debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: LESS: NOT using %d over %d.\n",
7384 node, if_num,
7385 rankx, i,
7386 rank_maj,
7387 new_byte);
7388 }
7389 } else {
7390 // For the moment, we do it
7391 // ONLY when running 2-slot
7392 // configs
7393 // OR when rank_sum is big
7394 // enough
7395 if (dimm_count > 1 ||
7396 rank_sum > 2) {
7397 // print only when rank
7398 // majority choice is
7399 // selected
7400 if (rl_print) {
7401 debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: picking %d over %d.\n",
7402 node,
7403 if_num,
7404 rankx,
7405 i,
7406 rank_maj,
7407 new_byte);
7408 }
7409 new_byte = rank_maj;
7410 } else {
7411 // FIXME: print some
7412 // info when we could
7413 // have chosen RANKMAJ
7414 // but did not
7415 if (rl_print) {
7416 debug("N%d.LMC%d.R%d: RANKMAJ: Byte %d: NOT using %d over %d (best=%d,sum=%d).\n",
7417 node,
7418 if_num,
7419 rankx,
7420 i,
7421 rank_maj,
7422 new_byte,
7423 best_byte,
7424 rank_sum);
7425 }
7426 }
7427 }
7428 }
7429 } /* if (!disable_rank_majority) */
7430 }
7431 // one last check:
7432 // if new_byte is still count_byte, BUT there was no count
7433 // for that value, DO SOMETHING!!!
7434 // FIXME: go back to original best byte from the best row
7435 if (new_byte == count_byte && count_same == 0) {
7436 new_byte = orig_best_byte;
7437 if (rl_print) {
7438 debug("N%d.LMC%d.R%d: FAILSAF: Byte %d: going back to original %d.\n",
7439 node, if_num, rankx, i, new_byte);
7440 }
7441 }
7442 // Look at counts for "perfect" bitmasks (PBMs) if we had
7443 // any for this byte-lane.
7444 // Remember, we only counted for DDR4, so zero means none
7445 // or DDR3, and we bypass this...
7446 value_mask = rank_perf[rankx].mask[i];
7447 disable_rlv_bump_this_byte = 0;
7448
7449 if (value_mask != 0 && rl_ctl.cn78xx.offset == 1) {
7450 int i, delay_count, delay_max = 0, del_val = 0;
7451 int num_values = __builtin_popcountll(value_mask);
7452 int sum_counts = 0;
7453 u64 temp_mask = value_mask;
7454
7455 disable_rlv_bump_this_byte = 1;
7456 i = __builtin_ffsll(temp_mask) - 1;
7457 if (rl_print)
7458 debug("N%d.LMC%d.R%d: PERFECT: Byte %d: OFF1: mask 0x%02llx (%d): ",
7459 node, if_num, rankx, i, value_mask >> i,
7460 num_values);
7461
7462 while (temp_mask != 0) {
7463 i = __builtin_ffsll(temp_mask) - 1;
7464 delay_count = rank_perf[rankx].count[i][i];
7465 sum_counts += delay_count;
7466 if (rl_print)
7467 debug("%2d(%2d) ", i, delay_count);
7468 if (delay_count >= delay_max) {
7469 delay_max = delay_count;
7470 del_val = i;
7471 }
7472 temp_mask &= ~(1UL << i);
7473 } /* while (temp_mask != 0) */
7474
7475 // if sum_counts is small, just use NEW_BYTE
7476 if (sum_counts < pbm_lowsum_limit) {
7477 if (rl_print)
7478 debug(": LOWSUM (%2d), choose ORIG ",
7479 sum_counts);
7480 del_val = new_byte;
7481 delay_max = rank_perf[rankx].count[i][del_val];
7482 }
7483
7484 // finish printing here...
7485 if (rl_print) {
7486 debug(": USING %2d (%2d) D%d\n", del_val,
7487 delay_max, disable_rlv_bump_this_byte);
7488 }
7489
7490 new_byte = del_val; // override with best PBM choice
7491
7492 } else if ((value_mask != 0) && (rl_ctl.cn78xx.offset == 2)) {
7493 // if (value_mask != 0) {
7494 int i, delay_count, del_val;
7495 int num_values = __builtin_popcountll(value_mask);
7496 int sum_counts = 0;
7497 u64 temp_mask = value_mask;
7498
7499 i = __builtin_ffsll(temp_mask) - 1;
7500 if (rl_print)
7501 debug("N%d.LMC%d.R%d: PERFECT: Byte %d: mask 0x%02llx (%d): ",
7502 node, if_num, rankx, i, value_mask >> i,
7503 num_values);
7504 while (temp_mask != 0) {
7505 i = __builtin_ffsll(temp_mask) - 1;
7506 delay_count = rank_perf[rankx].count[i][i];
7507 sum_counts += delay_count;
7508 if (rl_print)
7509 debug("%2d(%2d) ", i, delay_count);
7510 temp_mask &= ~(1UL << i);
7511 } /* while (temp_mask != 0) */
7512
7513 del_val = __builtin_ffsll(value_mask) - 1;
7514 delay_count =
7515 rank_perf[rankx].count[i][del_val];
7516
7517 // overkill, normally only 1-4 bits
7518 i = (value_mask >> del_val) & 0x1F;
7519
7520 // if sum_counts is small, treat as special and use
7521 // NEW_BYTE
7522 if (sum_counts < pbm_lowsum_limit) {
7523 if (rl_print)
7524 debug(": LOWSUM (%2d), choose ORIG",
7525 sum_counts);
7526 i = 99; // SPECIAL case...
7527 }
7528
7529 switch (i) {
7530 case 0x01 /* 00001b */:
7531 // allow BUMP
7532 break;
7533
7534 case 0x13 /* 10011b */:
7535 case 0x0B /* 01011b */:
7536 case 0x03 /* 00011b */:
7537 del_val += 1; // take the second
7538 disable_rlv_bump_this_byte = 1; // allow no BUMP
7539 break;
7540
7541 case 0x0D /* 01101b */:
7542 case 0x05 /* 00101b */:
7543 // test count of lowest and all
7544 if (delay_count >= 5 || sum_counts <= 5)
7545 del_val += 1; // take the hole
7546 else
7547 del_val += 2; // take the next set
7548 disable_rlv_bump_this_byte = 1; // allow no BUMP
7549 break;
7550
7551 case 0x0F /* 01111b */:
7552 case 0x17 /* 10111b */:
7553 case 0x07 /* 00111b */:
7554 del_val += 1; // take the second
7555 if (delay_count < 5) { // lowest count is small
7556 int second =
7557 rank_perf[rankx].count[i][del_val];
7558 int third =
7559 rank_perf[rankx].count[i][del_val + 1];
7560 // test if middle is more than 1 OR
7561 // top is more than 1;
7562 // this means if they are BOTH 1,
7563 // then we keep the second...
7564 if (second > 1 || third > 1) {
7565 // if middle is small OR top
7566 // is large
7567 if (second < 5 ||
7568 third > 1) {
7569 // take the top
7570 del_val += 1;
7571 if (rl_print)
7572 debug(": TOP7 ");
7573 }
7574 }
7575 }
7576 disable_rlv_bump_this_byte = 1; // allow no BUMP
7577 break;
7578
7579 default: // all others...
7580 if (rl_print)
7581 debug(": ABNORMAL, choose ORIG");
7582
7583 case 99: // special
7584 // FIXME: choose original choice?
7585 del_val = new_byte;
7586 disable_rlv_bump_this_byte = 1; // allow no BUMP
7587 break;
7588 }
7589 delay_count =
7590 rank_perf[rankx].count[i][del_val];
7591
7592 // finish printing here...
7593 if (rl_print)
7594 debug(": USING %2d (%2d) D%d\n", del_val,
7595 delay_count, disable_rlv_bump_this_byte);
7596 new_byte = del_val; // override with best PBM choice
7597 } else {
7598 if (ddr_type == DDR4_DRAM) { // only report when DDR4
7599 // FIXME: remove or increase VBL for this
7600 // output...
7601 if (rl_print)
7602 debug("N%d.LMC%d.R%d: PERFECT: Byte %d: ZERO PBMs, USING %d\n",
7603 node, if_num, rankx, i,
7604 new_byte);
7605 // prevent ODD bump, rely on original
7606 disable_rlv_bump_this_byte = 1;
7607 }
7608 } /* if (value_mask != 0) */
7609
7610 // optionally bump the delay value
7611 if (enable_rldelay_bump && !disable_rlv_bump_this_byte) {
7612 if ((new_byte & enable_rldelay_bump) ==
7613 enable_rldelay_bump) {
7614 int bump_value = new_byte + rldelay_bump_incr;
7615
7616 if (rl_print) {
7617 debug("N%d.LMC%d.R%d: RLVBUMP: Byte %d: CHANGING %d to %d (%s)\n",
7618 node, if_num, rankx, i,
7619 new_byte, bump_value,
7620 (value_mask &
7621 (1 << bump_value)) ?
7622 "PBM" : "NOPBM");
7623 }
7624 new_byte = bump_value;
7625 }
7626 }
7627
7628 // last checks for count-related purposes
7629 if (new_byte == best_byte && count_more > 0 &&
7630 count_less == 0) {
7631 // we really should take best_byte + 1
7632 if (rl_print) {
7633 debug("N%d.LMC%d.R%d: CADJMOR: Byte %d: CHANGING %d to %d\n",
7634 node, if_num, rankx, i,
7635 new_byte, best_byte + 1);
7636 new_byte = best_byte + 1;
7637 }
7638 } else if ((new_byte < best_byte) && (count_same > 0)) {
7639 // we really should take best_byte
7640 if (rl_print) {
7641 debug("N%d.LMC%d.R%d: CADJSAM: Byte %d: CHANGING %d to %d\n",
7642 node, if_num, rankx, i,
7643 new_byte, best_byte);
7644 new_byte = best_byte;
7645 }
7646 } else if (new_byte > best_byte) {
7647 if ((new_byte == (best_byte + 1)) &&
7648 count_more == 0 && count_less > 0) {
7649 // we really should take best_byte
7650 if (rl_print) {
7651 debug("N%d.LMC%d.R%d: CADJLE1: Byte %d: CHANGING %d to %d\n",
7652 node, if_num, rankx, i,
7653 new_byte, best_byte);
7654 new_byte = best_byte;
7655 }
7656 } else if ((new_byte >= (best_byte + 2)) &&
7657 ((count_more > 0) || (count_same > 0))) {
7658 if (rl_print) {
7659 debug("N%d.LMC%d.R%d: CADJLE2: Byte %d: CHANGING %d to %d\n",
7660 node, if_num, rankx, i,
7661 new_byte, best_byte + 1);
7662 new_byte = best_byte + 1;
7663 }
7664 }
7665 }
7666
7667 if (rl_print) {
7668 debug("N%d.LMC%d.R%d: SUMMARY: Byte %d: orig %d now %d, more %d same %d less %d, using %d\n",
7669 node, if_num, rankx, i, orig_best_byte,
7670 best_byte, count_more, count_same, count_less,
7671 new_byte);
7672 }
7673
7674 // update the byte with the new value (NOTE: orig value in
7675 // the CSR may not be current "best")
7676 upd_rl_rank(&rl_rank, i, new_byte);
7677
7678 // save new best for neighbor use
7679 rank_best_bytes[i] = new_byte;
7680 } /* for (i = 0; i < 8+ecc_ena; i++) */
7681
7682 ////////////////// this is the end of the BEST BYTE LOOP
7683
7684 if (saved_rl_rank.u64 != rl_rank.u64) {
7685 lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(rankx, if_num),
7686 rl_rank.u64);
7687 rl_rank.u64 = lmc_rd(priv,
7688 CVMX_LMCX_RLEVEL_RANKX(rankx, if_num));
7689 debug("Adjusting Read-Leveling per-RANK settings.\n");
7690 } else {
7691 debug("Not Adjusting Read-Leveling per-RANK settings.\n");
7692 }
7693 display_rl_with_final(if_num, rl_rank, rankx);
7694
7695 // FIXME: does this help make the output a little easier to focus?
7696 if (rl_print > 0)
7697 debug("-----------\n");
7698
7699#define RLEVEL_RANKX_EXTRAS_INCR 0
7700 // if there are unused entries to be filled
7701 if ((rank_mask & 0x0f) != 0x0f) {
7702 // copy the current rank
7703 union cvmx_lmcx_rlevel_rankx temp_rl_rank = rl_rank;
7704
7705 if (rankx < 3) {
7706#if RLEVEL_RANKX_EXTRAS_INCR > 0
7707 int byte, delay;
7708
7709 // modify the copy in prep for writing to empty slot(s)
7710 for (byte = 0; byte < 9; byte++) {
7711 delay = get_rl_rank(&temp_rl_rank, byte) +
7712 RLEVEL_RANKX_EXTRAS_INCR;
7713 if (delay > RLEVEL_BYTE_MSK)
7714 delay = RLEVEL_BYTE_MSK;
7715 upd_rl_rank(&temp_rl_rank, byte, delay);
7716 }
7717#endif
7718
7719 // if rank 0, write rank 1 and rank 2 here if empty
7720 if (rankx == 0) {
7721 // check that rank 1 is empty
7722 if (!(rank_mask & (1 << 1))) {
7723 debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7724 node, if_num, rankx, 1);
7725 lmc_wr(priv,
7726 CVMX_LMCX_RLEVEL_RANKX(1,
7727 if_num),
7728 temp_rl_rank.u64);
7729 }
7730
7731 // check that rank 2 is empty
7732 if (!(rank_mask & (1 << 2))) {
7733 debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7734 node, if_num, rankx, 2);
7735 lmc_wr(priv,
7736 CVMX_LMCX_RLEVEL_RANKX(2,
7737 if_num),
7738 temp_rl_rank.u64);
7739 }
7740 }
7741
7742 // if ranks 0, 1 or 2, write rank 3 here if empty
7743 // check that rank 3 is empty
7744 if (!(rank_mask & (1 << 3))) {
7745 debug("N%d.LMC%d.R%d: writing RLEVEL_RANK unused entry R%d.\n",
7746 node, if_num, rankx, 3);
7747 lmc_wr(priv, CVMX_LMCX_RLEVEL_RANKX(3, if_num),
7748 temp_rl_rank.u64);
7749 }
7750 }
7751 }
7752}
7753
7754static void lmc_read_leveling(struct ddr_priv *priv)
7755{
7756 struct rl_score rl_score[RTT_NOM_OHMS_COUNT][RODT_OHMS_COUNT][4];
7757 union cvmx_lmcx_control ctl;
7758 union cvmx_lmcx_config cfg;
7759 int rankx;
7760 char *s;
7761 int i;
7762
7763 /*
7764 * 4.8.10 LMC Read Leveling
7765 *
7766 * LMC supports an automatic read-leveling separately per byte-lane
7767 * using the DDR3 multipurpose register predefined pattern for system
7768 * calibration defined in the JEDEC DDR3 specifications.
7769 *
7770 * All of DDR PLL, LMC CK, and LMC DRESET, and early LMC initializations
7771 * must be completed prior to starting this LMC read-leveling sequence.
7772 *
7773 * Software could simply write the desired read-leveling values into
7774 * LMC(0)_RLEVEL_RANK(0..3). This section describes a sequence that uses
7775 * LMC's autoread-leveling capabilities.
7776 *
7777 * When LMC does the read-leveling sequence for a rank, it first enables
7778 * the DDR3 multipurpose register predefined pattern for system
7779 * calibration on the selected DRAM rank via a DDR3 MR3 write, then
7780 * executes 64 RD operations at different internal delay settings, then
7781 * disables the predefined pattern via another DDR3 MR3 write
7782 * operation. LMC determines the pass or fail of each of the 64 settings
7783 * independently for each byte lane, then writes appropriate
7784 * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] values for the rank.
7785 *
7786 * After read-leveling for a rank, software can read the 64 pass/fail
7787 * indications for one byte lane via LMC(0)_RLEVEL_DBG[BITMASK].
7788 * Software can observe all pass/fail results for all byte lanes in a
7789 * rank via separate read-leveling sequences on the rank with different
7790 * LMC(0)_RLEVEL_CTL[BYTE] values.
7791 *
7792 * The 64 pass/fail results will typically have failures for the low
7793 * delays, followed by a run of some passing settings, followed by more
7794 * failures in the remaining high delays. LMC sets
7795 * LMC(0)_RLEVEL_RANK(0..3)[BYTE*] to one of the passing settings.
7796 * First, LMC selects the longest run of successes in the 64 results.
7797 * (In the unlikely event that there is more than one longest run, LMC
7798 * selects the first one.) Then if LMC(0)_RLEVEL_CTL[OFFSET_EN] = 1 and
7799 * the selected run has more than LMC(0)_RLEVEL_CTL[OFFSET] successes,
7800 * LMC selects the last passing setting in the run minus
7801 * LMC(0)_RLEVEL_CTL[OFFSET]. Otherwise LMC selects the middle setting
7802 * in the run (rounding earlier when necessary). We expect the
7803 * read-leveling sequence to produce good results with the reset values
7804 * LMC(0)_RLEVEL_CTL [OFFSET_EN]=1, LMC(0)_RLEVEL_CTL[OFFSET] = 2.
7805 *
7806 * The read-leveling sequence has the following steps:
7807 *
7808 * 1. Select desired LMC(0)_RLEVEL_CTL[OFFSET_EN,OFFSET,BYTE] settings.
7809 * Do the remaining substeps 2-4 separately for each rank i with
7810 * attached DRAM.
7811 *
7812 * 2. Without changing any other fields in LMC(0)_CONFIG,
7813 *
7814 * o write LMC(0)_SEQ_CTL[SEQ_SEL] to select read-leveling
7815 *
7816 * o write LMC(0)_CONFIG[RANKMASK] = (1 << i)
7817 *
7818 * o write LMC(0)_SEQ_CTL[INIT_START] = 1
7819 *
7820 * This initiates the previously-described read-leveling.
7821 *
7822 * 3. Wait until LMC(0)_RLEVEL_RANKi[STATUS] != 2
7823 *
7824 * LMC will have updated LMC(0)_RLEVEL_RANKi[BYTE*] for all byte
7825 * lanes at this point.
7826 *
7827 * If ECC DRAM is not present (i.e. when DRAM is not attached to the
7828 * DDR_CBS_0_* and DDR_CB<7:0> chip signals, or the DDR_DQS_<4>_* and
7829 * DDR_DQ<35:32> chip signals), write LMC(0)_RLEVEL_RANK*[BYTE8] =
7830 * LMC(0)_RLEVEL_RANK*[BYTE0]. Write LMC(0)_RLEVEL_RANK*[BYTE4] =
7831 * LMC(0)_RLEVEL_RANK*[BYTE0].
7832 *
7833 * 4. If desired, consult LMC(0)_RLEVEL_DBG[BITMASK] and compare to
7834 * LMC(0)_RLEVEL_RANKi[BYTE*] for the lane selected by
7835 * LMC(0)_RLEVEL_CTL[BYTE]. If desired, modify
7836 * LMC(0)_RLEVEL_CTL[BYTE] to a new value and repeat so that all
7837 * BITMASKs can be observed.
7838 *
7839 * 5. Initialize LMC(0)_RLEVEL_RANK* values for all unused ranks.
7840 *
7841 * Let rank i be a rank with attached DRAM.
7842 *
7843 * For all ranks j that do not have attached DRAM, set
7844 * LMC(0)_RLEVEL_RANKj = LMC(0)_RLEVEL_RANKi.
7845 *
7846 * This read-leveling sequence can help select the proper CN70XX ODT
7847 * resistance value (LMC(0)_COMP_CTL2[RODT_CTL]). A hardware-generated
7848 * LMC(0)_RLEVEL_RANKi[BYTEj] value (for a used byte lane j) that is
7849 * drastically different from a neighboring LMC(0)_RLEVEL_RANKi[BYTEk]
7850 * (for a used byte lane k) can indicate that the CN70XX ODT value is
7851 * bad. It is possible to simultaneously optimize both
7852 * LMC(0)_COMP_CTL2[RODT_CTL] and LMC(0)_RLEVEL_RANKn[BYTE*] values by
7853 * performing this read-leveling sequence for several
7854 * LMC(0)_COMP_CTL2[RODT_CTL] values and selecting the one with the
7855 * best LMC(0)_RLEVEL_RANKn[BYTE*] profile for the ranks.
7856 */
7857
7858 rl_rodt_err = 0;
7859 rl_dbg_loops = 1;
7860 saved_int_zqcs_dis = 0;
7861 max_adj_rl_del_inc = 0;
7862 rl_print = RLEVEL_PRINTALL_DEFAULT;
7863
7864#ifdef ENABLE_HARDCODED_RLEVEL
7865 part_number[21] = {0};
7866#endif /* ENABLE_HARDCODED_RLEVEL */
7867
7868 pbm_lowsum_limit = 5; // FIXME: is this a good default?
7869 // FIXME: PBM skip for RODT 240 and 34
7870 pbm_rodt_skip = (1U << ddr4_rodt_ctl_240_ohm) |
7871 (1U << ddr4_rodt_ctl_34_ohm);
7872
7873 disable_rank_majority = 0; // control rank majority processing
7874
7875 // default to mask 11b ODDs for DDR4 (except 73xx), else DISABLE
7876 // for DDR3
7877 rldelay_bump_incr = 0;
7878 disable_rlv_bump_this_byte = 0;
7879
7880 enable_rldelay_bump = (ddr_type == DDR4_DRAM) ?
7881 ((octeon_is_cpuid(OCTEON_CN73XX)) ? 1 : 3) : 0;
7882
7883 s = lookup_env(priv, "ddr_disable_rank_majority");
7884 if (s)
7885 disable_rank_majority = !!simple_strtoul(s, NULL, 0);
7886
7887 s = lookup_env(priv, "ddr_pbm_lowsum_limit");
7888 if (s)
7889 pbm_lowsum_limit = simple_strtoul(s, NULL, 0);
7890
7891 s = lookup_env(priv, "ddr_pbm_rodt_skip");
7892 if (s)
7893 pbm_rodt_skip = simple_strtoul(s, NULL, 0);
7894 memset(rank_perf, 0, sizeof(rank_perf));
7895
7896 ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
7897 save_ddr2t = ctl.cn78xx.ddr2t;
7898
7899 cfg.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(if_num));
7900 ecc_ena = cfg.cn78xx.ecc_ena;
7901
7902 s = lookup_env(priv, "ddr_rlevel_2t");
7903 if (s)
7904 ctl.cn78xx.ddr2t = simple_strtoul(s, NULL, 0);
7905
7906 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
7907
7908 debug("LMC%d: Performing Read-Leveling\n", if_num);
7909
7910 rl_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num));
7911
7912 rl_samples = c_cfg->rlevel_average_loops;
7913 if (rl_samples == 0) {
7914 rl_samples = RLEVEL_SAMPLES_DEFAULT;
7915 // up the samples for these cases
7916 if (dimm_count == 1 || num_ranks == 1)
7917 rl_samples = rl_samples * 2 + 1;
7918 }
7919
7920 rl_compute = c_cfg->rlevel_compute;
7921 rl_ctl.cn78xx.offset_en = c_cfg->offset_en;
7922 rl_ctl.cn78xx.offset = spd_rdimm
7923 ? c_cfg->offset_rdimm
7924 : c_cfg->offset_udimm;
7925
7926 int value = 1; // should ALWAYS be set
7927
7928 s = lookup_env(priv, "ddr_rlevel_delay_unload");
7929 if (s)
7930 value = !!simple_strtoul(s, NULL, 0);
7931 rl_ctl.cn78xx.delay_unload_0 = value;
7932 rl_ctl.cn78xx.delay_unload_1 = value;
7933 rl_ctl.cn78xx.delay_unload_2 = value;
7934 rl_ctl.cn78xx.delay_unload_3 = value;
7935
7936 // use OR_DIS=1 to try for better results
7937 rl_ctl.cn78xx.or_dis = 1;
7938
7939 /*
7940 * If we will be switching to 32bit mode level based on only
7941 * four bits because there are only 4 ECC bits.
7942 */
7943 rl_ctl.cn78xx.bitmask = (if_64b) ? 0xFF : 0x0F;
7944
7945 // allow overrides
7946 s = lookup_env(priv, "ddr_rlevel_ctl_or_dis");
7947 if (s)
7948 rl_ctl.cn78xx.or_dis = simple_strtoul(s, NULL, 0);
7949
7950 s = lookup_env(priv, "ddr_rlevel_ctl_bitmask");
7951 if (s)
7952 rl_ctl.cn78xx.bitmask = simple_strtoul(s, NULL, 0);
7953
7954 rl_comp_offs = spd_rdimm
7955 ? c_cfg->rlevel_comp_offset_rdimm
7956 : c_cfg->rlevel_comp_offset_udimm;
7957 s = lookup_env(priv, "ddr_rlevel_comp_offset");
7958 if (s)
7959 rl_comp_offs = strtoul(s, NULL, 0);
7960
7961 s = lookup_env(priv, "ddr_rlevel_offset");
7962 if (s)
7963 rl_ctl.cn78xx.offset = simple_strtoul(s, NULL, 0);
7964
7965 s = lookup_env(priv, "ddr_rlevel_offset_en");
7966 if (s)
7967 rl_ctl.cn78xx.offset_en = simple_strtoul(s, NULL, 0);
7968
7969 s = lookup_env(priv, "ddr_rlevel_ctl");
7970 if (s)
7971 rl_ctl.u64 = simple_strtoul(s, NULL, 0);
7972
7973 lmc_wr(priv,
7974 CVMX_LMCX_RLEVEL_CTL(if_num),
7975 rl_ctl.u64);
7976
7977 // do this here so we can look at final RLEVEL_CTL[offset] setting...
7978 s = lookup_env(priv, "ddr_enable_rldelay_bump");
7979 if (s) {
7980 // also use as mask bits
7981 enable_rldelay_bump = strtoul(s, NULL, 0);
7982 }
7983
7984 if (enable_rldelay_bump != 0)
7985 rldelay_bump_incr = (rl_ctl.cn78xx.offset == 1) ? -1 : 1;
7986
7987 s = lookup_env(priv, "ddr%d_rlevel_debug_loops", if_num);
7988 if (s)
7989 rl_dbg_loops = simple_strtoul(s, NULL, 0);
7990
7991 s = lookup_env(priv, "ddr_rtt_nom_auto");
7992 if (s)
7993 ddr_rtt_nom_auto = !!simple_strtoul(s, NULL, 0);
7994
7995 s = lookup_env(priv, "ddr_rlevel_average");
7996 if (s)
7997 rl_samples = simple_strtoul(s, NULL, 0);
7998
7999 s = lookup_env(priv, "ddr_rlevel_compute");
8000 if (s)
8001 rl_compute = simple_strtoul(s, NULL, 0);
8002
8003 s = lookup_env(priv, "ddr_rlevel_printall");
8004 if (s)
8005 rl_print = simple_strtoul(s, NULL, 0);
8006
8007 debug("RLEVEL_CTL : 0x%016llx\n",
8008 rl_ctl.u64);
8009 debug("RLEVEL_OFFSET : %6d\n",
8010 rl_ctl.cn78xx.offset);
8011 debug("RLEVEL_OFFSET_EN : %6d\n",
8012 rl_ctl.cn78xx.offset_en);
8013
8014 /*
8015 * The purpose for the indexed table is to sort the settings
8016 * by the ohm value to simplify the testing when incrementing
8017 * through the settings. (index => ohms) 1=120, 2=60, 3=40,
8018 * 4=30, 5=20
8019 */
8020 min_rtt_nom_idx = (c_cfg->min_rtt_nom_idx == 0) ?
8021 1 : c_cfg->min_rtt_nom_idx;
8022 max_rtt_nom_idx = (c_cfg->max_rtt_nom_idx == 0) ?
8023 5 : c_cfg->max_rtt_nom_idx;
8024
8025 min_rodt_ctl = (c_cfg->min_rodt_ctl == 0) ? 1 : c_cfg->min_rodt_ctl;
8026 max_rodt_ctl = (c_cfg->max_rodt_ctl == 0) ? 5 : c_cfg->max_rodt_ctl;
8027
8028 s = lookup_env(priv, "ddr_min_rodt_ctl");
8029 if (s)
8030 min_rodt_ctl = simple_strtoul(s, NULL, 0);
8031
8032 s = lookup_env(priv, "ddr_max_rodt_ctl");
8033 if (s)
8034 max_rodt_ctl = simple_strtoul(s, NULL, 0);
8035
8036 s = lookup_env(priv, "ddr_min_rtt_nom_idx");
8037 if (s)
8038 min_rtt_nom_idx = simple_strtoul(s, NULL, 0);
8039
8040 s = lookup_env(priv, "ddr_max_rtt_nom_idx");
8041 if (s)
8042 max_rtt_nom_idx = simple_strtoul(s, NULL, 0);
8043
8044#ifdef ENABLE_HARDCODED_RLEVEL
8045 if (c_cfg->rl_tbl) {
8046 /* Check for hard-coded read-leveling settings */
8047 get_dimm_part_number(part_number, &dimm_config_table[0],
8048 0, ddr_type);
8049 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
8050 if (!(rank_mask & (1 << rankx)))
8051 continue;
8052
8053 rl_rank.u64 = lmc_rd(priv,
8054 CVMX_LMCX_RLEVEL_RANKX(rankx,
8055 if_num));
8056
8057 i = 0;
8058 while (c_cfg->rl_tbl[i].part) {
8059 debug("DIMM part number:\"%s\", SPD: \"%s\"\n",
8060 c_cfg->rl_tbl[i].part, part_number);
8061 if ((strcmp(part_number,
8062 c_cfg->rl_tbl[i].part) == 0) &&
8063 (abs(c_cfg->rl_tbl[i].speed -
8064 2 * ddr_hertz / (1000 * 1000)) < 10)) {
8065 debug("Using hard-coded read leveling for DIMM part number: \"%s\"\n",
8066 part_number);
8067 rl_rank.u64 =
8068 c_cfg->rl_tbl[i].rl_rank[if_num][rankx];
8069 lmc_wr(priv,
8070 CVMX_LMCX_RLEVEL_RANKX(rankx,
8071 if_num),
8072 rl_rank.u64);
8073 rl_rank.u64 =
8074 lmc_rd(priv,
8075 CVMX_LMCX_RLEVEL_RANKX(rankx,
8076 if_num));
8077 display_rl(if_num, rl_rank, rankx);
8078 /* Disable h/w read-leveling */
8079 rl_dbg_loops = 0;
8080 break;
8081 }
8082 ++i;
8083 }
8084 }
8085 }
8086#endif /* ENABLE_HARDCODED_RLEVEL */
8087
8088 max_adj_rl_del_inc = c_cfg->maximum_adjacent_rlevel_delay_increment;
8089 s = lookup_env(priv, "ddr_maximum_adjacent_rlevel_delay_increment");
8090 if (s)
8091 max_adj_rl_del_inc = strtoul(s, NULL, 0);
8092
8093 while (rl_dbg_loops--) {
8094 union cvmx_lmcx_modereg_params1 mp1;
8095 union cvmx_lmcx_comp_ctl2 cc2;
8096
8097 /* Initialize the error scoreboard */
8098 memset(rl_score, 0, sizeof(rl_score));
8099
8100 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8101 saved_ddr__ptune = cc2.cn78xx.ddr__ptune;
8102 saved_ddr__ntune = cc2.cn78xx.ddr__ntune;
8103
8104 /* Disable dynamic compensation settings */
8105 if (rl_comp_offs != 0) {
8106 cc2.cn78xx.ptune = saved_ddr__ptune;
8107 cc2.cn78xx.ntune = saved_ddr__ntune;
8108
8109 /*
8110 * Round up the ptune calculation to bias the odd
8111 * cases toward ptune
8112 */
8113 cc2.cn78xx.ptune += divide_roundup(rl_comp_offs, 2);
8114 cc2.cn78xx.ntune -= rl_comp_offs / 2;
8115
8116 ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
8117 saved_int_zqcs_dis = ctl.s.int_zqcs_dis;
8118 /* Disable ZQCS while in bypass. */
8119 ctl.s.int_zqcs_dis = 1;
8120 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
8121
8122 cc2.cn78xx.byp = 1; /* Enable bypass mode */
8123 lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
8124 lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8125 /* Read again */
8126 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8127 debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
8128 cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune);
8129 }
8130
8131 mp1.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num));
8132
8133 for (rtt_idx = min_rtt_nom_idx; rtt_idx <= max_rtt_nom_idx;
8134 ++rtt_idx) {
8135 rtt_nom = imp_val->rtt_nom_table[rtt_idx];
8136
8137 /*
			 * When the read ODT mask is zero, the dyn_rtt_nom_mask
			 * is zero and RTT_NOM will not be changing during
8140 * read-leveling. Since the value is fixed we only need
8141 * to test it once.
8142 */
8143 if (dyn_rtt_nom_mask == 0) {
8144 // flag not to print NOM ohms
8145 print_nom_ohms = -1;
8146 } else {
8147 if (dyn_rtt_nom_mask & 1)
8148 mp1.s.rtt_nom_00 = rtt_nom;
8149 if (dyn_rtt_nom_mask & 2)
8150 mp1.s.rtt_nom_01 = rtt_nom;
8151 if (dyn_rtt_nom_mask & 4)
8152 mp1.s.rtt_nom_10 = rtt_nom;
8153 if (dyn_rtt_nom_mask & 8)
8154 mp1.s.rtt_nom_11 = rtt_nom;
8155 // FIXME? rank 0 ohms always?
8156 print_nom_ohms =
8157 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00];
8158 }
8159
8160 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num),
8161 mp1.u64);
8162
8163 if (print_nom_ohms >= 0 && rl_print > 1) {
8164 debug("\n");
8165 debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
8166 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
8167 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
8168 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
8169 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
8170 mp1.s.rtt_nom_11,
8171 mp1.s.rtt_nom_10,
8172 mp1.s.rtt_nom_01,
8173 mp1.s.rtt_nom_00);
8174 }
8175
8176 ddr_init_seq(priv, rank_mask, if_num);
8177
8178 // Try RANK outside RODT to rearrange the output...
8179 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
8180 if (!(rank_mask & (1 << rankx)))
8181 continue;
8182
8183 for (rodt_ctl = max_rodt_ctl;
8184 rodt_ctl >= min_rodt_ctl; --rodt_ctl)
8185 rodt_loop(priv, rankx, rl_score);
8186 }
8187 }
8188
8189 /* Re-enable dynamic compensation settings. */
8190 if (rl_comp_offs != 0) {
8191 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8192
8193 cc2.cn78xx.ptune = 0;
8194 cc2.cn78xx.ntune = 0;
8195 cc2.cn78xx.byp = 0; /* Disable bypass mode */
8196 lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
8197 /* Read once */
8198 lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8199
8200 /* Read again */
8201 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8202 debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
8203 cc2.cn78xx.ddr__ptune, cc2.cn78xx.ddr__ntune);
8204
8205 ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
8206 /* Restore original setting */
8207 ctl.s.int_zqcs_dis = saved_int_zqcs_dis;
8208 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
8209 }
8210
8211 int override_compensation = 0;
8212
8213 s = lookup_env(priv, "ddr__ptune");
8214 if (s)
8215 saved_ddr__ptune = strtoul(s, NULL, 0);
8216
8217 s = lookup_env(priv, "ddr__ntune");
8218 if (s) {
8219 saved_ddr__ntune = strtoul(s, NULL, 0);
8220 override_compensation = 1;
8221 }
8222
8223 if (override_compensation) {
8224 cc2.cn78xx.ptune = saved_ddr__ptune;
8225 cc2.cn78xx.ntune = saved_ddr__ntune;
8226
8227 ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
8228 saved_int_zqcs_dis = ctl.s.int_zqcs_dis;
8229 /* Disable ZQCS while in bypass. */
8230 ctl.s.int_zqcs_dis = 1;
8231 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
8232
8233 cc2.cn78xx.byp = 1; /* Enable bypass mode */
8234 lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
8235 /* Read again */
8236 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8237
8238 debug("DDR__PTUNE/DDR__NTUNE : %d/%d\n",
8239 cc2.cn78xx.ptune, cc2.cn78xx.ntune);
8240 }
8241
8242 /* Evaluation block */
8243 /* Still at initial value? */
8244 int best_rodt_score = DEFAULT_BEST_RANK_SCORE;
8245 int auto_rodt_ctl = 0;
8246 int auto_rtt_nom = 0;
8247 int rodt_score;
8248
8249 rodt_row_skip_mask = 0;
8250
8251 // just add specific RODT rows to the skip mask for DDR4
8252 // at this time...
8253 if (ddr_type == DDR4_DRAM) {
8254 // skip RODT row 34 ohms for all DDR4 types
8255 rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_34_ohm);
8256 // skip RODT row 40 ohms for all DDR4 types
8257 rodt_row_skip_mask |= (1 << ddr4_rodt_ctl_40_ohm);
8258 // For now, do not skip RODT row 40 or 48 ohm when
8259 // ddr_hertz is above 1075 MHz
8260 if (ddr_hertz > 1075000000) {
8261 // noskip RODT row 40 ohms
8262 rodt_row_skip_mask &=
8263 ~(1 << ddr4_rodt_ctl_40_ohm);
8264 // noskip RODT row 48 ohms
8265 rodt_row_skip_mask &=
8266 ~(1 << ddr4_rodt_ctl_48_ohm);
8267 }
8268 // For now, do not skip RODT row 48 ohm for 2Rx4
8269 // stacked die DIMMs
8270 if (is_stacked_die && num_ranks == 2 &&
8271 dram_width == 4) {
8272 // noskip RODT row 48 ohms
8273 rodt_row_skip_mask &=
8274 ~(1 << ddr4_rodt_ctl_48_ohm);
8275 }
8276 // for now, leave all rows eligible when we have
8277 // mini-DIMMs...
8278 if (spd_dimm_type == 5 || spd_dimm_type == 6)
8279 rodt_row_skip_mask = 0;
8280 // for now, leave all rows eligible when we have
8281 // a 2-slot 1-rank config
8282 if (dimm_count == 2 && num_ranks == 1)
8283 rodt_row_skip_mask = 0;
8284
8285 debug("Evaluating Read-Leveling Scoreboard for AUTO settings.\n");
8286 for (rtt_idx = min_rtt_nom_idx;
8287 rtt_idx <= max_rtt_nom_idx; ++rtt_idx) {
8288 rtt_nom = imp_val->rtt_nom_table[rtt_idx];
8289
8290 for (rodt_ctl = max_rodt_ctl;
8291 rodt_ctl >= min_rodt_ctl; --rodt_ctl) {
8292 rodt_score = 0;
8293 for (rankx = 0; rankx < dimm_count * 4;
8294 rankx++) {
8295 if (!(rank_mask & (1 << rankx)))
8296 continue;
8297
8298 debug("rl_score[rtt_nom=%d][rodt_ctl=%d][rankx=%d].score:%d\n",
8299 rtt_nom, rodt_ctl, rankx,
8300 rl_score[rtt_nom][rodt_ctl][rankx].score);
8301 rodt_score +=
8302 rl_score[rtt_nom][rodt_ctl][rankx].score;
8303 }
8304 // FIXME: do we need to skip RODT rows
8305 // here, like we do below in the
8306 // by-RANK settings?
8307
8308 /*
8309 * When using automatic ODT settings use
8310 * the ODT settings associated with the
8311 * best score for all of the tested ODT
8312 * combinations.
8313 */
8314
8315 if (rodt_score < best_rodt_score ||
8316 (rodt_score == best_rodt_score &&
8317 (imp_val->rodt_ohms[rodt_ctl] >
8318 imp_val->rodt_ohms[auto_rodt_ctl]))) {
8319 debug("AUTO: new best score for rodt:%d (%d), new score:%d, previous score:%d\n",
8320 rodt_ctl,
8321 imp_val->rodt_ohms[rodt_ctl],
8322 rodt_score,
8323 best_rodt_score);
8324 best_rodt_score = rodt_score;
8325 auto_rodt_ctl = rodt_ctl;
8326 auto_rtt_nom = rtt_nom;
8327 }
8328 }
8329 }
8330
8331 mp1.u64 = lmc_rd(priv,
8332 CVMX_LMCX_MODEREG_PARAMS1(if_num));
8333
8334 if (ddr_rtt_nom_auto) {
8335 /* Store the automatically set RTT_NOM value */
8336 if (dyn_rtt_nom_mask & 1)
8337 mp1.s.rtt_nom_00 = auto_rtt_nom;
8338 if (dyn_rtt_nom_mask & 2)
8339 mp1.s.rtt_nom_01 = auto_rtt_nom;
8340 if (dyn_rtt_nom_mask & 4)
8341 mp1.s.rtt_nom_10 = auto_rtt_nom;
8342 if (dyn_rtt_nom_mask & 8)
8343 mp1.s.rtt_nom_11 = auto_rtt_nom;
8344 } else {
8345 /*
8346 * restore the manual settings to the register
8347 */
8348 mp1.s.rtt_nom_00 = default_rtt_nom[0];
8349 mp1.s.rtt_nom_01 = default_rtt_nom[1];
8350 mp1.s.rtt_nom_10 = default_rtt_nom[2];
8351 mp1.s.rtt_nom_11 = default_rtt_nom[3];
8352 }
8353
8354 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS1(if_num),
8355 mp1.u64);
8356 debug("RTT_NOM %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
8357 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_11],
8358 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_10],
8359 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_01],
8360 imp_val->rtt_nom_ohms[mp1.s.rtt_nom_00],
8361 mp1.s.rtt_nom_11,
8362 mp1.s.rtt_nom_10,
8363 mp1.s.rtt_nom_01,
8364 mp1.s.rtt_nom_00);
8365
8366 debug("RTT_WR %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
8367 imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 3)],
8368 imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 2)],
8369 imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 1)],
8370 imp_val->rtt_wr_ohms[extr_wr(mp1.u64, 0)],
8371 extr_wr(mp1.u64, 3),
8372 extr_wr(mp1.u64, 2),
8373 extr_wr(mp1.u64, 1),
8374 extr_wr(mp1.u64, 0));
8375
8376 debug("DIC %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
8377 imp_val->dic_ohms[mp1.s.dic_11],
8378 imp_val->dic_ohms[mp1.s.dic_10],
8379 imp_val->dic_ohms[mp1.s.dic_01],
8380 imp_val->dic_ohms[mp1.s.dic_00],
8381 mp1.s.dic_11,
8382 mp1.s.dic_10,
8383 mp1.s.dic_01,
8384 mp1.s.dic_00);
8385
8386 if (ddr_type == DDR4_DRAM) {
8387 union cvmx_lmcx_modereg_params2 mp2;
8388 /*
8389 * We must read the CSR, and not depend on
8390 * odt_config[odt_idx].odt_mask2, since we could
8391 * have overridden values with envvars.
8392 * NOTE: this corrects the printout, since the
8393 * CSR is not written with the old values...
8394 */
8395 mp2.u64 = lmc_rd(priv,
8396 CVMX_LMCX_MODEREG_PARAMS2(if_num));
8397
8398 debug("RTT_PARK %3d, %3d, %3d, %3d ohms : %x,%x,%x,%x\n",
8399 imp_val->rtt_nom_ohms[mp2.s.rtt_park_11],
8400 imp_val->rtt_nom_ohms[mp2.s.rtt_park_10],
8401 imp_val->rtt_nom_ohms[mp2.s.rtt_park_01],
8402 imp_val->rtt_nom_ohms[mp2.s.rtt_park_00],
8403 mp2.s.rtt_park_11,
8404 mp2.s.rtt_park_10,
8405 mp2.s.rtt_park_01,
8406 mp2.s.rtt_park_00);
8407
8408 debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n",
8409 "VREF_RANGE",
8410 mp2.s.vref_range_11,
8411 mp2.s.vref_range_10,
8412 mp2.s.vref_range_01,
8413 mp2.s.vref_range_00);
8414
8415 debug("%-45s : 0x%x,0x%x,0x%x,0x%x\n",
8416 "VREF_VALUE",
8417 mp2.s.vref_value_11,
8418 mp2.s.vref_value_10,
8419 mp2.s.vref_value_01,
8420 mp2.s.vref_value_00);
8421 }
8422
8423 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8424 if (ddr_rodt_ctl_auto) {
8425 cc2.cn78xx.rodt_ctl = auto_rodt_ctl;
8426 } else {
8427 // back to the original setting
8428 cc2.cn78xx.rodt_ctl = default_rodt_ctl;
8429 }
8430 lmc_wr(priv, CVMX_LMCX_COMP_CTL2(if_num), cc2.u64);
8431 cc2.u64 = lmc_rd(priv, CVMX_LMCX_COMP_CTL2(if_num));
8432 debug("Read ODT_CTL : 0x%x (%d ohms)\n",
8433 cc2.cn78xx.rodt_ctl,
8434 imp_val->rodt_ohms[cc2.cn78xx.rodt_ctl]);
8435
8436 /*
8437 * Use the delays associated with the best score for
8438 * each individual rank
8439 */
8440 debug("Evaluating Read-Leveling Scoreboard for per-RANK settings.\n");
8441
			// this is the RANK MAJOR LOOP
8443 for (rankx = 0; rankx < dimm_count * 4; rankx++)
8444 rank_major_loop(priv, rankx, rl_score);
8445 } /* Evaluation block */
8446 } /* while(rl_dbg_loops--) */
8447
8448 ctl.cn78xx.ddr2t = save_ddr2t;
8449 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctl.u64);
8450 ctl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
8451 /* Display final 2T value */
8452 debug("DDR2T : %6d\n",
8453 ctl.cn78xx.ddr2t);
8454
8455 ddr_init_seq(priv, rank_mask, if_num);
8456
8457 for (rankx = 0; rankx < dimm_count * 4; rankx++) {
8458 u64 value;
8459 int parameter_set = 0;
8460
8461 if (!(rank_mask & (1 << rankx)))
8462 continue;
8463
8464 rl_rank.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_RANKX(rankx,
8465 if_num));
8466
8467 for (i = 0; i < 9; ++i) {
8468 s = lookup_env(priv, "ddr%d_rlevel_rank%d_byte%d",
8469 if_num, rankx, i);
8470 if (s) {
8471 parameter_set |= 1;
8472 value = simple_strtoul(s, NULL, 0);
8473
8474 upd_rl_rank(&rl_rank, i, value);
8475 }
8476 }
8477
8478 s = lookup_env_ull(priv, "ddr%d_rlevel_rank%d", if_num, rankx);
8479 if (s) {
8480 parameter_set |= 1;
8481 value = simple_strtoull(s, NULL, 0);
8482 rl_rank.u64 = value;
8483 }
8484
8485 if (parameter_set) {
8486 lmc_wr(priv,
8487 CVMX_LMCX_RLEVEL_RANKX(rankx, if_num),
8488 rl_rank.u64);
8489 rl_rank.u64 = lmc_rd(priv,
8490 CVMX_LMCX_RLEVEL_RANKX(rankx,
8491 if_num));
8492 display_rl(if_num, rl_rank, rankx);
8493 }
8494 }
8495}
8496
8497int init_octeon3_ddr3_interface(struct ddr_priv *priv,
8498 struct ddr_conf *_ddr_conf, u32 _ddr_hertz,
8499 u32 cpu_hertz, u32 ddr_ref_hertz, int _if_num,
8500 u32 _if_mask)
8501{
8502 union cvmx_lmcx_control ctrl;
8503 int ret;
8504 char *s;
8505 int i;
8506
8507 if_num = _if_num;
8508 ddr_hertz = _ddr_hertz;
8509 ddr_conf = _ddr_conf;
8510 if_mask = _if_mask;
8511 odt_1rank_config = ddr_conf->odt_1rank_config;
8512 odt_2rank_config = ddr_conf->odt_2rank_config;
8513 odt_4rank_config = ddr_conf->odt_4rank_config;
8514 dimm_config_table = ddr_conf->dimm_config_table;
8515 c_cfg = &ddr_conf->custom_lmc_config;
8516
8517 /*
8518 * Compute clock rates to the nearest picosecond.
8519 */
8520 tclk_psecs = hertz_to_psecs(ddr_hertz); /* Clock in psecs */
8521 eclk_psecs = hertz_to_psecs(cpu_hertz); /* Clock in psecs */
8522
8523 dimm_count = 0;
8524 /* Accumulate and report all the errors before giving up */
8525 fatal_error = 0;
8526
8527 /* Flag that indicates safe DDR settings should be used */
8528 safe_ddr_flag = 0;
8529 if_64b = 1; /* Octeon II Default: 64bit interface width */
8530 mem_size_mbytes = 0;
8531 bank_bits = 0;
8532 column_bits_start = 1;
8533 use_ecc = 1;
8534 min_cas_latency = 0, max_cas_latency = 0, override_cas_latency = 0;
8535 spd_package = 0;
8536 spd_rawcard = 0;
8537 spd_rawcard_aorb = 0;
8538 spd_rdimm_registers = 0;
8539 is_stacked_die = 0;
8540 is_3ds_dimm = 0; // 3DS
8541 lranks_per_prank = 1; // 3DS: logical ranks per package rank
8542 lranks_bits = 0; // 3DS: logical ranks bits
8543 die_capacity = 0; // in Mbits; only used for 3DS
8544
8545 wl_mask_err = 0;
8546 dyn_rtt_nom_mask = 0;
8547 ddr_disable_chip_reset = 1;
8548 match_wl_rtt_nom = 0;
8549
8550 internal_retries = 0;
8551
8552 disable_deskew_training = 0;
8553 restart_if_dsk_incomplete = 0;
8554 last_lane = ((if_64b) ? 8 : 4) + use_ecc;
8555
8556 disable_sequential_delay_check = 0;
8557 wl_print = WLEVEL_PRINTALL_DEFAULT;
8558
8559 enable_by_rank_init = 1; // FIXME: default by-rank ON
8560 saved_rank_mask = 0;
8561
8562 node = 0;
8563
8564 memset(hwl_alts, 0, sizeof(hwl_alts));
8565
8566 /*
8567 * Initialize these to shut up the compiler. They are configured
8568 * and used only for DDR4
8569 */
8570 ddr4_trrd_lmin = 6000;
8571 ddr4_tccd_lmin = 6000;
8572
8573 debug("\nInitializing node %d DDR interface %d, DDR Clock %d, DDR Reference Clock %d, CPUID 0x%08x\n",
8574 node, if_num, ddr_hertz, ddr_ref_hertz, read_c0_prid());
8575
8576 if (dimm_config_table[0].spd_addrs[0] == 0 &&
8577 !dimm_config_table[0].spd_ptrs[0]) {
8578 printf("ERROR: No dimms specified in the dimm_config_table.\n");
8579 return -1;
8580 }
8581
8582 // allow some overrides to be done
8583
8584 // this one controls several things related to DIMM geometry: HWL and RL
8585 disable_sequential_delay_check = c_cfg->disable_sequential_delay_check;
8586 s = lookup_env(priv, "ddr_disable_sequential_delay_check");
8587 if (s)
8588 disable_sequential_delay_check = strtoul(s, NULL, 0);
8589
8590 // this one controls whether chip RESET is done, or LMC init restarted
8591 // from step 6.9.6
8592 s = lookup_env(priv, "ddr_disable_chip_reset");
8593 if (s)
8594 ddr_disable_chip_reset = !!strtoul(s, NULL, 0);
8595
8596 // this one controls whether Deskew Training is performed
8597 s = lookup_env(priv, "ddr_disable_deskew_training");
8598 if (s)
8599 disable_deskew_training = !!strtoul(s, NULL, 0);
8600
8601 if (ddr_verbose(priv)) {
8602 printf("DDR SPD Table:");
8603 for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) {
8604 if (dimm_config_table[didx].spd_addrs[0] == 0)
8605 break;
8606
8607 printf(" --ddr%dspd=0x%02x", if_num,
8608 dimm_config_table[didx].spd_addrs[0]);
8609 if (dimm_config_table[didx].spd_addrs[1] != 0)
8610 printf(",0x%02x",
8611 dimm_config_table[didx].spd_addrs[1]);
8612 }
8613 printf("\n");
8614 }
8615
8616 /*
8617 * Walk the DRAM Socket Configuration Table to see what is installed.
8618 */
8619 for (didx = 0; didx < DDR_CFG_T_MAX_DIMMS; ++didx) {
8620 /* Check for lower DIMM socket populated */
8621 if (validate_dimm(priv, &dimm_config_table[didx], 0)) {
8622 if (ddr_verbose(priv))
8623 report_dimm(&dimm_config_table[didx], 0,
8624 dimm_count, if_num);
8625 ++dimm_count;
8626 } else {
8627 break;
8628 } /* Finished when there is no lower DIMM */
8629 }
8630
8631 initialize_ddr_clock(priv, ddr_conf, cpu_hertz, ddr_hertz,
8632 ddr_ref_hertz, if_num, if_mask);
8633
8634 if (!odt_1rank_config)
8635 odt_1rank_config = disable_odt_config;
8636 if (!odt_2rank_config)
8637 odt_2rank_config = disable_odt_config;
8638 if (!odt_4rank_config)
8639 odt_4rank_config = disable_odt_config;
8640
8641 s = env_get("ddr_safe");
8642 if (s) {
8643 safe_ddr_flag = !!simple_strtoul(s, NULL, 0);
8644 printf("Parameter found in environment. ddr_safe = %d\n",
8645 safe_ddr_flag);
8646 }
8647
8648 if (dimm_count == 0) {
8649 printf("ERROR: DIMM 0 not detected.\n");
8650 return (-1);
8651 }
8652
8653 if (c_cfg->mode32b)
8654 if_64b = 0;
8655
8656 s = lookup_env(priv, "if_64b");
8657 if (s)
8658 if_64b = !!simple_strtoul(s, NULL, 0);
8659
8660 if (if_64b == 1) {
8661 if (octeon_is_cpuid(OCTEON_CN70XX)) {
8662 printf("64-bit interface width is not supported for this Octeon model\n");
8663 ++fatal_error;
8664 }
8665 }
8666
8667 /* ddr_type only indicates DDR4 or DDR3 */
8668 ddr_type = (read_spd(&dimm_config_table[0], 0,
8669 DDR4_SPD_KEY_BYTE_DEVICE_TYPE) == 0x0C) ? 4 : 3;
8670 debug("DRAM Device Type: DDR%d\n", ddr_type);
8671
8672 if (ddr_type == DDR4_DRAM) {
8673 int spd_module_type;
8674 int asymmetric;
8675 const char *signal_load[4] = { "", "MLS", "3DS", "RSV" };
8676
8677 imp_val = &ddr4_impedence_val;
8678
8679 spd_addr =
8680 read_spd(&dimm_config_table[0], 0,
8681 DDR4_SPD_ADDRESSING_ROW_COL_BITS);
8682 spd_org =
8683 read_spd(&dimm_config_table[0], 0,
8684 DDR4_SPD_MODULE_ORGANIZATION);
8685 spd_banks =
8686 0xFF & read_spd(&dimm_config_table[0], 0,
8687 DDR4_SPD_DENSITY_BANKS);
8688
8689 bank_bits =
8690 (2 + ((spd_banks >> 4) & 0x3)) + ((spd_banks >> 6) & 0x3);
8691 /* Controller can only address 4 bits. */
8692 bank_bits = min((int)bank_bits, 4);
8693
8694 spd_package =
8695 0XFF & read_spd(&dimm_config_table[0], 0,
8696 DDR4_SPD_PACKAGE_TYPE);
8697 if (spd_package & 0x80) { // non-monolithic device
8698 is_stacked_die = ((spd_package & 0x73) == 0x11);
8699 debug("DDR4: Package Type 0x%02x (%s), %d die\n",
8700 spd_package, signal_load[(spd_package & 3)],
8701 ((spd_package >> 4) & 7) + 1);
8702 is_3ds_dimm = ((spd_package & 3) == 2); // is it 3DS?
8703 if (is_3ds_dimm) { // is it 3DS?
8704 lranks_per_prank = ((spd_package >> 4) & 7) + 1;
8705 // FIXME: should make sure it is only 2H or 4H
8706 // or 8H?
8707 lranks_bits = lranks_per_prank >> 1;
8708 if (lranks_bits == 4)
8709 lranks_bits = 3;
8710 }
8711 } else if (spd_package != 0) {
8712 // FIXME: print non-zero monolithic device definition
8713 debug("DDR4: Package Type MONOLITHIC: %d die, signal load %d\n",
8714 ((spd_package >> 4) & 7) + 1, (spd_package & 3));
8715 }
8716
8717 asymmetric = (spd_org >> 6) & 1;
8718 if (asymmetric) {
8719 int spd_secondary_pkg =
8720 read_spd(&dimm_config_table[0], 0,
8721 DDR4_SPD_SECONDARY_PACKAGE_TYPE);
8722 debug("DDR4: Module Organization: ASYMMETRICAL: Secondary Package Type 0x%02x\n",
8723 spd_secondary_pkg);
8724 } else {
8725 u64 bus_width =
8726 8 << (0x07 &
8727 read_spd(&dimm_config_table[0], 0,
8728 DDR4_SPD_MODULE_MEMORY_BUS_WIDTH));
8729 u64 ddr_width = 4 << ((spd_org >> 0) & 0x7);
8730 u64 module_cap;
8731 int shift = (spd_banks & 0x0F);
8732
8733 die_capacity = (shift < 8) ? (256UL << shift) :
8734 ((12UL << (shift & 1)) << 10);
8735 debug("DDR4: Module Organization: SYMMETRICAL: capacity per die %d %cbit\n",
8736 (die_capacity > 512) ? (die_capacity >> 10) :
8737 die_capacity, (die_capacity > 512) ? 'G' : 'M');
8738 module_cap = ((u64)die_capacity << 20) / 8UL *
8739 bus_width / ddr_width *
8740 (1UL + ((spd_org >> 3) & 0x7));
8741
8742 // is it 3DS?
8743 if (is_3ds_dimm) {
8744 module_cap *= (u64)(((spd_package >> 4) & 7) +
8745 1);
8746 }
8747 debug("DDR4: Module Organization: SYMMETRICAL: capacity per module %lld GB\n",
8748 module_cap >> 30);
8749 }
8750
8751 spd_rawcard =
8752 0xFF & read_spd(&dimm_config_table[0], 0,
8753 DDR4_SPD_REFERENCE_RAW_CARD);
8754 debug("DDR4: Reference Raw Card 0x%02x\n", spd_rawcard);
8755
8756 spd_module_type =
8757 read_spd(&dimm_config_table[0], 0,
8758 DDR4_SPD_KEY_BYTE_MODULE_TYPE);
8759 if (spd_module_type & 0x80) { // HYBRID module
8760 debug("DDR4: HYBRID module, type %s\n",
8761 ((spd_module_type & 0x70) ==
8762 0x10) ? "NVDIMM" : "UNKNOWN");
8763 }
8764 spd_thermal_sensor =
8765 read_spd(&dimm_config_table[0], 0,
8766 DDR4_SPD_MODULE_THERMAL_SENSOR);
8767 spd_dimm_type = spd_module_type & 0x0F;
8768 spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) ||
8769 (spd_dimm_type == 8);
8770 if (spd_rdimm) {
8771 u16 spd_mfgr_id, spd_register_rev, spd_mod_attr;
8772 static const u16 manu_ids[4] = {
8773 0xb380, 0x3286, 0x9780, 0xb304
8774 };
8775 static const char *manu_names[4] = {
8776 "XXX", "XXXXXXX", "XX", "XXXXX"
8777 };
8778 int mc;
8779
8780 spd_mfgr_id =
8781 (0xFFU &
8782 read_spd(&dimm_config_table[0], 0,
8783 DDR4_SPD_REGISTER_MANUFACTURER_ID_LSB)) |
8784 ((0xFFU &
8785 read_spd(&dimm_config_table[0], 0,
8786 DDR4_SPD_REGISTER_MANUFACTURER_ID_MSB))
8787 << 8);
8788 spd_register_rev =
8789 0xFFU & read_spd(&dimm_config_table[0], 0,
8790 DDR4_SPD_REGISTER_REVISION_NUMBER);
8791 for (mc = 0; mc < 4; mc++)
8792 if (manu_ids[mc] == spd_mfgr_id)
8793 break;
8794
8795 debug("DDR4: RDIMM Register Manufacturer ID: %s, Revision: 0x%02x\n",
8796 (mc >= 4) ? "UNKNOWN" : manu_names[mc],
8797 spd_register_rev);
8798
8799 // RAWCARD A or B must be bit 7=0 and bits 4-0
8800 // either 00000(A) or 00001(B)
8801 spd_rawcard_aorb = ((spd_rawcard & 0x9fUL) <= 1);
8802 // RDIMM Module Attributes
8803 spd_mod_attr =
8804 0xFFU & read_spd(&dimm_config_table[0], 0,
8805 DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE);
8806 spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1);
8807 debug("DDR4: RDIMM Module Attributes (0x%02x): Register Type DDR4RCD%02d, DRAM rows %d, Registers %d\n",
8808 spd_mod_attr, (spd_mod_attr >> 4) + 1,
8809 ((1 << ((spd_mod_attr >> 2) & 3)) >> 1),
8810 spd_rdimm_registers);
8811 }
8812 dimm_type_name = ddr4_dimm_types[spd_dimm_type];
8813 } else { /* if (ddr_type == DDR4_DRAM) */
8814 const char *signal_load[4] = { "UNK", "MLS", "SLS", "RSV" };
8815
8816 imp_val = &ddr3_impedence_val;
8817
8818 spd_addr =
8819 read_spd(&dimm_config_table[0], 0,
8820 DDR3_SPD_ADDRESSING_ROW_COL_BITS);
8821 spd_org =
8822 read_spd(&dimm_config_table[0], 0,
8823 DDR3_SPD_MODULE_ORGANIZATION);
8824 spd_banks =
8825 read_spd(&dimm_config_table[0], 0,
8826 DDR3_SPD_DENSITY_BANKS) & 0xff;
8827
8828 bank_bits = 3 + ((spd_banks >> 4) & 0x7);
8829 /* Controller can only address 3 bits. */
8830 bank_bits = min((int)bank_bits, 3);
8831 spd_dimm_type =
8832 0x0f & read_spd(&dimm_config_table[0], 0,
8833 DDR3_SPD_KEY_BYTE_MODULE_TYPE);
8834 spd_rdimm = (spd_dimm_type == 1) || (spd_dimm_type == 5) ||
8835 (spd_dimm_type == 9);
8836
8837 spd_package =
8838 0xFF & read_spd(&dimm_config_table[0], 0,
8839 DDR3_SPD_SDRAM_DEVICE_TYPE);
8840 if (spd_package & 0x80) { // non-standard device
8841 debug("DDR3: Device Type 0x%02x (%s), %d die\n",
8842 spd_package, signal_load[(spd_package & 3)],
8843 ((1 << ((spd_package >> 4) & 7)) >> 1));
8844 } else if (spd_package != 0) {
8845 // FIXME: print non-zero monolithic device definition
8846 debug("DDR3: Device Type MONOLITHIC: %d die, signal load %d\n",
8847 ((1 << (spd_package >> 4) & 7) >> 1),
8848 (spd_package & 3));
8849 }
8850
8851 spd_rawcard =
8852 0xFF & read_spd(&dimm_config_table[0], 0,
8853 DDR3_SPD_REFERENCE_RAW_CARD);
8854 debug("DDR3: Reference Raw Card 0x%02x\n", spd_rawcard);
8855 spd_thermal_sensor =
8856 read_spd(&dimm_config_table[0], 0,
8857 DDR3_SPD_MODULE_THERMAL_SENSOR);
8858
8859 if (spd_rdimm) {
8860 int spd_mfgr_id, spd_register_rev, spd_mod_attr;
8861
8862 spd_mfgr_id =
8863 (0xFFU &
8864 read_spd(&dimm_config_table[0], 0,
8865 DDR3_SPD_REGISTER_MANUFACTURER_ID_LSB)) |
8866 ((0xFFU &
8867 read_spd(&dimm_config_table[0], 0,
8868 DDR3_SPD_REGISTER_MANUFACTURER_ID_MSB))
8869 << 8);
8870 spd_register_rev =
8871 0xFFU & read_spd(&dimm_config_table[0], 0,
8872 DDR3_SPD_REGISTER_REVISION_NUMBER);
8873 debug("DDR3: RDIMM Register Manufacturer ID 0x%x Revision 0x%02x\n",
8874 spd_mfgr_id, spd_register_rev);
8875 // Module Attributes
8876 spd_mod_attr =
8877 0xFFU & read_spd(&dimm_config_table[0], 0,
8878 DDR3_SPD_ADDRESS_MAPPING);
8879 spd_rdimm_registers = ((1 << (spd_mod_attr & 3)) >> 1);
8880 debug("DDR3: RDIMM Module Attributes (0x%02x): DRAM rows %d, Registers %d\n",
8881 spd_mod_attr,
8882 ((1 << ((spd_mod_attr >> 2) & 3)) >> 1),
8883 spd_rdimm_registers);
8884 }
8885 dimm_type_name = ddr3_dimm_types[spd_dimm_type];
8886 }
8887
8888 if (spd_thermal_sensor & 0x80) {
8889 debug("DDR%d: SPD: Thermal Sensor PRESENT\n",
8890 (ddr_type == DDR4_DRAM) ? 4 : 3);
8891 }
8892
8893 debug("spd_addr : %#06x\n", spd_addr);
8894 debug("spd_org : %#06x\n", spd_org);
8895 debug("spd_banks : %#06x\n", spd_banks);
8896
8897 row_bits = 12 + ((spd_addr >> 3) & 0x7);
8898 col_bits = 9 + ((spd_addr >> 0) & 0x7);
8899
8900 num_ranks = 1 + ((spd_org >> 3) & 0x7);
8901 dram_width = 4 << ((spd_org >> 0) & 0x7);
8902 num_banks = 1 << bank_bits;
8903
8904 s = lookup_env(priv, "ddr_num_ranks");
8905 if (s)
8906 num_ranks = simple_strtoul(s, NULL, 0);
8907
8908 s = lookup_env(priv, "ddr_enable_by_rank_init");
8909 if (s)
8910 enable_by_rank_init = !!simple_strtoul(s, NULL, 0);
8911
8912 // FIXME: for now, we can only handle a DDR4 2rank-1slot config
8913 // FIXME: also, by-rank init does not work correctly if 32-bit mode...
8914 if (enable_by_rank_init && (ddr_type != DDR4_DRAM ||
8915 dimm_count != 1 || if_64b != 1 ||
8916 num_ranks != 2))
8917 enable_by_rank_init = 0;
8918
8919 if (enable_by_rank_init) {
8920 struct dimm_odt_config *odt_config;
8921 union cvmx_lmcx_modereg_params1 mp1;
8922 union cvmx_lmcx_modereg_params2 modereg_params2;
8923 int by_rank_rodt, by_rank_wr, by_rank_park;
8924
8925 // Do ODT settings changes which work best for 2R-1S configs
8926 debug("DDR4: 2R-1S special BY-RANK init ODT settings updated\n");
8927
8928 // setup for modifying config table values - 2 ranks and 1 DIMM
8929 odt_config =
8930 (struct dimm_odt_config *)&ddr_conf->odt_2rank_config[0];
8931
8932 // original was 80, first try was 60
8933 by_rank_rodt = ddr4_rodt_ctl_48_ohm;
8934 s = lookup_env(priv, "ddr_by_rank_rodt");
8935 if (s)
8936 by_rank_rodt = strtoul(s, NULL, 0);
8937
8938 odt_config->qs_dic = /*RODT_CTL */ by_rank_rodt;
8939
8940 // this is for MODEREG_PARAMS1 fields
8941 // fetch the original settings
8942 mp1.u64 = odt_config->modereg_params1.u64;
8943
8944 by_rank_wr = ddr4_rttwr_80ohm; // originals were 240
8945 s = lookup_env(priv, "ddr_by_rank_wr");
8946 if (s)
8947 by_rank_wr = simple_strtoul(s, NULL, 0);
8948
8949 // change specific settings here...
8950 insrt_wr(&mp1.u64, /*rank */ 00, by_rank_wr);
8951 insrt_wr(&mp1.u64, /*rank */ 01, by_rank_wr);
8952
8953 // save final settings
8954 odt_config->modereg_params1.u64 = mp1.u64;
8955
8956 // this is for MODEREG_PARAMS2 fields
8957 // fetch the original settings
8958 modereg_params2.u64 = odt_config->modereg_params2.u64;
8959
8960 by_rank_park = ddr4_rttpark_none; // originals were 120
8961 s = lookup_env(priv, "ddr_by_rank_park");
8962 if (s)
8963 by_rank_park = simple_strtoul(s, NULL, 0);
8964
8965 // change specific settings here...
8966 modereg_params2.s.rtt_park_00 = by_rank_park;
8967 modereg_params2.s.rtt_park_01 = by_rank_park;
8968
8969 // save final settings
8970 odt_config->modereg_params2.u64 = modereg_params2.u64;
8971 }
8972
8973 /*
8974 * FIX
8975 * Check that values are within some theoretical limits.
8976 * col_bits(min) = row_lsb(min) - bank_bits(max) - bus_bits(max) =
8977 * 14 - 3 - 4 = 7
8978 * col_bits(max) = row_lsb(max) - bank_bits(min) - bus_bits(min) =
8979 * 18 - 2 - 3 = 13
8980 */
8981 if (col_bits > 13 || col_bits < 7) {
8982 printf("Unsupported number of Col Bits: %d\n", col_bits);
8983 ++fatal_error;
8984 }
8985
8986 /*
8987 * FIX
8988 * Check that values are within some theoretical limits.
8989 * row_bits(min) = pbank_lsb(min) - row_lsb(max) - rank_bits =
8990 * 26 - 18 - 1 = 7
8991 * row_bits(max) = pbank_lsb(max) - row_lsb(min) - rank_bits =
8992 * 33 - 14 - 1 = 18
8993 */
8994 if (row_bits > 18 || row_bits < 7) {
8995 printf("Unsupported number of Row Bits: %d\n", row_bits);
8996 ++fatal_error;
8997 }
8998
8999 s = lookup_env(priv, "ddr_rdimm_ena");
9000 if (s)
9001 spd_rdimm = !!simple_strtoul(s, NULL, 0);
9002
9003 wl_loops = WLEVEL_LOOPS_DEFAULT;
9004 // accept generic or interface-specific override
9005 s = lookup_env(priv, "ddr_wlevel_loops");
9006 if (!s)
9007 s = lookup_env(priv, "ddr%d_wlevel_loops", if_num);
9008
9009 if (s)
9010 wl_loops = strtoul(s, NULL, 0);
9011
9012 s = lookup_env(priv, "ddr_ranks");
9013 if (s)
9014 num_ranks = simple_strtoul(s, NULL, 0);
9015
9016 bunk_enable = (num_ranks > 1);
9017
9018 if (octeon_is_cpuid(OCTEON_CN7XXX))
9019 column_bits_start = 3;
9020 else
9021 printf("ERROR: Unsupported Octeon model: 0x%x\n",
9022 read_c0_prid());
9023
9024 row_lsb = column_bits_start + col_bits + bank_bits - (!if_64b);
9025 debug("row_lsb = column_bits_start + col_bits + bank_bits = %d\n",
9026 row_lsb);
9027
9028 pbank_lsb = row_lsb + row_bits + bunk_enable;
9029 debug("pbank_lsb = row_lsb + row_bits + bunk_enable = %d\n", pbank_lsb);
9030
9031 if (lranks_per_prank > 1) {
9032 pbank_lsb = row_lsb + row_bits + lranks_bits + bunk_enable;
9033 debug("DDR4: 3DS: pbank_lsb = (%d row_lsb) + (%d row_bits) + (%d lranks_bits) + (%d bunk_enable) = %d\n",
9034 row_lsb, row_bits, lranks_bits, bunk_enable, pbank_lsb);
9035 }
9036
9037 mem_size_mbytes = dimm_count * ((1ull << pbank_lsb) >> 20);
9038 if (num_ranks == 4) {
9039 /*
9040 * Quad rank dimm capacity is equivalent to two dual-rank
9041 * dimms.
9042 */
9043 mem_size_mbytes *= 2;
9044 }
9045
9046 /*
9047 * Mask with 1 bits set for for each active rank, allowing 2 bits
9048 * per dimm. This makes later calculations simpler, as a variety
9049 * of CSRs use this layout. This init needs to be updated for dual
9050 * configs (ie non-identical DIMMs).
9051 *
9052 * Bit 0 = dimm0, rank 0
9053 * Bit 1 = dimm0, rank 1
9054 * Bit 2 = dimm1, rank 0
9055 * Bit 3 = dimm1, rank 1
9056 * ...
9057 */
9058 rank_mask = 0x1;
9059 if (num_ranks > 1)
9060 rank_mask = 0x3;
9061 if (num_ranks > 2)
9062 rank_mask = 0xf;
9063
9064 for (i = 1; i < dimm_count; i++)
9065 rank_mask |= ((rank_mask & 0x3) << (2 * i));
9066
9067 /*
9068 * If we are booting from RAM, the DRAM controller is
9069 * already set up. Just return the memory size
9070 */
9071 if (priv->flags & FLAG_RAM_RESIDENT) {
9072 debug("Ram Boot: Skipping LMC config\n");
9073 return mem_size_mbytes;
9074 }
9075
9076 if (ddr_type == DDR4_DRAM) {
9077 spd_ecc =
9078 !!(read_spd
9079 (&dimm_config_table[0], 0,
9080 DDR4_SPD_MODULE_MEMORY_BUS_WIDTH) & 8);
9081 } else {
9082 spd_ecc =
9083 !!(read_spd
9084 (&dimm_config_table[0], 0,
9085 DDR3_SPD_MEMORY_BUS_WIDTH) & 8);
9086 }
9087
9088 char rank_spec[8];
9089
9090 printable_rank_spec(rank_spec, num_ranks, dram_width, spd_package);
9091 debug("Summary: %d %s%s %s %s, row bits=%d, col bits=%d, bank bits=%d\n",
9092 dimm_count, dimm_type_name, (dimm_count > 1) ? "s" : "",
9093 rank_spec,
9094 (spd_ecc) ? "ECC" : "non-ECC", row_bits, col_bits, bank_bits);
9095
9096 if (ddr_type == DDR4_DRAM) {
9097 spd_cas_latency =
9098 ((0xff &
9099 read_spd(&dimm_config_table[0], 0,
9100 DDR4_SPD_CAS_LATENCIES_BYTE0)) << 0);
9101 spd_cas_latency |=
9102 ((0xff &
9103 read_spd(&dimm_config_table[0], 0,
9104 DDR4_SPD_CAS_LATENCIES_BYTE1)) << 8);
9105 spd_cas_latency |=
9106 ((0xff &
9107 read_spd(&dimm_config_table[0], 0,
9108 DDR4_SPD_CAS_LATENCIES_BYTE2)) << 16);
9109 spd_cas_latency |=
9110 ((0xff &
9111 read_spd(&dimm_config_table[0], 0,
9112 DDR4_SPD_CAS_LATENCIES_BYTE3)) << 24);
9113 } else {
9114 spd_cas_latency =
9115 0xff & read_spd(&dimm_config_table[0], 0,
9116 DDR3_SPD_CAS_LATENCIES_LSB);
9117 spd_cas_latency |=
9118 ((0xff &
9119 read_spd(&dimm_config_table[0], 0,
9120 DDR3_SPD_CAS_LATENCIES_MSB)) << 8);
9121 }
9122 debug("spd_cas_latency : %#06x\n", spd_cas_latency);
9123
9124 if (ddr_type == DDR4_DRAM) {
9125 /*
9126 * No other values for DDR4 MTB and FTB are specified at the
9127 * current time so don't bother reading them. Can't speculate
9128 * how new values will be represented.
9129 */
9130 int spdmtb = 125;
9131 int spdftb = 1;
9132
9133 taamin = spdmtb * read_spd(&dimm_config_table[0], 0,
9134 DDR4_SPD_MIN_CAS_LATENCY_TAAMIN) +
9135 spdftb * (signed char)read_spd(&dimm_config_table[0],
9136 0, DDR4_SPD_MIN_CAS_LATENCY_FINE_TAAMIN);
9137
9138 ddr4_tckavgmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9139 DDR4_SPD_MINIMUM_CYCLE_TIME_TCKAVGMIN) +
9140 spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9141 DDR4_SPD_MIN_CYCLE_TIME_FINE_TCKAVGMIN);
9142
9143 ddr4_tckavgmax = spdmtb * read_spd(&dimm_config_table[0], 0,
9144 DDR4_SPD_MAXIMUM_CYCLE_TIME_TCKAVGMAX) +
9145 spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9146 DDR4_SPD_MAX_CYCLE_TIME_FINE_TCKAVGMAX);
9147
9148 ddr4_trdcmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9149 DDR4_SPD_MIN_RAS_CAS_DELAY_TRCDMIN) +
9150 spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9151 DDR4_SPD_MIN_RAS_TO_CAS_DELAY_FINE_TRCDMIN);
9152
9153 ddr4_trpmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9154 DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN) +
9155 spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9156 DDR4_SPD_MIN_ROW_PRECHARGE_DELAY_FINE_TRPMIN);
9157
9158 ddr4_trasmin = spdmtb *
9159 (((read_spd
9160 (&dimm_config_table[0], 0,
9161 DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8) +
9162 (read_spd
9163 (&dimm_config_table[0], 0,
9164 DDR4_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN) & 0xff));
9165
9166 ddr4_trcmin = spdmtb *
9167 ((((read_spd
9168 (&dimm_config_table[0], 0,
9169 DDR4_SPD_UPPER_NIBBLES_TRAS_TRC) >> 4) & 0xf) <<
9170 8) + (read_spd
9171 (&dimm_config_table[0], 0,
9172 DDR4_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN) &
9173 0xff))
9174 + spdftb * (signed char)read_spd(&dimm_config_table[0],
9175 0,
9176 DDR4_SPD_MIN_ACT_TO_ACT_REFRESH_DELAY_FINE_TRCMIN);
9177
9178 ddr4_trfc1min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
9179 DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC1MIN) & 0xff) <<
9180 8) + (read_spd(&dimm_config_table[0], 0,
9181 DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC1MIN) & 0xff));
9182
9183 ddr4_trfc2min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
9184 DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC2MIN) & 0xff) <<
9185 8) + (read_spd(&dimm_config_table[0], 0,
9186 DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC2MIN) & 0xff));
9187
9188 ddr4_trfc4min = spdmtb * (((read_spd(&dimm_config_table[0], 0,
9189 DDR4_SPD_MIN_REFRESH_RECOVERY_MSB_TRFC4MIN) & 0xff) <<
9190 8) + (read_spd(&dimm_config_table[0], 0,
9191 DDR4_SPD_MIN_REFRESH_RECOVERY_LSB_TRFC4MIN) & 0xff));
9192
9193 ddr4_tfawmin = spdmtb * (((read_spd(&dimm_config_table[0], 0,
9194 DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_MSN_TFAWMIN) & 0xf) <<
9195 8) + (read_spd(&dimm_config_table[0], 0,
9196 DDR4_SPD_MIN_FOUR_ACTIVE_WINDOW_LSB_TFAWMIN) & 0xff));
9197
9198 ddr4_trrd_smin = spdmtb * read_spd(&dimm_config_table[0], 0,
9199 DDR4_SPD_MIN_ROW_ACTIVE_DELAY_SAME_TRRD_SMIN) +
9200 spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9201 DDR4_SPD_MIN_ACT_TO_ACT_DELAY_DIFF_FINE_TRRD_SMIN);
9202
9203 ddr4_trrd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9204 DDR4_SPD_MIN_ROW_ACTIVE_DELAY_DIFF_TRRD_LMIN) +
9205 spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9206 DDR4_SPD_MIN_ACT_TO_ACT_DELAY_SAME_FINE_TRRD_LMIN);
9207
9208 ddr4_tccd_lmin = spdmtb * read_spd(&dimm_config_table[0], 0,
9209 DDR4_SPD_MIN_CAS_TO_CAS_DELAY_TCCD_LMIN) +
9210 spdftb * (signed char)read_spd(&dimm_config_table[0], 0,
9211 DDR4_SPD_MIN_CAS_TO_CAS_DELAY_FINE_TCCD_LMIN);
9212
9213 debug("%-45s : %6d ps\n", "Medium Timebase (MTB)", spdmtb);
9214 debug("%-45s : %6d ps\n", "Fine Timebase (FTB)", spdftb);
9215
9216 debug("%-45s : %6d ps (%ld MT/s)\n",
9217 "SDRAM Minimum Cycle Time (tCKAVGmin)", ddr4_tckavgmin,
9218 pretty_psecs_to_mts(ddr4_tckavgmin));
9219 debug("%-45s : %6d ps\n",
9220 "SDRAM Maximum Cycle Time (tCKAVGmax)", ddr4_tckavgmax);
9221 debug("%-45s : %6d ps\n", "Minimum CAS Latency Time (taamin)",
9222 taamin);
9223 debug("%-45s : %6d ps\n",
9224 "Minimum RAS to CAS Delay Time (tRCDmin)", ddr4_trdcmin);
9225 debug("%-45s : %6d ps\n",
9226 "Minimum Row Precharge Delay Time (tRPmin)", ddr4_trpmin);
9227 debug("%-45s : %6d ps\n",
9228 "Minimum Active to Precharge Delay (tRASmin)",
9229 ddr4_trasmin);
9230 debug("%-45s : %6d ps\n",
9231 "Minimum Active to Active/Refr. Delay (tRCmin)",
9232 ddr4_trcmin);
9233 debug("%-45s : %6d ps\n",
9234 "Minimum Refresh Recovery Delay (tRFC1min)",
9235 ddr4_trfc1min);
9236 debug("%-45s : %6d ps\n",
9237 "Minimum Refresh Recovery Delay (tRFC2min)",
9238 ddr4_trfc2min);
9239 debug("%-45s : %6d ps\n",
9240 "Minimum Refresh Recovery Delay (tRFC4min)",
9241 ddr4_trfc4min);
9242 debug("%-45s : %6d ps\n",
9243 "Minimum Four Activate Window Time (tFAWmin)",
9244 ddr4_tfawmin);
9245 debug("%-45s : %6d ps\n",
9246 "Minimum Act. to Act. Delay (tRRD_Smin)", ddr4_trrd_smin);
9247 debug("%-45s : %6d ps\n",
9248 "Minimum Act. to Act. Delay (tRRD_Lmin)", ddr4_trrd_lmin);
9249 debug("%-45s : %6d ps\n",
9250 "Minimum CAS to CAS Delay Time (tCCD_Lmin)",
9251 ddr4_tccd_lmin);
9252
9253#define DDR4_TWR 15000
9254#define DDR4_TWTR_S 2500
9255
9256 tckmin = ddr4_tckavgmin;
9257 twr = DDR4_TWR;
9258 trcd = ddr4_trdcmin;
9259 trrd = ddr4_trrd_smin;
9260 trp = ddr4_trpmin;
9261 tras = ddr4_trasmin;
9262 trc = ddr4_trcmin;
9263 trfc = ddr4_trfc1min;
9264 twtr = DDR4_TWTR_S;
9265 tfaw = ddr4_tfawmin;
9266
9267 if (spd_rdimm) {
9268 spd_addr_mirror = read_spd(&dimm_config_table[0], 0,
9269 DDR4_SPD_RDIMM_ADDR_MAPPING_FROM_REGISTER_TO_DRAM) &
9270 0x1;
9271 } else {
9272 spd_addr_mirror = read_spd(&dimm_config_table[0], 0,
9273 DDR4_SPD_UDIMM_ADDR_MAPPING_FROM_EDGE) & 0x1;
9274 }
9275 debug("spd_addr_mirror : %#06x\n", spd_addr_mirror);
9276 } else {
9277 spd_mtb_dividend =
9278 0xff & read_spd(&dimm_config_table[0], 0,
9279 DDR3_SPD_MEDIUM_TIMEBASE_DIVIDEND);
9280 spd_mtb_divisor =
9281 0xff & read_spd(&dimm_config_table[0], 0,
9282 DDR3_SPD_MEDIUM_TIMEBASE_DIVISOR);
9283 spd_tck_min =
9284 0xff & read_spd(&dimm_config_table[0], 0,
9285 DDR3_SPD_MINIMUM_CYCLE_TIME_TCKMIN);
9286 spd_taa_min =
9287 0xff & read_spd(&dimm_config_table[0], 0,
9288 DDR3_SPD_MIN_CAS_LATENCY_TAAMIN);
9289
9290 spd_twr =
9291 0xff & read_spd(&dimm_config_table[0], 0,
9292 DDR3_SPD_MIN_WRITE_RECOVERY_TWRMIN);
9293 spd_trcd =
9294 0xff & read_spd(&dimm_config_table[0], 0,
9295 DDR3_SPD_MIN_RAS_CAS_DELAY_TRCDMIN);
9296 spd_trrd =
9297 0xff & read_spd(&dimm_config_table[0], 0,
9298 DDR3_SPD_MIN_ROW_ACTIVE_DELAY_TRRDMIN);
9299 spd_trp =
9300 0xff & read_spd(&dimm_config_table[0], 0,
9301 DDR3_SPD_MIN_ROW_PRECHARGE_DELAY_TRPMIN);
9302 spd_tras =
9303 0xff & read_spd(&dimm_config_table[0], 0,
9304 DDR3_SPD_MIN_ACTIVE_PRECHARGE_LSB_TRASMIN);
9305 spd_tras |=
9306 ((0xff &
9307 read_spd(&dimm_config_table[0], 0,
9308 DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf) << 8);
9309 spd_trc =
9310 0xff & read_spd(&dimm_config_table[0], 0,
9311 DDR3_SPD_MIN_ACTIVE_REFRESH_LSB_TRCMIN);
9312 spd_trc |=
9313 ((0xff &
9314 read_spd(&dimm_config_table[0], 0,
9315 DDR3_SPD_UPPER_NIBBLES_TRAS_TRC) & 0xf0) << 4);
9316 spd_trfc =
9317 0xff & read_spd(&dimm_config_table[0], 0,
9318 DDR3_SPD_MIN_REFRESH_RECOVERY_LSB_TRFCMIN);
9319 spd_trfc |=
9320 ((0xff &
9321 read_spd(&dimm_config_table[0], 0,
9322 DDR3_SPD_MIN_REFRESH_RECOVERY_MSB_TRFCMIN)) <<
9323 8);
9324 spd_twtr =
9325 0xff & read_spd(&dimm_config_table[0], 0,
9326 DDR3_SPD_MIN_INTERNAL_WRITE_READ_CMD_TWTRMIN);
9327 spd_trtp =
9328 0xff & read_spd(&dimm_config_table[0], 0,
9329 DDR3_SPD_MIN_INTERNAL_READ_PRECHARGE_CMD_TRTPMIN);
9330 spd_tfaw =
9331 0xff & read_spd(&dimm_config_table[0], 0,
9332 DDR3_SPD_MIN_FOUR_ACTIVE_WINDOW_TFAWMIN);
9333 spd_tfaw |=
9334 ((0xff &
9335 read_spd(&dimm_config_table[0], 0,
9336 DDR3_SPD_UPPER_NIBBLE_TFAW) & 0xf) << 8);
9337 spd_addr_mirror =
9338 0xff & read_spd(&dimm_config_table[0], 0,
9339 DDR3_SPD_ADDRESS_MAPPING) & 0x1;
9340 /* Only address mirror unbuffered dimms. */
9341 spd_addr_mirror = spd_addr_mirror && !spd_rdimm;
9342 ftb_dividend =
9343 read_spd(&dimm_config_table[0], 0,
9344 DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) >> 4;
9345 ftb_divisor =
9346 read_spd(&dimm_config_table[0], 0,
9347 DDR3_SPD_FINE_TIMEBASE_DIVIDEND_DIVISOR) & 0xf;
9348 /* Make sure that it is not 0 */
9349 ftb_divisor = (ftb_divisor == 0) ? 1 : ftb_divisor;
9350
9351 debug("spd_twr : %#06x\n", spd_twr);
9352 debug("spd_trcd : %#06x\n", spd_trcd);
9353 debug("spd_trrd : %#06x\n", spd_trrd);
9354 debug("spd_trp : %#06x\n", spd_trp);
9355 debug("spd_tras : %#06x\n", spd_tras);
9356 debug("spd_trc : %#06x\n", spd_trc);
9357 debug("spd_trfc : %#06x\n", spd_trfc);
9358 debug("spd_twtr : %#06x\n", spd_twtr);
9359 debug("spd_trtp : %#06x\n", spd_trtp);
9360 debug("spd_tfaw : %#06x\n", spd_tfaw);
9361 debug("spd_addr_mirror : %#06x\n", spd_addr_mirror);
9362
9363 mtb_psec = spd_mtb_dividend * 1000 / spd_mtb_divisor;
9364 taamin = mtb_psec * spd_taa_min;
9365 taamin += ftb_dividend *
9366 (signed char)read_spd(&dimm_config_table[0],
9367 0, DDR3_SPD_MIN_CAS_LATENCY_FINE_TAAMIN) /
9368 ftb_divisor;
9369 tckmin = mtb_psec * spd_tck_min;
9370 tckmin += ftb_dividend *
9371 (signed char)read_spd(&dimm_config_table[0],
9372 0, DDR3_SPD_MINIMUM_CYCLE_TIME_FINE_TCKMIN) /
9373 ftb_divisor;
9374
9375 twr = spd_twr * mtb_psec;
9376 trcd = spd_trcd * mtb_psec;
9377 trrd = spd_trrd * mtb_psec;
9378 trp = spd_trp * mtb_psec;
9379 tras = spd_tras * mtb_psec;
9380 trc = spd_trc * mtb_psec;
9381 trfc = spd_trfc * mtb_psec;
9382 if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) && trfc < 260000) {
9383 // default to this - because it works...
9384 int new_trfc = 260000;
9385
9386 s = env_get("ddr_trfc");
9387 if (s) {
9388 new_trfc = simple_strtoul(s, NULL, 0);
9389 printf("Parameter found in environment. ddr_trfc = %d\n",
9390 new_trfc);
9391 if (new_trfc < 160000 || new_trfc > 260000) {
9392 // back to default if out of range
9393 new_trfc = 260000;
9394 }
9395 }
9396 debug("N%d.LMC%d: Adjusting tRFC from %d to %d, for CN78XX Pass 2.x\n",
9397 node, if_num, trfc, new_trfc);
9398 trfc = new_trfc;
9399 }
9400
9401 twtr = spd_twtr * mtb_psec;
9402 trtp = spd_trtp * mtb_psec;
9403 tfaw = spd_tfaw * mtb_psec;
9404
9405 debug("Medium Timebase (MTB) : %6d ps\n",
9406 mtb_psec);
9407 debug("Minimum Cycle Time (tckmin) : %6d ps (%ld MT/s)\n",
9408 tckmin, pretty_psecs_to_mts(tckmin));
9409 debug("Minimum CAS Latency Time (taamin) : %6d ps\n",
9410 taamin);
9411 debug("Write Recovery Time (tWR) : %6d ps\n",
9412 twr);
9413 debug("Minimum RAS to CAS delay (tRCD) : %6d ps\n",
9414 trcd);
9415 debug("Minimum Row Active to Row Active delay (tRRD) : %6d ps\n",
9416 trrd);
9417 debug("Minimum Row Precharge Delay (tRP) : %6d ps\n",
9418 trp);
9419 debug("Minimum Active to Precharge (tRAS) : %6d ps\n",
9420 tras);
9421 debug("Minimum Active to Active/Refresh Delay (tRC) : %6d ps\n",
9422 trc);
9423 debug("Minimum Refresh Recovery Delay (tRFC) : %6d ps\n",
9424 trfc);
9425 debug("Internal write to read command delay (tWTR) : %6d ps\n",
9426 twtr);
9427 debug("Min Internal Rd to Precharge Cmd Delay (tRTP) : %6d ps\n",
9428 trtp);
9429 debug("Minimum Four Activate Window Delay (tFAW) : %6d ps\n",
9430 tfaw);
9431 }
9432
9433 /*
9434 * When the cycle time is within 1 psec of the minimum accept it
9435 * as a slight rounding error and adjust it to exactly the minimum
9436 * cycle time. This avoids an unnecessary warning.
9437 */
9438 if (abs(tclk_psecs - tckmin) < 2)
9439 tclk_psecs = tckmin;
9440
9441 if (tclk_psecs < (u64)tckmin) {
9442 printf("WARNING!!!!: DDR Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin: %ld)!!!!\n",
9443 tclk_psecs, (ulong)tckmin);
9444 }
9445
9446 debug("DDR Clock Rate (tCLK) : %6ld ps\n",
9447 tclk_psecs);
9448 debug("Core Clock Rate (eCLK) : %6ld ps\n",
9449 eclk_psecs);
9450
9451 s = env_get("ddr_use_ecc");
9452 if (s) {
9453 use_ecc = !!simple_strtoul(s, NULL, 0);
9454 printf("Parameter found in environment. ddr_use_ecc = %d\n",
9455 use_ecc);
9456 }
9457 use_ecc = use_ecc && spd_ecc;
9458
9459 if_bytemask = if_64b ? (use_ecc ? 0x1ff : 0xff)
9460 : (use_ecc ? 0x01f : 0x0f);
9461
9462 debug("DRAM Interface width: %d bits %s bytemask 0x%03x\n",
9463 if_64b ? 64 : 32, use_ecc ? "+ECC" : "", if_bytemask);
9464
9465 debug("\n------ Board Custom Configuration Settings ------\n");
9466 debug("%-45s : %d\n", "MIN_RTT_NOM_IDX ", c_cfg->min_rtt_nom_idx);
9467 debug("%-45s : %d\n", "MAX_RTT_NOM_IDX ", c_cfg->max_rtt_nom_idx);
9468 debug("%-45s : %d\n", "MIN_RODT_CTL ", c_cfg->min_rodt_ctl);
9469 debug("%-45s : %d\n", "MAX_RODT_CTL ", c_cfg->max_rodt_ctl);
9470 debug("%-45s : %d\n", "MIN_CAS_LATENCY ", c_cfg->min_cas_latency);
9471 debug("%-45s : %d\n", "OFFSET_EN ", c_cfg->offset_en);
9472 debug("%-45s : %d\n", "OFFSET_UDIMM ", c_cfg->offset_udimm);
9473 debug("%-45s : %d\n", "OFFSET_RDIMM ", c_cfg->offset_rdimm);
9474 debug("%-45s : %d\n", "DDR_RTT_NOM_AUTO ", c_cfg->ddr_rtt_nom_auto);
9475 debug("%-45s : %d\n", "DDR_RODT_CTL_AUTO ", c_cfg->ddr_rodt_ctl_auto);
9476 if (spd_rdimm)
9477 debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET",
9478 c_cfg->rlevel_comp_offset_rdimm);
9479 else
9480 debug("%-45s : %d\n", "RLEVEL_COMP_OFFSET",
9481 c_cfg->rlevel_comp_offset_udimm);
9482 debug("%-45s : %d\n", "RLEVEL_COMPUTE ", c_cfg->rlevel_compute);
9483 debug("%-45s : %d\n", "DDR2T_UDIMM ", c_cfg->ddr2t_udimm);
9484 debug("%-45s : %d\n", "DDR2T_RDIMM ", c_cfg->ddr2t_rdimm);
9485 debug("%-45s : %d\n", "FPRCH2 ", c_cfg->fprch2);
9486 debug("%-45s : %d\n", "PTUNE_OFFSET ", c_cfg->ptune_offset);
9487 debug("%-45s : %d\n", "NTUNE_OFFSET ", c_cfg->ntune_offset);
9488 debug("-------------------------------------------------\n");
9489
9490 cl = divide_roundup(taamin, tclk_psecs);
9491
9492 debug("Desired CAS Latency : %6d\n", cl);
9493
9494 min_cas_latency = c_cfg->min_cas_latency;
9495
9496 s = lookup_env(priv, "ddr_min_cas_latency");
9497 if (s)
9498 min_cas_latency = simple_strtoul(s, NULL, 0);
9499
9500 debug("CAS Latencies supported in DIMM :");
9501 base_cl = (ddr_type == DDR4_DRAM) ? 7 : 4;
9502 for (i = 0; i < 32; ++i) {
9503 if ((spd_cas_latency >> i) & 1) {
9504 debug(" %d", i + base_cl);
9505 max_cas_latency = i + base_cl;
9506 if (min_cas_latency == 0)
9507 min_cas_latency = i + base_cl;
9508 }
9509 }
9510 debug("\n");
9511
9512 /*
9513 * Use relaxed timing when running slower than the minimum
9514 * supported speed. Adjust timing to match the smallest supported
9515 * CAS Latency.
9516 */
9517 if (min_cas_latency > cl) {
9518 ulong adjusted_tclk = taamin / min_cas_latency;
9519
9520 cl = min_cas_latency;
9521 debug("Slow clock speed. Adjusting timing: tClk = %ld, Adjusted tClk = %ld\n",
9522 tclk_psecs, adjusted_tclk);
9523 tclk_psecs = adjusted_tclk;
9524 }
9525
9526 s = env_get("ddr_cas_latency");
9527 if (s) {
9528 override_cas_latency = simple_strtoul(s, NULL, 0);
9529 printf("Parameter found in environment. ddr_cas_latency = %d\n",
9530 override_cas_latency);
9531 }
9532
9533 /* Make sure that the selected cas latency is legal */
9534 for (i = (cl - base_cl); i < 32; ++i) {
9535 if ((spd_cas_latency >> i) & 1) {
9536 cl = i + base_cl;
9537 break;
9538 }
9539 }
9540
9541 if (max_cas_latency < cl)
9542 cl = max_cas_latency;
9543
9544 if (override_cas_latency != 0)
9545 cl = override_cas_latency;
9546
9547 debug("CAS Latency : %6d\n", cl);
9548
9549 if ((cl * tckmin) > 20000) {
9550 debug("(CLactual * tckmin) = %d exceeds 20 ns\n",
9551 (cl * tckmin));
9552 }
9553
9554 if (tclk_psecs < (ulong)tckmin) {
9555 printf("WARNING!!!!!!: DDR3 Clock Rate (tCLK: %ld) exceeds DIMM specifications (tckmin:%ld)!!!!!!!!\n",
9556 tclk_psecs, (ulong)tckmin);
9557 }
9558
9559 if (num_banks != 4 && num_banks != 8 && num_banks != 16) {
9560 printf("Unsupported number of banks %d. Must be 4 or 8.\n",
9561 num_banks);
9562 ++fatal_error;
9563 }
9564
9565 if (num_ranks != 1 && num_ranks != 2 && num_ranks != 4) {
9566 printf("Unsupported number of ranks: %d\n", num_ranks);
9567 ++fatal_error;
9568 }
9569
9570 if (octeon_is_cpuid(OCTEON_CN78XX) ||
9571 octeon_is_cpuid(OCTEON_CN73XX) ||
9572 octeon_is_cpuid(OCTEON_CNF75XX)) {
9573 if (dram_width != 8 && dram_width != 16 && dram_width != 4) {
9574 printf("Unsupported SDRAM Width, %d. Must be 4, 8 or 16.\n",
9575 dram_width);
9576 ++fatal_error;
9577 }
9578 } else if (dram_width != 8 && dram_width != 16) {
9579 printf("Unsupported SDRAM Width, %d. Must be 8 or 16.\n",
9580 dram_width);
9581 ++fatal_error;
9582 }
9583
9584 /*
9585 ** Bail out here if things are not copasetic.
9586 */
9587 if (fatal_error)
9588 return (-1);
9589
9590 /*
9591 * 4.8.4 LMC RESET Initialization
9592 *
9593 * The purpose of this step is to assert/deassert the RESET# pin at the
9594 * DDR3/DDR4 parts.
9595 *
9596 * This LMC RESET step is done for all enabled LMCs.
9597 */
9598 perform_lmc_reset(priv, node, if_num);
9599
9600 // Make sure scrambling is disabled during init...
9601 ctrl.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(if_num));
9602 ctrl.s.scramble_ena = 0;
9603 lmc_wr(priv, CVMX_LMCX_CONTROL(if_num), ctrl.u64);
9604
9605 lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG0(if_num), 0);
9606 lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG1(if_num), 0);
9607 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS1_X))
9608 lmc_wr(priv, CVMX_LMCX_SCRAMBLE_CFG2(if_num), 0);
9609
9610 odt_idx = min(dimm_count - 1, 3);
9611
9612 switch (num_ranks) {
9613 case 1:
9614 odt_config = odt_1rank_config;
9615 break;
9616 case 2:
9617 odt_config = odt_2rank_config;
9618 break;
9619 case 4:
9620 odt_config = odt_4rank_config;
9621 break;
9622 default:
9623 odt_config = disable_odt_config;
9624 printf("Unsupported number of ranks: %d\n", num_ranks);
9625 ++fatal_error;
9626 }
9627
9628 /*
9629 * 4.8.5 Early LMC Initialization
9630 *
9631 * All of DDR PLL, LMC CK, and LMC DRESET initializations must be
9632 * completed prior to starting this LMC initialization sequence.
9633 *
9634 * Perform the following five substeps for early LMC initialization:
9635 *
9636 * 1. Software must ensure there are no pending DRAM transactions.
9637 *
9638 * 2. Write LMC(0)_CONFIG, LMC(0)_CONTROL, LMC(0)_TIMING_PARAMS0,
9639 * LMC(0)_TIMING_PARAMS1, LMC(0)_MODEREG_PARAMS0,
9640 * LMC(0)_MODEREG_PARAMS1, LMC(0)_DUAL_MEMCFG, LMC(0)_NXM,
9641 * LMC(0)_WODT_MASK, LMC(0)_RODT_MASK, LMC(0)_COMP_CTL2,
9642 * LMC(0)_PHY_CTL, LMC(0)_DIMM0/1_PARAMS, and LMC(0)_DIMM_CTL with
9643 * appropriate values. All sections in this chapter can be used to
9644 * derive proper register settings.
9645 */
9646
9647 /* LMC(0)_CONFIG */
9648 lmc_config(priv);
9649
9650 /* LMC(0)_CONTROL */
9651 lmc_control(priv);
9652
9653 /* LMC(0)_TIMING_PARAMS0 */
9654 lmc_timing_params0(priv);
9655
9656 /* LMC(0)_TIMING_PARAMS1 */
9657 lmc_timing_params1(priv);
9658
9659 /* LMC(0)_TIMING_PARAMS2 */
9660 lmc_timing_params2(priv);
9661
9662 /* LMC(0)_MODEREG_PARAMS0 */
9663 lmc_modereg_params0(priv);
9664
9665 /* LMC(0)_MODEREG_PARAMS1 */
9666 lmc_modereg_params1(priv);
9667
9668 /* LMC(0)_MODEREG_PARAMS2 */
9669 lmc_modereg_params2(priv);
9670
9671 /* LMC(0)_MODEREG_PARAMS3 */
9672 lmc_modereg_params3(priv);
9673
9674 /* LMC(0)_NXM */
9675 lmc_nxm(priv);
9676
9677 /* LMC(0)_WODT_MASK */
9678 lmc_wodt_mask(priv);
9679
9680 /* LMC(0)_RODT_MASK */
9681 lmc_rodt_mask(priv);
9682
9683 /* LMC(0)_COMP_CTL2 */
9684 lmc_comp_ctl2(priv);
9685
9686 /* LMC(0)_PHY_CTL */
9687 lmc_phy_ctl(priv);
9688
9689 /* LMC(0)_EXT_CONFIG */
9690 lmc_ext_config(priv);
9691
9692 /* LMC(0)_EXT_CONFIG2 */
9693 lmc_ext_config2(priv);
9694
9695 /* LMC(0)_DIMM0/1_PARAMS */
9696 lmc_dimm01_params(priv);
9697
9698 ret = lmc_rank_init(priv);
9699 if (ret < 0)
9700 return 0; /* 0 indicates problem */
9701
9702 lmc_config_2(priv);
9703
9704 lmc_write_leveling(priv);
9705
9706 lmc_read_leveling(priv);
9707
9708 lmc_workaround(priv);
9709
9710 ret = lmc_sw_write_leveling(priv);
9711 if (ret < 0)
9712 return 0; /* 0 indicates problem */
9713
9714 // this sometimes causes stack overflow crashes..
9715 // display only for DDR4 RDIMMs.
9716 if (ddr_type == DDR4_DRAM && spd_rdimm) {
9717 int i;
9718
9719 for (i = 0; i < 3; i += 2) // just pages 0 and 2 for now..
9720 display_mpr_page(priv, rank_mask, if_num, i);
9721 }
9722
9723 lmc_dll(priv);
9724
9725 lmc_workaround_2(priv);
9726
9727 lmc_final(priv);
9728
9729 lmc_scrambling(priv);
9730
9731 return mem_size_mbytes;
9732}
9733
9734///// HW-assist byte DLL offset tuning //////
9735
9736static int cvmx_dram_get_num_lmc(struct ddr_priv *priv)
9737{
9738 union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2;
9739
9740 if (octeon_is_cpuid(OCTEON_CN70XX))
9741 return 1;
9742
9743 if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX)) {
9744 // sample LMC1
9745 lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(1));
9746 if (lmcx_dll_ctl2.cn78xx.intf_en)
9747 return 2;
9748 else
9749 return 1;
9750 }
9751
9752 // for CN78XX, LMCs are always active in pairs, and always LMC0/1
9753 // so, we sample LMC2 to see if 2 and 3 are active
9754 lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(2));
9755 if (lmcx_dll_ctl2.cn78xx.intf_en)
9756 return 4;
9757 else
9758 return 2;
9759}
9760
9761// got to do these here, even though already defined in BDK
9762
9763// all DDR3, and DDR4 x16 today, use only 3 bank bits;
9764// DDR4 x4 and x8 always have 4 bank bits
9765// NOTE: this will change in the future, when DDR4 x16 devices can
9766// come with 16 banks!! FIXME!!
9767static int cvmx_dram_get_num_bank_bits(struct ddr_priv *priv, int lmc)
9768{
9769 union cvmx_lmcx_dll_ctl2 lmcx_dll_ctl2;
9770 union cvmx_lmcx_config lmcx_config;
9771 union cvmx_lmcx_ddr_pll_ctl lmcx_ddr_pll_ctl;
9772 int bank_width;
9773
9774 // can always read this
9775 lmcx_dll_ctl2.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(lmc));
9776
9777 if (lmcx_dll_ctl2.cn78xx.dreset) // check LMCn
9778 return 0;
9779
9780 lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_DLL_CTL2(lmc));
9781 lmcx_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(lmc));
9782
9783 bank_width = ((lmcx_ddr_pll_ctl.s.ddr4_mode != 0) &&
9784 (lmcx_config.s.bg2_enable)) ? 4 : 3;
9785
9786 return bank_width;
9787}
9788
9789#define EXTRACT(v, lsb, width) (((v) >> (lsb)) & ((1ull << (width)) - 1))
9790#define ADDRESS_HOLE 0x10000000ULL
9791
/*
 * Decompose a physical address into its DRAM coordinates.
 *
 * @priv:    private DDR driver state (used for CSR reads)
 * @address: physical address to decode
 * @node:    out: OCC node number (address bits [41:40])
 * @lmc:     out: LMC controller number (possibly index-alias hashed)
 * @dimm:    out: DIMM number
 * @prank:   out: physical (chip-select) rank
 * @lrank:   out: logical rank (3DS stacked parts; 0 width otherwise)
 * @bank:    out: bank address (possibly XOR-aliased, see xor_bank)
 * @row:     out: row address
 * @col:     out: column address
 *
 * The bit positions of each field are reconstructed from the live
 * LMC CSR settings (pbank_lsb, rank_ena, row_lsb, dimm0_cid, ...),
 * mirroring the address-mapping the hardware applies.
 */
static void cvmx_dram_address_extract_info(struct ddr_priv *priv, u64 address,
					   int *node, int *lmc, int *dimm,
					   int *prank, int *lrank, int *bank,
					   int *row, int *col)
{
	int bank_lsb, xbits;
	union cvmx_l2c_ctl l2c_ctl;
	union cvmx_lmcx_config lmcx_config;
	union cvmx_lmcx_control lmcx_control;
	union cvmx_lmcx_ext_config ext_config;
	/* alias-hash bit position differs per SoC family */
	int bitno = (octeon_is_cpuid(OCTEON_CN7XXX)) ? 20 : 18;
	int bank_width;
	int dimm_lsb;
	int dimm_width;
	int prank_lsb, lrank_lsb;
	int prank_width, lrank_width;
	int row_lsb;
	int row_width;
	int col_hi_lsb;
	int col_hi_width;
	int col_hi;

	if (octeon_is_cpuid(OCTEON_CN73XX) || octeon_is_cpuid(OCTEON_CNF75XX))
		bitno = 18;

	*node = EXTRACT(address, 40, 2); /* Address bits [41:40] */

	address &= (1ULL << 40) - 1; // lop off any node bits or above
	if (address >= ADDRESS_HOLE) // adjust down if at HOLE or above
		address -= ADDRESS_HOLE;

	/* Determine the LMC controllers */
	l2c_ctl.u64 = l2c_rd(priv, CVMX_L2C_CTL_REL);

	/* xbits depends on number of LMCs */
	xbits = cvmx_dram_get_num_lmc(priv) >> 1; // 4->2, 2->1, 1->0
	bank_lsb = 7 + xbits;

	/* LMC number is probably aliased */
	if (l2c_ctl.s.disidxalias) {
		/* index aliasing disabled: LMC selected by bits [7+] only */
		*lmc = EXTRACT(address, 7, xbits);
	} else {
		/* aliasing enabled: LMC select is a 3-way XOR hash */
		*lmc = EXTRACT(address, 7, xbits) ^
			EXTRACT(address, bitno, xbits) ^
			EXTRACT(address, 12, xbits);
	}

	/* Figure out the bank field width */
	lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(*lmc));
	ext_config.u64 = lmc_rd(priv, CVMX_LMCX_EXT_CONFIG(*lmc));
	bank_width = cvmx_dram_get_num_bank_bits(priv, *lmc);

	/* Extract additional info from the LMC_CONFIG CSR */
	/*
	 * Field layout (high to low): dimm | prank | lrank | row |
	 * col_hi | bank | lmc | col_lo | bus byte. Each width is the
	 * gap between adjacent LSB positions; a disabled feature
	 * (rank_ena/dimm0_cid == 0) collapses its field to zero width.
	 */
	dimm_lsb = 28 + lmcx_config.s.pbank_lsb + xbits;
	dimm_width = 40 - dimm_lsb;
	prank_lsb = dimm_lsb - lmcx_config.s.rank_ena;
	prank_width = dimm_lsb - prank_lsb;
	lrank_lsb = prank_lsb - ext_config.s.dimm0_cid;
	lrank_width = prank_lsb - lrank_lsb;
	row_lsb = 14 + lmcx_config.s.row_lsb + xbits;
	row_width = lrank_lsb - row_lsb;
	col_hi_lsb = bank_lsb + bank_width;
	col_hi_width = row_lsb - col_hi_lsb;

	/* Extract the parts of the address */
	*dimm = EXTRACT(address, dimm_lsb, dimm_width);
	*prank = EXTRACT(address, prank_lsb, prank_width);
	*lrank = EXTRACT(address, lrank_lsb, lrank_width);
	*row = EXTRACT(address, row_lsb, row_width);

	/* bank calculation may be aliased... */
	lmcx_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(*lmc));
	if (lmcx_control.s.xor_bank) {
		/* undo the bank XOR hash using the same bits HW used */
		*bank = EXTRACT(address, bank_lsb, bank_width) ^
			EXTRACT(address, 12 + xbits, bank_width);
	} else {
		*bank = EXTRACT(address, bank_lsb, bank_width);
	}

	/* LMC number already extracted */
	col_hi = EXTRACT(address, col_hi_lsb, col_hi_width);
	/* column = low 4 bits (addr[6:3]) plus the high part above bank */
	*col = EXTRACT(address, 3, 4) | (col_hi << 4);
	/* Bus byte is address bits [2:0]. Unused here */
}
9876
9877// end of added workarounds
9878
9879// NOTE: "mode" argument:
9880// DBTRAIN_TEST: for testing using GP patterns, includes ECC
9881// DBTRAIN_DBI: for DBI deskew training behavior (uses GP patterns)
9882// DBTRAIN_LFSR: for testing using LFSR patterns, includes ECC
9883// NOTE: trust the caller to specify the correct/supported mode
9884//
/*
 * Run the LMC hardware-assisted R/W training sequence (SEQ_SEL 14) over a
 * range of DRAM addresses and accumulate per-bytelane comparison errors.
 *
 * @priv:     driver private state
 * @if_num:   LMC interface number under test
 * @p:        base physical address; the LMC select bits, node bits and the
 *            512MB base offset are OR'ed/added in below before use
 * @mode:     DBTRAIN_TEST, DBTRAIN_DBI or DBTRAIN_LFSR (see above);
 *            caller is trusted to pass a supported mode
 * @xor_data: if non-NULL (and mode is not DBTRAIN_DBI), receives the
 *            accumulated bad-bit masks: [0] = DQ63:0, [1] = ECC bits 7:0
 *
 * Return: bitmask of erroring bytelanes (bits 0-7 = data bytes, bit 8 =
 *         ECC byte) accumulated over all tested addresses and banks;
 *         returns 0 early (no error indication) on address decode mismatch.
 */
static int test_dram_byte_hw(struct ddr_priv *priv, int if_num, u64 p,
			     int mode, u64 *xor_data)
{
	u64 p1;
	u64 k;
	int errors = 0;

	/* MPR compare results: 1 bit per DQ means MATCH, 0 means FAIL */
	u64 mpr_data0, mpr_data1;
	u64 bad_bits[2] = { 0, 0 };

	int node_address, lmc, dimm;
	int prank, lrank;
	int bank, row, col;
	int save_or_dis;
	int byte;
	int ba_loop, ba_bits;

	union cvmx_lmcx_rlevel_ctl rlevel_ctl;
	union cvmx_lmcx_dbtrain_ctl dbtrain_ctl;
	union cvmx_lmcx_phy_ctl phy_ctl;

	/* errors seen for the current address iteration, all 4 BA values */
	int biter_errs;

	// FIXME: K iterations set to 4 for now.
	// FIXME: decrement to increase iterations.
	// FIXME: must be no less than 22 to stay above an LMC hash field.
	int kshift = 27;

	const char *s;
	int node = 0;

	// allow override default setting for kshift
	s = env_get("ddr_tune_set_kshift");
	if (s) {
		int temp = simple_strtoul(s, NULL, 0);

		/* accepted range keeps k above LMC hash bits, below 512MB */
		if (temp < 22 || temp > 28) {
			debug("N%d.LMC%d: ILLEGAL override of kshift to %d, using default %d\n",
			      node, if_num, temp, kshift);
		} else {
			debug("N%d.LMC%d: overriding kshift (%d) to %d\n",
			      node, if_num, kshift, temp);
			kshift = temp;
		}
	}

	/*
	 * 1) Make sure that RLEVEL_CTL[OR_DIS] = 0.
	 */
	rlevel_ctl.u64 = lmc_rd(priv, CVMX_LMCX_RLEVEL_CTL(if_num));
	save_or_dis = rlevel_ctl.s.or_dis;
	/* or_dis must be disabled for this sequence; restored at the end */
	rlevel_ctl.s.or_dis = 0;
	lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64);

	/*
	 * NOTE: this step done in the calling routine(s)...
	 * 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern
	 * of choice.
	 * a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower
	 * (rising edge) 64 bits of data.
	 * b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper
	 * (falling edge) 64 bits of data.
	 * c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower
	 * (rising edge <7:0>) and upper (falling edge <15:8>) ECC data.
	 */

	// final address must include LMC and node
	p |= (if_num << 7);	/* Map address into proper interface */
	p |= (u64)node << CVMX_NODE_MEM_SHIFT;	// map to node

	/*
	 * Add base offset to both test regions to not clobber u-boot stuff
	 * when running from L2 for NAND boot.
	 */
	p += 0x20000000;	// offset to 512MB, ie above THE HOLE!!!
	p |= 1ull << 63;	// needed for OCTEON

	errors = 0;

	/* decode the start address back into LMC/rank/bank/row/col */
	cvmx_dram_address_extract_info(priv, p, &node_address, &lmc, &dimm,
				       &prank, &lrank, &bank, &row, &col);
	debug("%s: START at A:0x%012llx, N%d L%d D%d/%d R%d B%1x Row:%05x Col:%05x\n",
	      __func__, p, node_address, lmc, dimm, prank, lrank, bank,
	      row, col);

	// only check once per call, and ignore if no match...
	if ((int)node != node_address) {
		printf("ERROR: Node address mismatch\n");
		return 0;
	}
	if (lmc != if_num) {
		printf("ERROR: LMC address mismatch\n");
		return 0;
	}

	/*
	 * 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically clears this as
	 * it’s a one-shot operation). This is to get into the habit of
	 * resetting PHY’s SILO to the original 0 location.
	 */
	phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
	phy_ctl.s.phy_reset = 1;
	lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);

	/*
	 * Walk through a range of addresses avoiding bits that alias
	 * interfaces on the CN88XX.
	 */

	// FIXME: want to try to keep the K increment from affecting the
	// LMC via hash, so keep it above bit 21 we also want to keep k
	// less than the base offset of bit 29 (512MB)

	for (k = 0; k < (1UL << 29); k += (1UL << kshift)) {
		// FIXME: the sequence will iterate over 1/2 cacheline
		// FIXME: for each unit specified in "read_cmd_count",
		// FIXME: so, we setup each sequence to do the max cachelines
		// it can

		p1 = p + k;

		cvmx_dram_address_extract_info(priv, p1, &node_address, &lmc,
					       &dimm, &prank, &lrank, &bank,
					       &row, &col);

		/*
		 * 2) Setup the fields of the CSR DBTRAIN_CTL as follows:
		 * a. COL, ROW, BA, BG, PRANK points to the starting point
		 * of the address.
		 * You can just set them to all 0.
		 * b. RW_TRAIN – set this to 1.
		 * c. TCCD_L – set this to 0.
		 * d. READ_CMD_COUNT – instruct the sequence to the how many
		 * writes/reads.
		 * It is 5 bits field, so set to 31 of maximum # of r/w.
		 */
		dbtrain_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DBTRAIN_CTL(if_num));
		dbtrain_ctl.s.column_a = col;
		dbtrain_ctl.s.row_a = row;
		dbtrain_ctl.s.bg = (bank >> 2) & 3;
		dbtrain_ctl.s.prank = (dimm * 2) + prank;	// FIXME?
		dbtrain_ctl.s.lrank = lrank;	// FIXME?
		dbtrain_ctl.s.activate = (mode == DBTRAIN_DBI);
		dbtrain_ctl.s.write_ena = 1;
		dbtrain_ctl.s.read_cmd_count = 31;	// max count pass 1.x
		if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X) ||
		    octeon_is_cpuid(OCTEON_CNF75XX)) {
			// max count on chips that support it
			dbtrain_ctl.s.cmd_count_ext = 3;
		} else {
			// max count pass 1.x
			dbtrain_ctl.s.cmd_count_ext = 0;
		}

		dbtrain_ctl.s.rw_train = 1;
		dbtrain_ctl.s.tccd_sel = (mode == DBTRAIN_DBI);
		// LFSR should only be on when chip supports it...
		dbtrain_ctl.s.lfsr_pattern_sel = (mode == DBTRAIN_LFSR) ? 1 : 0;

		biter_errs = 0;

		// for each address, iterate over the 4 "banks" in the BA
		for (ba_loop = 0, ba_bits = bank & 3;
		     ba_loop < 4; ba_loop++, ba_bits = (ba_bits + 1) & 3) {
			dbtrain_ctl.s.ba = ba_bits;
			lmc_wr(priv, CVMX_LMCX_DBTRAIN_CTL(if_num),
			       dbtrain_ctl.u64);

			/*
			 * We will use the RW_TRAINING sequence (14) for
			 * this task.
			 *
			 * 4) Kick off the sequence (SEQ_CTL[SEQ_SEL] = 14,
			 * SEQ_CTL[INIT_START] = 1).
			 * 5) Poll on SEQ_CTL[SEQ_COMPLETE] for completion.
			 */
			oct3_ddr3_seq(priv, prank, if_num, 14);

			/*
			 * 6) Read MPR_DATA0 and MPR_DATA1 for results.
			 * a. MPR_DATA0[MPR_DATA<63:0>] – comparison results
			 * for DQ63:DQ0. (1 means MATCH, 0 means FAIL).
			 * b. MPR_DATA1[MPR_DATA<7:0>] – comparison results
			 * for ECC bit7:0.
			 */
			mpr_data0 = lmc_rd(priv, CVMX_LMCX_MPR_DATA0(if_num));
			mpr_data1 = lmc_rd(priv, CVMX_LMCX_MPR_DATA1(if_num));

			/*
			 * 7) Set PHY_CTL[PHY_RESET] = 1 (LMC automatically
			 * clears this as it’s a one-shot operation).
			 * This is to get into the habit of resetting PHY’s
			 * SILO to the original 0 location.
			 */
			phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(if_num));
			phy_ctl.s.phy_reset = 1;
			lmc_wr(priv, CVMX_LMCX_PHY_CTL(if_num), phy_ctl.u64);

			// bypass any error checking or updating when DBI mode
			if (mode == DBTRAIN_DBI)
				continue;

			// data bytes: invert so a set bit means a miscompare
			if (~mpr_data0) {
				for (byte = 0; byte < 8; byte++) {
					if ((~mpr_data0 >> (8 * byte)) & 0xffUL)
						biter_errs |= (1 << byte);
				}
				// accumulate bad bits
				bad_bits[0] |= ~mpr_data0;
			}

			// include ECC byte errors
			if (~mpr_data1 & 0xffUL) {
				biter_errs |= (1 << 8);
				bad_bits[1] |= ~mpr_data1 & 0xffUL;
			}
		}

		errors |= biter_errs;
	} /* end for (k=...) */

	/* restore the caller's original OR_DIS setting */
	rlevel_ctl.s.or_dis = save_or_dis;
	lmc_wr(priv, CVMX_LMCX_RLEVEL_CTL(if_num), rlevel_ctl.u64);

	// send the bad bits back...
	if (mode != DBTRAIN_DBI && xor_data) {
		xor_data[0] = bad_bits[0];
		xor_data[1] = bad_bits[1];
	}

	return errors;
}
10119
// setup default for byte test pattern array
// take these from the HRM section 6.9.13
// Each 3-entry array supplies the values written to the
// GENERAL_PURPOSE0/1/2 registers by setup_hw_pattern() below.
static const u64 byte_pattern_0[] = {
	0xFFAAFFFFFF55FFFFULL,	// GP0
	0x55555555AAAAAAAAULL,	// GP1
	0xAA55AAAAULL,		// GP2
};

static const u64 byte_pattern_1[] = {
	0xFBF7EFDFBF7FFEFDULL,	// GP0
	0x0F1E3C78F0E1C387ULL,	// GP1
	0xF0E1BF7FULL,		// GP2
};

// this is from Andrew via LFSR with PRBS=0xFFFFAAAA
static const u64 byte_pattern_2[] = {
	0xEE55AADDEE55AADDULL,	// GP0
	0x55AADDEE55AADDEEULL,	// GP1
	0x55EEULL,		// GP2
};

// this is from Mike via LFSR with PRBS=0x4A519909
static const u64 byte_pattern_3[] = {
	0x0088CCEE0088CCEEULL,	// GP0
	0xBB552211BB552211ULL,	// GP1
	0xBB00ULL,		// GP2
};

// index table used when testing with GP (non-LFSR) patterns
static const u64 *byte_patterns[4] = {
	byte_pattern_0, byte_pattern_1, byte_pattern_2, byte_pattern_3
};

// PRBS values used when testing with LFSR patterns (same indexing)
static const u32 lfsr_patterns[4] = {
	0xFFFFAAAAUL, 0x06000000UL, 0xAAAAFFFFUL, 0x4A519909UL
};

// number of entries in byte_patterns[]/lfsr_patterns[] above
#define NUM_BYTE_PATTERNS 4

#define DEFAULT_BYTE_BURSTS 32	// compromise between time and rigor
10159
10160static void setup_hw_pattern(struct ddr_priv *priv, int lmc,
10161 const u64 *pattern_p)
10162{
10163 /*
10164 * 3) Setup GENERAL_PURPOSE[0-2] registers with the data pattern
10165 * of choice.
10166 * a. GENERAL_PURPOSE0[DATA<63:0>] – sets the initial lower
10167 * (rising edge) 64 bits of data.
10168 * b. GENERAL_PURPOSE1[DATA<63:0>] – sets the initial upper
10169 * (falling edge) 64 bits of data.
10170 * c. GENERAL_PURPOSE2[DATA<15:0>] – sets the initial lower
10171 * (rising edge <7:0>) and upper
10172 * (falling edge <15:8>) ECC data.
10173 */
10174 lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), pattern_p[0]);
10175 lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), pattern_p[1]);
10176 lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), pattern_p[2]);
10177}
10178
10179static void setup_lfsr_pattern(struct ddr_priv *priv, int lmc, u32 data)
10180{
10181 union cvmx_lmcx_char_ctl char_ctl;
10182 u32 prbs;
10183 const char *s;
10184
10185 s = env_get("ddr_lfsr_prbs");
10186 if (s)
10187 prbs = simple_strtoul(s, NULL, 0);
10188 else
10189 prbs = data;
10190
10191 /*
10192 * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
10193 * here data comes from the LFSR generating a PRBS pattern
10194 * CHAR_CTL.EN = 0
10195 * CHAR_CTL.SEL = 0; // for PRBS
10196 * CHAR_CTL.DR = 1;
10197 * CHAR_CTL.PRBS = setup for whatever type of PRBS to send
10198 * CHAR_CTL.SKEW_ON = 1;
10199 */
10200 char_ctl.u64 = lmc_rd(priv, CVMX_LMCX_CHAR_CTL(lmc));
10201 char_ctl.s.en = 0;
10202 char_ctl.s.sel = 0;
10203 char_ctl.s.dr = 1;
10204 char_ctl.s.prbs = prbs;
10205 char_ctl.s.skew_on = 1;
10206 lmc_wr(priv, CVMX_LMCX_CHAR_CTL(lmc), char_ctl.u64);
10207}
10208
10209static int choose_best_hw_patterns(int lmc, int mode)
10210{
10211 int new_mode = mode;
10212 const char *s;
10213
10214 switch (mode) {
10215 case DBTRAIN_TEST: // always choose LFSR if chip supports it
10216 if (octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) {
10217 int lfsr_enable = 1;
10218
10219 s = env_get("ddr_allow_lfsr");
10220 if (s) {
10221 // override?
10222 lfsr_enable = !!strtoul(s, NULL, 0);
10223 }
10224
10225 if (lfsr_enable)
10226 new_mode = DBTRAIN_LFSR;
10227 }
10228 break;
10229
10230 case DBTRAIN_DBI: // possibly can allow LFSR use?
10231 break;
10232
10233 case DBTRAIN_LFSR: // forced already
10234 if (!octeon_is_cpuid(OCTEON_CN78XX_PASS2_X)) {
10235 debug("ERROR: illegal HW assist mode %d\n", mode);
10236 new_mode = DBTRAIN_TEST;
10237 }
10238 break;
10239
10240 default:
10241 debug("ERROR: unknown HW assist mode %d\n", mode);
10242 }
10243
10244 if (new_mode != mode)
10245 debug("%s: changing mode %d to %d\n", __func__, mode, new_mode);
10246
10247 return new_mode;
10248}
10249
10250int run_best_hw_patterns(struct ddr_priv *priv, int lmc, u64 phys_addr,
10251 int mode, u64 *xor_data)
10252{
10253 int pattern;
10254 const u64 *pattern_p;
10255 int errs, errors = 0;
10256
10257 // FIXME? always choose LFSR if chip supports it???
10258 mode = choose_best_hw_patterns(lmc, mode);
10259
10260 for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
10261 if (mode == DBTRAIN_LFSR) {
10262 setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]);
10263 } else {
10264 pattern_p = byte_patterns[pattern];
10265 setup_hw_pattern(priv, lmc, pattern_p);
10266 }
10267 errs = test_dram_byte_hw(priv, lmc, phys_addr, mode, xor_data);
10268
10269 debug("%s: PATTERN %d at A:0x%012llx errors 0x%x\n",
10270 __func__, pattern, phys_addr, errs);
10271
10272 errors |= errs;
10273 }
10274
10275 return errors;
10276}
10277
/*
 * Sweep the DLL byte offset(s) for one LMC, find the largest passing
 * window per rank/bytelane for each test pattern, and program the
 * averaged best offset for each bytelane at the end.
 *
 * @dll_offset_mode:       2 = read offsets, anything else = write offsets
 *                         (only the "Read"/"Write" label is derived here;
 *                         the value is passed through to load_dll_offset())
 * @lmc:                   LMC interface to tune
 * @bytelane:              single bytelane 0-8, or 0x0A for all bytelanes
 * @if_64b:                currently unused in this function
 * @dram_tune_rank_offset: currently unused in this function (a local
 *                         hw_rank_offset is recomputed below instead)
 * @dram_tune_byte_bursts: currently unused in this function
 */
static void hw_assist_test_dll_offset(struct ddr_priv *priv,
				      int dll_offset_mode, int lmc,
				      int bytelane,
				      int if_64b,
				      u64 dram_tune_rank_offset,
				      int dram_tune_byte_bursts)
{
	int byte_offset, new_best_offset[9];
	/* per-rank, per-bytelane passing-window run tracking */
	int rank_delay_start[4][9];
	int rank_delay_count[4][9];
	int rank_delay_best_start[4][9];
	int rank_delay_best_count[4][9];
	int errors[4], off_errors, tot_errors;
	int rank_mask, rankx, active_ranks;
	int pattern;
	const u64 *pattern_p;
	int byte;
	char *mode_str = (dll_offset_mode == 2) ? "Read" : "Write";
	int pat_best_offset[9];
	u64 phys_addr;
	int pat_beg, pat_end;
	int rank_beg, rank_end;
	int byte_lo, byte_hi;
	union cvmx_lmcx_config lmcx_config;
	u64 hw_rank_offset;
	int num_lmcs = cvmx_dram_get_num_lmc(priv);
	// FIXME? always choose LFSR if chip supports it???
	int mode = choose_best_hw_patterns(lmc, DBTRAIN_TEST);
	int node = 0;

	if (bytelane == 0x0A) {	// all bytelanes
		byte_lo = 0;
		byte_hi = 8;
	} else {		// just 1
		byte_lo = bytelane;
		byte_hi = bytelane;
	}

	lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
	rank_mask = lmcx_config.s.init_status;

	// this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
	hw_rank_offset =
	    1ull << (28 + lmcx_config.s.pbank_lsb - lmcx_config.s.rank_ena +
		     (num_lmcs / 2));

	debug("N%d: %s: starting LMC%d with rank offset 0x%016llx\n",
	      node, __func__, lmc, (unsigned long long)hw_rank_offset);

	// start of pattern loop
	// we do the set of tests for each pattern supplied...

	memset(new_best_offset, 0, sizeof(new_best_offset));
	for (pattern = 0; pattern < NUM_BYTE_PATTERNS; pattern++) {
		memset(pat_best_offset, 0, sizeof(pat_best_offset));

		if (mode == DBTRAIN_TEST) {
			pattern_p = byte_patterns[pattern];
			setup_hw_pattern(priv, lmc, pattern_p);
		} else {
			setup_lfsr_pattern(priv, lmc, lfsr_patterns[pattern]);
		}

		// now loop through all legal values for the DLL byte offset...

#define BYTE_OFFSET_INCR 3	// FIXME: make this tunable?

		tot_errors = 0;

		memset(rank_delay_count, 0, sizeof(rank_delay_count));
		memset(rank_delay_start, 0, sizeof(rank_delay_start));
		memset(rank_delay_best_count, 0, sizeof(rank_delay_best_count));
		memset(rank_delay_best_start, 0, sizeof(rank_delay_best_start));

		for (byte_offset = -63; byte_offset < 64;
		     byte_offset += BYTE_OFFSET_INCR) {
			// do the setup on the active LMC
			// set the bytelanes DLL offsets
			change_dll_offset_enable(priv, lmc, 0);
			// FIXME? bytelane?
			load_dll_offset(priv, lmc, dll_offset_mode,
					byte_offset, bytelane);
			change_dll_offset_enable(priv, lmc, 1);

			//bdk_watchdog_poke();

			// run the test on each rank
			// only 1 call per rank should be enough, let the
			// bursts, loops, etc, control the load...

			// errors for this byte_offset, all ranks
			off_errors = 0;

			active_ranks = 0;

			for (rankx = 0; rankx < 4; rankx++) {
				if (!(rank_mask & (1 << rankx)))
					continue;

				phys_addr = hw_rank_offset * active_ranks;
				// FIXME: now done by test_dram_byte_hw()
				//phys_addr |= (lmc << 7);
				//phys_addr |= (u64)node << CVMX_NODE_MEM_SHIFT;

				active_ranks++;

				// NOTE: return is a now a bitmask of the
				// erroring bytelanes.
				errors[rankx] =
				    test_dram_byte_hw(priv, lmc, phys_addr,
						      mode, NULL);

				// process any errors in the bytelane(s) that
				// are being tested
				for (byte = byte_lo; byte <= byte_hi; byte++) {
					// check errors
					// yes, an error in the byte lane in
					// this rank
					if (errors[rankx] & (1 << byte)) {
						off_errors |= (1 << byte);

						debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: Address 0x%012llx errors\n",
						      node, lmc, rankx, byte,
						      mode_str, byte_offset,
						      phys_addr);

						// had started run
						if (rank_delay_count
						    [rankx][byte] > 0) {
							debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: stopping a run here\n",
							      node, lmc, rankx,
							      byte, mode_str,
							      byte_offset);
							// stop now
							rank_delay_count
							    [rankx][byte] =
							    0;
						}
						// FIXME: else had not started
						// run - nothing else to do?
					} else {
						// no error in the byte lane
						// first success, set run start
						if (rank_delay_count[rankx]
						    [byte] == 0) {
							debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: starting a run here\n",
							      node, lmc, rankx,
							      byte, mode_str,
							      byte_offset);
							rank_delay_start[rankx]
							    [byte] =
							    byte_offset;
						}
						// bump run length
						rank_delay_count[rankx][byte]
						    += BYTE_OFFSET_INCR;

						// is this now the biggest
						// window?
						if (rank_delay_count[rankx]
						    [byte] >
						    rank_delay_best_count[rankx]
						    [byte]) {
							rank_delay_best_count
							    [rankx][byte] =
							    rank_delay_count
							    [rankx][byte];
							rank_delay_best_start
							    [rankx][byte] =
							    rank_delay_start
							    [rankx][byte];
							debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test %3d: updating best to %d/%d\n",
							      node, lmc, rankx,
							      byte, mode_str,
							      byte_offset,
							      rank_delay_best_start
							      [rankx][byte],
							      rank_delay_best_count
							      [rankx][byte]);
						}
					}
				}
			} /* for (rankx = 0; rankx < 4; rankx++) */

			tot_errors |= off_errors;
		}

		// set the bytelanes DLL offsets all back to 0
		change_dll_offset_enable(priv, lmc, 0);
		load_dll_offset(priv, lmc, dll_offset_mode, 0, bytelane);
		change_dll_offset_enable(priv, lmc, 1);

		// now choose the best byte_offsets for this pattern
		// according to the best windows of the tested ranks
		// calculate offset by constructing an average window
		// from the rank windows
		// (the intersection: latest start, earliest end across ranks)
		for (byte = byte_lo; byte <= byte_hi; byte++) {
			pat_beg = -999;
			pat_end = 999;

			for (rankx = 0; rankx < 4; rankx++) {
				if (!(rank_mask & (1 << rankx)))
					continue;

				rank_beg = rank_delay_best_start[rankx][byte];
				pat_beg = max(pat_beg, rank_beg);
				rank_end = rank_beg +
				    rank_delay_best_count[rankx][byte] -
				    BYTE_OFFSET_INCR;
				pat_end = min(pat_end, rank_end);

				debug("N%d.LMC%d.R%d: Bytelane %d DLL %s Offset Test: Rank Window %3d:%3d\n",
				      node, lmc, rankx, byte, mode_str,
				      rank_beg, rank_end);

			} /* for (rankx = 0; rankx < 4; rankx++) */

			/* midpoint of the common window for this pattern */
			pat_best_offset[byte] = (pat_end + pat_beg) / 2;

			// sum the pattern averages
			new_best_offset[byte] += pat_best_offset[byte];
		}

		// now print them on 1 line, descending order...
		debug("N%d.LMC%d: HW DLL %s Offset Pattern %d :",
		      node, lmc, mode_str, pattern);
		for (byte = byte_hi; byte >= byte_lo; --byte)
			debug(" %4d", pat_best_offset[byte]);
		debug("\n");
	}
	// end of pattern loop

	debug("N%d.LMC%d: HW DLL %s Offset Average : ", node, lmc, mode_str);

	// print in descending byte index order
	for (byte = byte_hi; byte >= byte_lo; --byte) {
		// create the new average NINT
		new_best_offset[byte] = divide_nint(new_best_offset[byte],
						    NUM_BYTE_PATTERNS);

		// print the best offsets from all patterns

		// print just the offset of all the bytes
		if (bytelane == 0x0A)
			debug("%4d ", new_best_offset[byte]);
		else		// print the bytelanes also
			debug("(byte %d) %4d ", byte, new_best_offset[byte]);

		// done with testing, load up the best offsets we found...
		// disable offsets while we load...
		change_dll_offset_enable(priv, lmc, 0);
		load_dll_offset(priv, lmc, dll_offset_mode,
				new_best_offset[byte], byte);
		// re-enable the offsets now that we are done loading
		change_dll_offset_enable(priv, lmc, 1);
	}

	debug("\n");
}
10537
10538/*
10539 * Automatically adjust the DLL offset for the selected bytelane using
10540 * hardware-assist
10541 */
10542static int perform_HW_dll_offset_tuning(struct ddr_priv *priv,
10543 int dll_offset_mode, int bytelane)
10544{
10545 int if_64b;
10546 int save_ecc_ena[4];
10547 union cvmx_lmcx_config lmc_config;
10548 int lmc, num_lmcs = cvmx_dram_get_num_lmc(priv);
10549 const char *s;
10550 int loops = 1, loop;
10551 int by;
10552 u64 dram_tune_rank_offset;
10553 int dram_tune_byte_bursts = DEFAULT_BYTE_BURSTS;
10554 int node = 0;
10555
10556 // see if we want to do the tuning more than once per LMC...
10557 s = env_get("ddr_tune_ecc_loops");
10558 if (s)
10559 loops = strtoul(s, NULL, 0);
10560
10561 // allow override of the test repeats (bursts)
10562 s = env_get("ddr_tune_byte_bursts");
10563 if (s)
10564 dram_tune_byte_bursts = strtoul(s, NULL, 10);
10565
10566 // print current working values
10567 debug("N%d: H/W Tuning for bytelane %d will use %d loops, %d bursts, and %d patterns.\n",
10568 node, bytelane, loops, dram_tune_byte_bursts, NUM_BYTE_PATTERNS);
10569
10570 // FIXME? get flag from LMC0 only
10571 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0));
10572 if_64b = !lmc_config.s.mode32b;
10573
10574 // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
10575 dram_tune_rank_offset =
10576 1ull << (28 + lmc_config.s.pbank_lsb - lmc_config.s.rank_ena +
10577 (num_lmcs / 2));
10578
10579 // do once for each active LMC
10580
10581 for (lmc = 0; lmc < num_lmcs; lmc++) {
10582 debug("N%d: H/W Tuning: starting LMC%d bytelane %d tune.\n",
10583 node, lmc, bytelane);
10584
10585 /* Enable ECC for the HW tests */
10586 // NOTE: we do enable ECC, but the HW tests used will not
10587 // generate "visible" errors
10588 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10589 save_ecc_ena[lmc] = lmc_config.s.ecc_ena;
10590 lmc_config.s.ecc_ena = 1;
10591 lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64);
10592 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10593
10594 // testing is done on a single LMC at a time
10595 // FIXME: for now, loop here to show what happens multiple times
10596 for (loop = 0; loop < loops; loop++) {
10597 /* Perform DLL offset tuning */
10598 hw_assist_test_dll_offset(priv, 2 /* 2=read */, lmc,
10599 bytelane,
10600 if_64b, dram_tune_rank_offset,
10601 dram_tune_byte_bursts);
10602 }
10603
10604 // perform cleanup on active LMC
10605 debug("N%d: H/W Tuning: finishing LMC%d bytelane %d tune.\n",
10606 node, lmc, bytelane);
10607
10608 /* Restore ECC for DRAM tests */
10609 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10610 lmc_config.s.ecc_ena = save_ecc_ena[lmc];
10611 lmc_wr(priv, CVMX_LMCX_CONFIG(lmc), lmc_config.u64);
10612 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10613
10614 // finally, see if there are any read offset overrides
10615 // after tuning
10616 for (by = 0; by < 9; by++) {
10617 s = lookup_env(priv, "ddr%d_tune_byte%d", lmc, by);
10618 if (s) {
10619 int dllro = strtoul(s, NULL, 10);
10620
10621 change_dll_offset_enable(priv, lmc, 0);
10622 load_dll_offset(priv, lmc, 2, dllro, by);
10623 change_dll_offset_enable(priv, lmc, 1);
10624 }
10625 }
10626
10627 } /* for (lmc = 0; lmc < num_lmcs; lmc++) */
10628
10629 // finish up...
10630
10631 return 0;
10632
10633} /* perform_HW_dll_offset_tuning */
10634
10635// this routine simply makes the calls to the tuning routine and returns
10636// any errors
10637static int cvmx_tune_node(struct ddr_priv *priv)
10638{
10639 int errs, tot_errs;
10640 int do_dllwo = 0; // default to NO
10641 const char *str;
10642 int node = 0;
10643
10644 // Automatically tune the data and ECC byte DLL read offsets
10645 debug("N%d: Starting DLL Read Offset Tuning for LMCs\n", node);
10646 errs = perform_HW_dll_offset_tuning(priv, 2, 0x0A /* all bytelanes */);
10647 debug("N%d: Finished DLL Read Offset Tuning for LMCs, %d errors\n",
10648 node, errs);
10649 tot_errs = errs;
10650
10651 // disabled by default for now, does not seem to be needed?
10652 // Automatically tune the data and ECC byte DLL write offsets
10653 // allow override of default setting
10654 str = env_get("ddr_tune_write_offsets");
10655 if (str)
10656 do_dllwo = !!strtoul(str, NULL, 0);
10657 if (do_dllwo) {
10658 debug("N%d: Starting DLL Write Offset Tuning for LMCs\n", node);
10659 errs =
10660 perform_HW_dll_offset_tuning(priv, 1,
10661 0x0A /* all bytelanes */);
10662 debug("N%d: Finished DLL Write Offset Tuning for LMCs, %d errors\n",
10663 node, errs);
10664 tot_errs += errs;
10665 }
10666
10667 return tot_errs;
10668}
10669
// this routine makes the calls to the tuning routines when criteria are met
// intended to be called for automated tuning, to apply filtering...

// index constants for the ddr_speed_filter[][][]  table below
#define IS_DDR4  1
#define IS_DDR3  0
#define IS_RDIMM 1
#define IS_UDIMM 0
#define IS_1SLOT 1
#define IS_2SLOT 0

// Minimum DDR speed (in MHz, as used by cvmx_maybe_tune_node()) above
// which auto-tuning is performed for each DDR-type/DIMM-type/slot-count
// combination; a value of 0 disables tuning for that combination.
// FIXME: DDR3 is not tuned
static const u32 ddr_speed_filter[2][2][2] = {
	[IS_DDR4] = {
		     [IS_RDIMM] = {
				   [IS_1SLOT] = 940,
				   [IS_2SLOT] = 800},
		     [IS_UDIMM] = {
				   [IS_1SLOT] = 1050,
				   [IS_2SLOT] = 940},
		      },
	[IS_DDR3] = {
		     [IS_RDIMM] = {
				   [IS_1SLOT] = 0,	// disabled
				   [IS_2SLOT] = 0	// disabled
				   },
		     [IS_UDIMM] = {
				   [IS_1SLOT] = 0,	// disabled
				   [IS_2SLOT] = 0	// disabled
				   }
		     }
};
10701
10702void cvmx_maybe_tune_node(struct ddr_priv *priv, u32 ddr_speed)
10703{
10704 const char *s;
10705 union cvmx_lmcx_config lmc_config;
10706 union cvmx_lmcx_control lmc_control;
10707 union cvmx_lmcx_ddr_pll_ctl lmc_ddr_pll_ctl;
10708 int is_ddr4;
10709 int is_rdimm;
10710 int is_1slot;
10711 int do_tune = 0;
10712 u32 ddr_min_speed;
10713 int node = 0;
10714
10715 // scale it down from Hz to MHz
10716 ddr_speed = divide_nint(ddr_speed, 1000000);
10717
10718 // FIXME: allow an override here so that all configs can be tuned
10719 // or none
10720 // If the envvar is defined, always either force it or avoid it
10721 // accordingly
10722 s = env_get("ddr_tune_all_configs");
10723 if (s) {
10724 do_tune = !!strtoul(s, NULL, 0);
10725 printf("N%d: DRAM auto-tuning %s.\n", node,
10726 (do_tune) ? "forced" : "disabled");
10727 if (do_tune)
10728 cvmx_tune_node(priv);
10729
10730 return;
10731 }
10732
10733 // filter the tuning calls here...
10734 // determine if we should/can run automatically for this configuration
10735 //
10736 // FIXME: tune only when the configuration indicates it will help:
10737 // DDR type, RDIMM or UDIMM, 1-slot or 2-slot, and speed
10738 //
10739 lmc_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(0)); // sample LMC0
10740 lmc_control.u64 = lmc_rd(priv, CVMX_LMCX_CONTROL(0)); // sample LMC0
10741 // sample LMC0
10742 lmc_ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0));
10743
10744 is_ddr4 = (lmc_ddr_pll_ctl.s.ddr4_mode != 0);
10745 is_rdimm = (lmc_control.s.rdimm_ena != 0);
10746 // HACK, should do better
10747 is_1slot = (lmc_config.s.init_status < 4);
10748
10749 ddr_min_speed = ddr_speed_filter[is_ddr4][is_rdimm][is_1slot];
10750 do_tune = ((ddr_min_speed != 0) && (ddr_speed > ddr_min_speed));
10751
10752 debug("N%d: DDR%d %cDIMM %d-slot at %d MHz %s eligible for auto-tuning.\n",
10753 node, (is_ddr4) ? 4 : 3, (is_rdimm) ? 'R' : 'U',
10754 (is_1slot) ? 1 : 2, ddr_speed, (do_tune) ? "is" : "is not");
10755
10756 // call the tuning routine, filtering is done...
10757 if (do_tune)
10758 cvmx_tune_node(priv);
10759}
10760
10761/*
10762 * first pattern example:
10763 * GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
10764 * GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
10765 * GENERAL_PURPOSE0.DATA == 16'h0000;
10766 */
10767
10768static const u64 dbi_pattern[3] = {
10769 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x0000ULL };
10770
10771// Perform switchover to DBI
10772static void cvmx_dbi_switchover_interface(struct ddr_priv *priv, int lmc)
10773{
10774 union cvmx_lmcx_modereg_params0 modereg_params0;
10775 union cvmx_lmcx_modereg_params3 modereg_params3;
10776 union cvmx_lmcx_phy_ctl phy_ctl;
10777 union cvmx_lmcx_config lmcx_config;
10778 union cvmx_lmcx_ddr_pll_ctl ddr_pll_ctl;
10779 int rank_mask, rankx, active_ranks;
10780 u64 phys_addr, rank_offset;
10781 int num_lmcs, errors;
10782 int dbi_settings[9], byte, unlocked, retries;
10783 int ecc_ena;
10784 int rank_max = 1; // FIXME: make this 4 to try all the ranks
10785 int node = 0;
10786
10787 ddr_pll_ctl.u64 = lmc_rd(priv, CVMX_LMCX_DDR_PLL_CTL(0));
10788
10789 lmcx_config.u64 = lmc_rd(priv, CVMX_LMCX_CONFIG(lmc));
10790 rank_mask = lmcx_config.s.init_status;
10791 ecc_ena = lmcx_config.s.ecc_ena;
10792
10793 // FIXME: must filter out any non-supported configs
10794 // ie, no DDR3, no x4 devices
10795 if (ddr_pll_ctl.s.ddr4_mode == 0 || lmcx_config.s.mode_x4dev == 1) {
10796 debug("N%d.LMC%d: DBI switchover: inappropriate device; EXITING...\n",
10797 node, lmc);
10798 return;
10799 }
10800
10801 // this should be correct for 1 or 2 ranks, 1 or 2 DIMMs
10802 num_lmcs = cvmx_dram_get_num_lmc(priv);
10803 rank_offset = 1ull << (28 + lmcx_config.s.pbank_lsb -
10804 lmcx_config.s.rank_ena + (num_lmcs / 2));
10805
10806 debug("N%d.LMC%d: DBI switchover: rank mask 0x%x, rank size 0x%016llx.\n",
10807 node, lmc, rank_mask, (unsigned long long)rank_offset);
10808
10809 /*
10810 * 1. conduct the current init sequence as usual all the way
10811 * after software write leveling.
10812 */
10813
10814 read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings);
10815
10816 display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings,
10817 " INIT");
10818
10819 /*
10820 * 2. set DBI related CSRs as below and issue MR write.
10821 * MODEREG_PARAMS3.WR_DBI=1
10822 * MODEREG_PARAMS3.RD_DBI=1
10823 * PHY_CTL.DBI_MODE_ENA=1
10824 */
10825 modereg_params0.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc));
10826
10827 modereg_params3.u64 = lmc_rd(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc));
10828 modereg_params3.s.wr_dbi = 1;
10829 modereg_params3.s.rd_dbi = 1;
10830 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS3(lmc), modereg_params3.u64);
10831
10832 phy_ctl.u64 = lmc_rd(priv, CVMX_LMCX_PHY_CTL(lmc));
10833 phy_ctl.s.dbi_mode_ena = 1;
10834 lmc_wr(priv, CVMX_LMCX_PHY_CTL(lmc), phy_ctl.u64);
10835
10836 /*
10837 * there are two options for data to send. Lets start with (1)
10838 * and could move to (2) in the future:
10839 *
10840 * 1) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 0 (or for older chips where
10841 * this does not exist) set data directly in these reigsters.
10842 * this will yield a clk/2 pattern:
10843 * GENERAL_PURPOSE0.DATA == 64'h00ff00ff00ff00ff;
10844 * GENERAL_PURPOSE1.DATA == 64'h00ff00ff00ff00ff;
10845 * GENERAL_PURPOSE0.DATA == 16'h0000;
10846 * 2) DBTRAIN_CTL[LFSR_PATTERN_SEL] = 1
10847 * here data comes from the LFSR generating a PRBS pattern
10848 * CHAR_CTL.EN = 0
10849 * CHAR_CTL.SEL = 0; // for PRBS
10850 * CHAR_CTL.DR = 1;
10851 * CHAR_CTL.PRBS = setup for whatever type of PRBS to send
10852 * CHAR_CTL.SKEW_ON = 1;
10853 */
10854 lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE0(lmc), dbi_pattern[0]);
10855 lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE1(lmc), dbi_pattern[1]);
10856 lmc_wr(priv, CVMX_LMCX_GENERAL_PURPOSE2(lmc), dbi_pattern[2]);
10857
10858 /*
10859 * 3. adjust cas_latency (only necessary if RD_DBI is set).
10860 * here is my code for doing this:
10861 *
10862 * if (csr_model.MODEREG_PARAMS3.RD_DBI.value == 1) begin
10863 * case (csr_model.MODEREG_PARAMS0.CL.value)
10864 * 0,1,2,3,4: csr_model.MODEREG_PARAMS0.CL.value += 2;
10865 * // CL 9-13 -> 11-15
10866 * 5: begin
10867 * // CL=14, CWL=10,12 gets +2, CLW=11,14 gets +3
10868 * if((csr_model.MODEREG_PARAMS0.CWL.value==1 ||
10869 * csr_model.MODEREG_PARAMS0.CWL.value==3))
10870 * csr_model.MODEREG_PARAMS0.CL.value = 7; // 14->16
10871 * else
10872 * csr_model.MODEREG_PARAMS0.CL.value = 13; // 14->17
10873 * end
10874 * 6: csr_model.MODEREG_PARAMS0.CL.value = 8; // 15->18
10875 * 7: csr_model.MODEREG_PARAMS0.CL.value = 14; // 16->19
10876 * 8: csr_model.MODEREG_PARAMS0.CL.value = 15; // 18->21
10877 * default:
10878 * `cn_fatal(("Error mem_cfg (%s) CL (%d) with RD_DBI=1,
10879 * I am not sure what to do.",
10880 * mem_cfg, csr_model.MODEREG_PARAMS3.RD_DBI.value))
10881 * endcase
10882 * end
10883 */
10884
10885 if (modereg_params3.s.rd_dbi == 1) {
10886 int old_cl, new_cl, old_cwl;
10887
10888 old_cl = modereg_params0.s.cl;
10889 old_cwl = modereg_params0.s.cwl;
10890
10891 switch (old_cl) {
10892 case 0:
10893 case 1:
10894 case 2:
10895 case 3:
10896 case 4:
10897 new_cl = old_cl + 2;
10898 break; // 9-13->11-15
10899 // CL=14, CWL=10,12 gets +2, CLW=11,14 gets +3
10900 case 5:
10901 new_cl = ((old_cwl == 1) || (old_cwl == 3)) ? 7 : 13;
10902 break;
10903 case 6:
10904 new_cl = 8;
10905 break; // 15->18
10906 case 7:
10907 new_cl = 14;
10908 break; // 16->19
10909 case 8:
10910 new_cl = 15;
10911 break; // 18->21
10912 default:
10913 printf("ERROR: Bad CL value (%d) for DBI switchover.\n",
10914 old_cl);
10915 // FIXME: need to error exit here...
10916 old_cl = -1;
10917 new_cl = -1;
10918 break;
10919 }
10920 debug("N%d.LMC%d: DBI switchover: CL ADJ: old_cl 0x%x, old_cwl 0x%x, new_cl 0x%x.\n",
10921 node, lmc, old_cl, old_cwl, new_cl);
10922 modereg_params0.s.cl = new_cl;
10923 lmc_wr(priv, CVMX_LMCX_MODEREG_PARAMS0(lmc),
10924 modereg_params0.u64);
10925 }
10926
10927 /*
10928 * 4. issue MRW to MR0 (CL) and MR5 (DBI), using LMC sequence
10929 * SEQ_CTL[SEQ_SEL] = MRW.
10930 */
10931 // Use the default values, from the CSRs fields
10932 // also, do B-sides for RDIMMs...
10933
10934 for (rankx = 0; rankx < 4; rankx++) {
10935 if (!(rank_mask & (1 << rankx)))
10936 continue;
10937
10938 // for RDIMMs, B-side writes should get done automatically
10939 // when the A-side is written
10940 ddr4_mrw(priv, lmc, rankx, -1 /* use_default */,
10941 0 /*MRreg */, 0 /*A-side */); /* MR0 */
10942 ddr4_mrw(priv, lmc, rankx, -1 /* use_default */,
10943 5 /*MRreg */, 0 /*A-side */); /* MR5 */
10944 }
10945
10946 /*
10947 * 5. conduct DBI bit deskew training via the General Purpose
10948 * R/W sequence (dbtrain). may need to run this over and over to get
10949 * a lock (I need up to 5 in simulation):
10950 * SEQ_CTL[SEQ_SEL] = RW_TRAINING (15)
10951 * DBTRAIN_CTL.CMD_COUNT_EXT = all 1's
10952 * DBTRAIN_CTL.READ_CMD_COUNT = all 1's
10953 * DBTRAIN_CTL.TCCD_SEL = set according to MODEREG_PARAMS3[TCCD_L]
10954 * DBTRAIN_CTL.RW_TRAIN = 1
10955 * DBTRAIN_CTL.READ_DQ_COUNT = dont care
10956 * DBTRAIN_CTL.WRITE_ENA = 1;
10957 * DBTRAIN_CTL.ACTIVATE = 1;
10958 * DBTRAIN_CTL LRANK, PRANK, ROW_A, BG, BA, COLUMN_A = set to a
10959 * valid address
10960 */
10961
10962 // NOW - do the training
10963 debug("N%d.LMC%d: DBI switchover: TRAINING begins...\n", node, lmc);
10964
10965 active_ranks = 0;
10966 for (rankx = 0; rankx < rank_max; rankx++) {
10967 if (!(rank_mask & (1 << rankx)))
10968 continue;
10969
10970 phys_addr = rank_offset * active_ranks;
10971 // FIXME: now done by test_dram_byte_hw()
10972
10973 active_ranks++;
10974
10975 retries = 0;
10976
10977restart_training:
10978
10979 // NOTE: return is a bitmask of the erroring bytelanes -
10980 // we only print it
10981 errors =
10982 test_dram_byte_hw(priv, lmc, phys_addr, DBTRAIN_DBI, NULL);
10983
10984 debug("N%d.LMC%d: DBI switchover: TEST: rank %d, phys_addr 0x%llx, errors 0x%x.\n",
10985 node, lmc, rankx, (unsigned long long)phys_addr, errors);
10986
10987 // NEXT - check for locking
10988 unlocked = 0;
10989 read_dac_dbi_settings(priv, lmc, /*DBI*/ 0, dbi_settings);
10990
10991 for (byte = 0; byte < (8 + ecc_ena); byte++)
10992 unlocked += (dbi_settings[byte] & 1) ^ 1;
10993
10994 // FIXME: print out the DBI settings array after each rank?
10995 if (rank_max > 1) // only when doing more than 1 rank
10996 display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena,
10997 dbi_settings, " RANK");
10998
10999 if (unlocked > 0) {
11000 debug("N%d.LMC%d: DBI switchover: LOCK: %d still unlocked.\n",
11001 node, lmc, unlocked);
11002 retries++;
11003 if (retries < 10) {
11004 goto restart_training;
11005 } else {
11006 debug("N%d.LMC%d: DBI switchover: LOCK: %d retries exhausted.\n",
11007 node, lmc, retries);
11008 }
11009 }
11010 } /* for (rankx = 0; rankx < 4; rankx++) */
11011
11012 // print out the final DBI settings array
11013 display_dac_dbi_settings(lmc, /*DBI*/ 0, ecc_ena, dbi_settings,
11014 "FINAL");
11015}
11016
/*
 * Run the DBI (Data Bus Inversion) switchover sequence on every
 * configured LMC (memory controller) interface.
 */
void cvmx_dbi_switchover(struct ddr_priv *priv)
{
	const int num_lmcs = cvmx_dram_get_num_lmc(priv);
	int if_num;

	for (if_num = 0; if_num < num_lmcs; if_num++)
		cvmx_dbi_switchover_interface(priv, if_num);
}