ppc4xx: Autocalibration can set RDCC to an overly aggressive value.

The criteria of the AMCC SDRAM Controller DDR autocalibration
U-Boot code is to pick the largest passing write/read/compare
window that also has the smallest SDRAM_RDCC.[RDSS] Read Sample
Cycle Select value.

On some Kilauea boards the DDR autocalibration algorithm can
find a large passing write/read/compare window with the
aggressive SDRAM_RDCC.[RDSS] Read Sample Cycle Select value
of "T1 Sample".

This SDRAM_RDCC.[RDSS] Read Sample Cycle Select value of
"T1 Sample" proves to be too aggressive when later on U-Boot
relocates into DDR memory and executes.

The memory traces on the Kilauea board are short so on some
Kilauea boards the SDRAM_RDCC.[RDSS] Read Sample Cycle Select
value of "T1 Sample" shows up as a potentially valid value for
the DDR autocalibration algorithm.

The fix is to define a weak default function which provides
the minimum SDRAM_RDCC.[RDSS] Read Sample Cycle Select value
to accept for DDR autocalibration.  The default will be the
"T2 Sample" value.  A board developer who has a well defined
board and chooses to be more aggressive can always provide
their own board specific strong function with the more
aggressive "T1 Sample" value or stick with the default
minimum SDRAM_RDCC.[RDSS] value of "T2".

Also put in an autocalibration loop fix for the case where the
current write/read/compare passing window size is the same as a
prior window size; in this case choose the write/read/compare
result that has the smallest associated RDCC T-Sample value.

Signed-off-by: Adam Graham <agraham@amcc.com>
Signed-off-by: Stefan Roese <sr@denx.de>
diff --git a/cpu/ppc4xx/4xx_ibm_ddr2_autocalib.c b/cpu/ppc4xx/4xx_ibm_ddr2_autocalib.c
index 1e3e20d..91bf582 100644
--- a/cpu/ppc4xx/4xx_ibm_ddr2_autocalib.c
+++ b/cpu/ppc4xx/4xx_ibm_ddr2_autocalib.c
@@ -61,6 +61,8 @@
 #define NUMLOOPS		1	/* configure as you deem approporiate */
 #define NUMMEMWORDS		16
 
+#define SDRAM_RDCC_RDSS_VAL(n)	SDRAM_RDCC_RDSS_DECODE(ddr_rdss_opt(n))
+
 /* Private Structure Definitions */
 
 struct autocal_regs {
@@ -147,6 +149,13 @@
 }
 ulong ddr_scan_option(ulong) __attribute__((weak, alias("__ddr_scan_option")));
 
+/* Weak default: keep the caller's minimum RDSS; boards may override. */
+u32 __ddr_rdss_opt(u32 default_val)
+{
+	return default_val;
+}
+u32 ddr_rdss_opt(u32) __attribute__((weak, alias("__ddr_rdss_opt")));
+
 static u32 *get_membase(int bxcr_num)
 {
 	ulong bxcf;
@@ -341,6 +350,7 @@
 			ppcDcbf((ulong)&(base_address[j]));
 		}
 		sync();
+		iobarrier_rw();
 		for (l = 0; l < NUMLOOPS; l++) {
 			for (j = 0; j < NUMMEMWORDS; j++) {
 				if (base_address[j] != test[i][j]) {
@@ -355,6 +365,7 @@
 				ppcDcbf((u32)&(base_address[j]));
 			} /* for (j = 0; j < NUMMEMWORDS; j++) */
 			sync();
+			iobarrier_rw();
 		} /* for (l=0; l<NUMLOOPS; l++) */
 	}
 
@@ -447,7 +458,8 @@
 	 * Program RDCC register
 	 * Read sample cycle auto-update enable
 	 */
-	mtsdram(SDRAM_RDCC, SDRAM_RDCC_RDSS_T1 | SDRAM_RDCC_RSAE_ENABLE);
+	mtsdram(SDRAM_RDCC,
+		ddr_rdss_opt(SDRAM_RDCC_RDSS_T2) | SDRAM_RDCC_RSAE_ENABLE);
 
 #ifdef DEBUG
 	mfsdram(SDRAM_RDCC, temp);
@@ -633,7 +645,8 @@
 	 * Program RDCC register
 	 * Read sample cycle auto-update enable
 	 */
-	mtsdram(SDRAM_RDCC, SDRAM_RDCC_RDSS_T2 | SDRAM_RDCC_RSAE_ENABLE);
+	mtsdram(SDRAM_RDCC,
+		ddr_rdss_opt(SDRAM_RDCC_RDSS_T2) | SDRAM_RDCC_RSAE_ENABLE);
 
 #ifdef DEBUG
 	mfsdram(SDRAM_RDCC, temp);
@@ -1091,32 +1104,36 @@
 		 * if no passing window was found, or is the
 		 * size of the RFFD passing window.
 		 */
-		if (result != 0) {
-			tcal.autocal.flags = 1;
-			debug("*** (%d)(%d) result passed window size: 0x%08x, "
-			      "rqfd = 0x%08x, rffd = 0x%08x, rdcc = 0x%08x\n",
-				wdtr, clkp, result, ddrcal.rqfd,
-				ddrcal.rffd, ddrcal.rdcc);
-			/*
-			 * Save the SDRAM_WRDTR and SDRAM_CLKTR
-			 * settings for the largest returned
-			 * RFFD passing window size.
-			 */
-			if (result > best_result) {
+		/*
+		 * want the lowest Read Sample Cycle Select
+		 */
+		val = SDRAM_RDCC_RDSS_DECODE(val);
+		debug("*** (%d) (%d) current_rdcc, best_rdcc\n",
+			val, best_rdcc);
+
+		if ((result != 0) &&
+		    (val >= SDRAM_RDCC_RDSS_VAL(SDRAM_RDCC_RDSS_T2))) {
+			if (((result == best_result) && (val < best_rdcc)) ||
+			    ((result > best_result) && (val <= best_rdcc))) {
+				tcal.autocal.flags = 1;
+				debug("*** (%d)(%d) result passed window "
+					"size: 0x%08x, rqfd = 0x%08x, "
+					"rffd = 0x%08x, rdcc = 0x%08x\n",
+					wdtr, clkp, result, ddrcal.rqfd,
+					ddrcal.rffd, ddrcal.rdcc);
+
 				/*
-				 * want the lowest Read Sample Cycle Select
+				 * Save the SDRAM_WRDTR and SDRAM_CLKTR
+				 * settings for the largest returned
+				 * RFFD passing window size.
 				 */
-				val = (val & SDRAM_RDCC_RDSS_MASK) >> 30;
-				debug("*** (%d) (%d) current_rdcc, best_rdcc\n",
-							val, best_rdcc);
-				if (val <= best_rdcc) {
-					best_rdcc = val;
-					tcal.clocks.wrdtr = wdtr;
-					tcal.clocks.clktr = clkp;
-					tcal.clocks.rdcc = (val << 30);
-					tcal.autocal.rqfd = ddrcal.rqfd;
-					tcal.autocal.rffd = ddrcal.rffd;
-					best_result = result;
+				best_rdcc = val;
+				tcal.clocks.wrdtr = wdtr;
+				tcal.clocks.clktr = clkp;
+				tcal.clocks.rdcc = SDRAM_RDCC_RDSS_ENCODE(val);
+				tcal.autocal.rqfd = ddrcal.rqfd;
+				tcal.autocal.rffd = ddrcal.rffd;
+				best_result = result;
 
 					if (verbose_lvl > 2) {
 						printf("** (%d)(%d)  "
@@ -1152,9 +1169,8 @@
 						       "loop FCSR: 0x%08x\n",
 							wdtr, clkp, val);
 					}
-				} /* if (val <= best_rdcc) */
-			} /* if (result >= best_result) */
-		} /* if (result != 0) */
+			}
+		} /* if ((result != 0) && (val >= (ddr_rdss_opt()))) */
 		scan_list++;
 	} /* while ((scan_list->wrdtr != -1) && (scan_list->clktr != -1)) */