ppc4xx: Enable Primordial Stack for 40x and Unify ECC Handling

This patch (Part 1 of 2):

* Rolls up a suite of changes to enable correct primordial stack and
  global data handling when the data cache is used for such a purpose
  for PPC40x-variants (i.e. CFG_INIT_DCACHE_CS).

* Related to the first, unifies DDR2 SDRAM and ECC initialization by
  eliminating redundant ECC initialization implementations and moving
  redundant SDRAM initialization out of board code into shared 4xx
  code.

* Enables MCSR visibility on the 405EX(r).

* Enables the use of the data cache for initial RAM on
  both AMCC's Kilauea and Makalu and removes a redundant
  CFG_POST_MEMORY flag from each board's CONFIG_POST value.

  - Removed, per Stefan Roese's request, defunct memory.c file for
    Makalu and rolled sdram_init from it into makalu.c.

With respect to the 4xx DDR initialization and ECC unification, there
is certainly more work that can and should be done (file renaming,
etc.). However, that can be handled at a later date on a second or
third pass. As it stands, this patch moves things forward in an
incremental yet positive way for those platforms that utilize this
code and the features associated with it.

Signed-off-by: Grant Erickson <gerickson@nuovations.com>
Signed-off-by: Stefan Roese <sr@denx.de>
diff --git a/cpu/ppc4xx/44x_spd_ddr.c b/cpu/ppc4xx/44x_spd_ddr.c
index b9cf5cb..b7eeaf2 100644
--- a/cpu/ppc4xx/44x_spd_ddr.c
+++ b/cpu/ppc4xx/44x_spd_ddr.c
@@ -53,6 +53,8 @@
 #include <ppc4xx.h>
 #include <asm/mmu.h>
 
+#include "ecc.h"
+
 #if defined(CONFIG_SPD_EEPROM) &&					\
 	(defined(CONFIG_440GP) || defined(CONFIG_440GX) ||		\
 	 defined(CONFIG_440EP) || defined(CONFIG_440GR))
@@ -296,10 +298,6 @@
 			unsigned long num_dimm_banks);
 static void program_tr1(void);
 
-#ifdef CONFIG_DDR_ECC
-static void program_ecc(unsigned long num_bytes);
-#endif
-
 static unsigned long program_bxcr(unsigned long *dimm_populated,
 				  unsigned char *iic0_dimm_addr,
 				  unsigned long num_dimm_banks);
@@ -418,7 +416,7 @@
 	/*
 	 * If ecc is enabled, initialize the parity bits.
 	 */
-	program_ecc(total_size);
+	ecc_init(CFG_SDRAM_BASE, total_size);
 #endif
 
 	return total_size;
@@ -1402,45 +1400,4 @@
 
 	return(bank_base_addr);
 }
-
-#ifdef CONFIG_DDR_ECC
-static void program_ecc(unsigned long num_bytes)
-{
-	unsigned long bank_base_addr;
-	unsigned long current_address;
-	unsigned long end_address;
-	unsigned long address_increment;
-	unsigned long cfg0;
-
-	/*
-	 * get Memory Controller Options 0 data
-	 */
-	mfsdram(mem_cfg0, cfg0);
-
-	/*
-	 * reset the bank_base address
-	 */
-	bank_base_addr = CFG_SDRAM_BASE;
-
-	if ((cfg0 & SDRAM_CFG0_MCHK_MASK) != SDRAM_CFG0_MCHK_NON) {
-		mtsdram(mem_cfg0, (cfg0 & ~SDRAM_CFG0_MCHK_MASK) | SDRAM_CFG0_MCHK_GEN);
-
-		if ((cfg0 & SDRAM_CFG0_DMWD_MASK) == SDRAM_CFG0_DMWD_32)
-			address_increment = 4;
-		else
-			address_increment = 8;
-
-		current_address = (unsigned long)(bank_base_addr);
-		end_address = (unsigned long)(bank_base_addr) + num_bytes;
-
-		while (current_address < end_address) {
-			*((unsigned long*)current_address) = 0x00000000;
-			current_address += address_increment;
-		}
-
-		mtsdram(mem_cfg0, (cfg0 & ~SDRAM_CFG0_MCHK_MASK) |
-			SDRAM_CFG0_MCHK_CHK);
-	}
-}
-#endif /* CONFIG_DDR_ECC */
 #endif /* CONFIG_SPD_EEPROM */
diff --git a/cpu/ppc4xx/44x_spd_ddr2.c b/cpu/ppc4xx/44x_spd_ddr2.c
index ec76b71..aa4a530 100644
--- a/cpu/ppc4xx/44x_spd_ddr2.c
+++ b/cpu/ppc4xx/44x_spd_ddr2.c
@@ -3,9 +3,12 @@
  * This SPD SDRAM detection code supports AMCC PPC44x cpu's with a
  * DDR2 controller (non Denali Core). Those currently are:
  *
- * 405:		405EX
+ * 405:		405EX(r)
  * 440/460:	440SP/440SPe/460EX/460GT
  *
+ * Copyright (c) 2008 Nuovation System Designs, LLC
+ *   Grant Erickson <gerickson@nuovations.com>
+
  * (C) Copyright 2007-2008
  * Stefan Roese, DENX Software Engineering, sr@denx.de.
  *
@@ -45,6 +48,8 @@
 #include <asm/mmu.h>
 #include <asm/cache.h>
 
+#include "ecc.h"
+
 #if defined(CONFIG_SPD_EEPROM) &&				\
 	(defined(CONFIG_440SP) || defined(CONFIG_440SPE) || \
 	 defined(CONFIG_460EX) || defined(CONFIG_460GT))
@@ -3064,9 +3069,116 @@
 	dcr_data = mfdcr(SDRAM_R3BAS);
 	printf("        MQ3_B0BAS       = 0x%08X\n", dcr_data);
 }
-#else
+#else /* !defined(DEBUG) */
 static void ppc440sp_sdram_register_dump(void)
 {
 }
-#endif
-#endif /* CONFIG_SPD_EEPROM */
+#endif /* defined(DEBUG) */
+#elif defined(CONFIG_405EX)
+/*-----------------------------------------------------------------------------
+ * Function:	initdram
+ * Description: Configures the PPC405EX(r) DDR1/DDR2 SDRAM memory
+ * 		banks. The configuration is performed using static, compile-
+ *		time parameters.
+ *---------------------------------------------------------------------------*/
+long initdram(int board_type)
+{
+	unsigned long val;
+
+	/* Set Memory Bank Configuration Registers */
+
+	mtsdram(SDRAM_MB0CF, CFG_SDRAM0_MB0CF);
+	mtsdram(SDRAM_MB1CF, CFG_SDRAM0_MB1CF);
+	mtsdram(SDRAM_MB2CF, CFG_SDRAM0_MB2CF);
+	mtsdram(SDRAM_MB3CF, CFG_SDRAM0_MB3CF);
+
+	/* Set Memory Clock Timing Register */
+
+	mtsdram(SDRAM_CLKTR, CFG_SDRAM0_CLKTR);
+
+	/* Set Refresh Time Register */
+
+	mtsdram(SDRAM_RTR, CFG_SDRAM0_RTR);
+
+	/* Set SDRAM Timing Registers */
+
+	mtsdram(SDRAM_SDTR1, CFG_SDRAM0_SDTR1);
+	mtsdram(SDRAM_SDTR2, CFG_SDRAM0_SDTR2);
+	mtsdram(SDRAM_SDTR3, CFG_SDRAM0_SDTR3);
+
+	/* Set Mode and Extended Mode Registers */
+
+	mtsdram(SDRAM_MMODE, CFG_SDRAM0_MMODE);
+	mtsdram(SDRAM_MEMODE, CFG_SDRAM0_MEMODE);
+
+	/* Set Memory Controller Options 1 Register */
+
+	mtsdram(SDRAM_MCOPT1, CFG_SDRAM0_MCOPT1);
+
+	/* Set Manual Initialization Control Registers */
+
+	mtsdram(SDRAM_INITPLR0, CFG_SDRAM0_INITPLR0);
+	mtsdram(SDRAM_INITPLR1, CFG_SDRAM0_INITPLR1);
+	mtsdram(SDRAM_INITPLR2, CFG_SDRAM0_INITPLR2);
+	mtsdram(SDRAM_INITPLR3, CFG_SDRAM0_INITPLR3);
+	mtsdram(SDRAM_INITPLR4, CFG_SDRAM0_INITPLR4);
+	mtsdram(SDRAM_INITPLR5, CFG_SDRAM0_INITPLR5);
+	mtsdram(SDRAM_INITPLR6, CFG_SDRAM0_INITPLR6);
+	mtsdram(SDRAM_INITPLR7, CFG_SDRAM0_INITPLR7);
+	mtsdram(SDRAM_INITPLR8, CFG_SDRAM0_INITPLR8);
+	mtsdram(SDRAM_INITPLR9, CFG_SDRAM0_INITPLR9);
+	mtsdram(SDRAM_INITPLR10, CFG_SDRAM0_INITPLR10);
+	mtsdram(SDRAM_INITPLR11, CFG_SDRAM0_INITPLR11);
+	mtsdram(SDRAM_INITPLR12, CFG_SDRAM0_INITPLR12);
+	mtsdram(SDRAM_INITPLR13, CFG_SDRAM0_INITPLR13);
+	mtsdram(SDRAM_INITPLR14, CFG_SDRAM0_INITPLR14);
+	mtsdram(SDRAM_INITPLR15, CFG_SDRAM0_INITPLR15);
+
+	/* Set On-Die Termination Registers */
+
+	mtsdram(SDRAM_CODT, CFG_SDRAM0_CODT);
+	mtsdram(SDRAM_MODT0, CFG_SDRAM0_MODT0);
+	mtsdram(SDRAM_MODT1, CFG_SDRAM0_MODT1);
+
+	/* Set Write Timing Register */
+
+	mtsdram(SDRAM_WRDTR, CFG_SDRAM0_WRDTR);
+
+	/*
+	 * Start Initialization by SDRAM0_MCOPT2[SREN] = 0 and
+	 * SDRAM0_MCOPT2[IPTR] = 1
+	 */
+
+	mtsdram(SDRAM_MCOPT2, (SDRAM_MCOPT2_SREN_EXIT |
+			       SDRAM_MCOPT2_IPTR_EXECUTE));
+
+	/*
+	 * Poll SDRAM0_MCSTAT[MIC] for assertion to indicate the
+	 * completion of initialization.
+	 */
+
+	do {
+		mfsdram(SDRAM_MCSTAT, val);
+	} while ((val & SDRAM_MCSTAT_MIC_MASK) != SDRAM_MCSTAT_MIC_COMP);
+
+	/* Set Delay Control Registers */
+
+	mtsdram(SDRAM_DLCR, CFG_SDRAM0_DLCR);
+	mtsdram(SDRAM_RDCC, CFG_SDRAM0_RDCC);
+	mtsdram(SDRAM_RQDC, CFG_SDRAM0_RQDC);
+	mtsdram(SDRAM_RFDC, CFG_SDRAM0_RFDC);
+
+	/*
+	 * Enable Controller by SDRAM0_MCOPT2[DCEN] = 1:
+	 */
+
+	mfsdram(SDRAM_MCOPT2, val);
+	mtsdram(SDRAM_MCOPT2, val | SDRAM_MCOPT2_DCEN_ENABLE);
+
+#if defined(CONFIG_DDR_ECC)
+	ecc_init(CFG_SDRAM_BASE, CFG_MBYTES_SDRAM << 20);
+#endif /* defined(CONFIG_DDR_ECC) */
+
+	return (CFG_MBYTES_SDRAM << 20);
+}
+#endif /* defined(CONFIG_SPD_EEPROM) && defined(CONFIG_440SP) || ... */
diff --git a/cpu/ppc4xx/Makefile b/cpu/ppc4xx/Makefile
index 178c5c6..800bb41 100644
--- a/cpu/ppc4xx/Makefile
+++ b/cpu/ppc4xx/Makefile
@@ -45,6 +45,7 @@
 COBJS	+= cpu_init.o
 COBJS	+= denali_data_eye.o
 COBJS	+= denali_spd_ddr2.o
+COBJS	+= ecc.o
 COBJS	+= fdt.o
 COBJS	+= gpio.o
 COBJS	+= i2c.o
diff --git a/cpu/ppc4xx/cpu_init.c b/cpu/ppc4xx/cpu_init.c
index 42eabfe..1e9423a 100644
--- a/cpu/ppc4xx/cpu_init.c
+++ b/cpu/ppc4xx/cpu_init.c
@@ -32,73 +32,6 @@
 DECLARE_GLOBAL_DATA_PTR;
 #endif
 
-#ifdef CFG_INIT_DCACHE_CS
-# if (CFG_INIT_DCACHE_CS == 0)
-#  define PBxAP pb0ap
-#  define PBxCR pb0cr
-#  if (defined(CFG_EBC_PB0AP) && defined(CFG_EBC_PB0CR))
-#   define PBxAP_VAL CFG_EBC_PB0AP
-#   define PBxCR_VAL CFG_EBC_PB0CR
-#  endif
-# endif
-# if (CFG_INIT_DCACHE_CS == 1)
-#  define PBxAP pb1ap
-#  define PBxCR pb1cr
-#  if (defined(CFG_EBC_PB1AP) && defined(CFG_EBC_PB1CR))
-#   define PBxAP_VAL CFG_EBC_PB1AP
-#   define PBxCR_VAL CFG_EBC_PB1CR
-#  endif
-# endif
-# if (CFG_INIT_DCACHE_CS == 2)
-#  define PBxAP pb2ap
-#  define PBxCR pb2cr
-#  if (defined(CFG_EBC_PB2AP) && defined(CFG_EBC_PB2CR))
-#   define PBxAP_VAL CFG_EBC_PB2AP
-#   define PBxCR_VAL CFG_EBC_PB2CR
-#  endif
-# endif
-# if (CFG_INIT_DCACHE_CS == 3)
-#  define PBxAP pb3ap
-#  define PBxCR pb3cr
-#  if (defined(CFG_EBC_PB3AP) && defined(CFG_EBC_PB3CR))
-#   define PBxAP_VAL CFG_EBC_PB3AP
-#   define PBxCR_VAL CFG_EBC_PB3CR
-#  endif
-# endif
-# if (CFG_INIT_DCACHE_CS == 4)
-#  define PBxAP pb4ap
-#  define PBxCR pb4cr
-#  if (defined(CFG_EBC_PB4AP) && defined(CFG_EBC_PB4CR))
-#   define PBxAP_VAL CFG_EBC_PB4AP
-#   define PBxCR_VAL CFG_EBC_PB4CR
-#  endif
-# endif
-# if (CFG_INIT_DCACHE_CS == 5)
-#  define PBxAP pb5ap
-#  define PBxCR pb5cr
-#  if (defined(CFG_EBC_PB5AP) && defined(CFG_EBC_PB5CR))
-#   define PBxAP_VAL CFG_EBC_PB5AP
-#   define PBxCR_VAL CFG_EBC_PB5CR
-#  endif
-# endif
-# if (CFG_INIT_DCACHE_CS == 6)
-#  define PBxAP pb6ap
-#  define PBxCR pb6cr
-#  if (defined(CFG_EBC_PB6AP) && defined(CFG_EBC_PB6CR))
-#   define PBxAP_VAL CFG_EBC_PB6AP
-#   define PBxCR_VAL CFG_EBC_PB6CR
-#  endif
-# endif
-# if (CFG_INIT_DCACHE_CS == 7)
-#  define PBxAP pb7ap
-#  define PBxCR pb7cr
-#  if (defined(CFG_EBC_PB7AP) && defined(CFG_EBC_PB7CR))
-#   define PBxAP_VAL CFG_EBC_PB7AP
-#   define PBxCR_VAL CFG_EBC_PB7CR
-#  endif
-# endif
-#endif /* CFG_INIT_DCACHE_CS */
-
 #ifndef CFG_PLL_RECONFIG
 #define CFG_PLL_RECONFIG	0
 #endif
@@ -352,24 +285,6 @@
 #if defined(CONFIG_405GP)
 	uint pvr = get_pvr();
 #endif
-
-#ifdef CFG_INIT_DCACHE_CS
-	/*
-	 * Flush and invalidate dcache, then disable CS for temporary stack.
-	 * Afterwards, this CS can be used for other purposes
-	 */
-	dcache_disable();   /* flush and invalidate dcache */
-	mtebc(PBxAP, 0);
-	mtebc(PBxCR, 0);    /* disable CS for temporary stack */
-
-#if (defined(PBxAP_VAL) && defined(PBxCR_VAL))
-	/*
-	 * Write new value into CS register
-	 */
-	mtebc(PBxAP, PBxAP_VAL);
-	mtebc(PBxCR, PBxCR_VAL);
-#endif
-#endif /* CFG_INIT_DCACHE_CS */
 
 	/*
 	 * Write Ethernetaddress into on-chip register
diff --git a/cpu/ppc4xx/ecc.c b/cpu/ppc4xx/ecc.c
new file mode 100644
index 0000000..95b941d
--- /dev/null
+++ b/cpu/ppc4xx/ecc.c
@@ -0,0 +1,121 @@
+/*
+ *    Copyright (c) 2008 Nuovation System Designs, LLC
+ *      Grant Erickson <gerickson@nuovations.com>
+ *
+ *    (C) Copyright 2005-2007
+ *    Stefan Roese, DENX Software Engineering, sr@denx.de.
+ *
+ *    (C) Copyright 2002
+ *    Jun Gu, Artesyn Technology, jung@artesyncp.com
+ *
+ *    (C) Copyright 2001
+ *    Bill Hunter, Wave 7 Optics, williamhunter@attbi.com
+ *
+ *    See file CREDITS for list of people who contributed to this
+ *    project.
+ *
+ *    This program is free software; you can redistribute it and/or
+ *    modify it under the terms of the GNU General Public License as
+ *    published by the Free Software Foundation; either version 2 of
+ *    the License, or (at your option) any later version.
+ *
+ *    This program is distributed in the hope that it will abe useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with this program; if not, write to the Free Software
+ *    Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ *    MA 02111-1307 USA
+ *
+ *    Description:
+ *	This file implements generic DRAM ECC initialization for
+ *	PowerPC processors using a SDRAM DDR/DDR2 controller,
+ *	including the 405EX(r), 440GP/GX/EP/GR, 440SP(E), and
+ *	460EX/GT.
+ */
+
+#include <common.h>
+#include <ppc4xx.h>
+#include <ppc_asm.tmpl>
+#include <ppc_defs.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+
+#include "ecc.h"
+
+#if !defined(CONFIG_440EPX) && !defined(CONFIG_440GRX)
+#if defined(CONFIG_DDR_ECC) || defined(CONFIG_SDRAM_ECC)
+/*
+ *  void ecc_init()
+ *
+ *  Description:
+ *    This routine initializes a range of DRAM ECC memory with known
+ *    data and enables ECC checking.
+ *
+ *  TO DO:
+ *    - Improve performance by utilizing cache.
+ *    - Further generalize to make usable by other 4xx variants (e.g.
+ *      440EPx, et al).
+ *
+ *  Input(s):
+ *    start - A pointer to the start of memory covered by ECC requiring
+ *	      initialization.
+ *    size  - The size, in bytes, of the memory covered by ECC requiring
+ *	      initialization.
+ *
+ *  Output(s):
+ *    start - A pointer to the start of memory covered by ECC with
+ *	      CFG_ECC_PATTERN written to all locations and ECC data
+ *	      primed.
+ *
+ *  Returns:
+ *    N/A
+ */
+void ecc_init(unsigned long * const start, unsigned long size)
+{
+	const unsigned long pattern = CFG_ECC_PATTERN;
+	unsigned * const end = (unsigned long * const)((long)start + size);
+	unsigned long * current = start;
+	unsigned long mcopt1;
+	long increment;
+
+	if (start >= end)
+		return;
+
+	mfsdram(SDRAM_MCOPT1, mcopt1);
+
+	/* Enable ECC generation without checking or reporting */
+
+	mtsdram(SDRAM_MCOPT1, ((mcopt1 & ~SDRAM_MCOPT1_MCHK_MASK) |
+			       SDRAM_MCOPT1_MCHK_GEN));
+
+	increment = sizeof(u32);
+
+#if defined(CONFIG_440)
+	/*
+	 * Look at the geometry of SDRAM (data width) to determine whether we
+	 * can skip words when writing.
+	 */
+
+	if ((mcopt1 & SDRAM_MCOPT1_DMWD_MASK) != SDRAM_MCOPT1_DMWD_32)
+		increment = sizeof(u64);
+#endif /* defined(CONFIG_440) */
+
+	while (current < end) {
+		*current = pattern;
+		 current = (unsigned long *)((long)current + increment);
+	}
+
+	/* Wait until the writes are finished. */
+
+	sync();
+
+	/* Enable ECC generation with checking and no reporting */
+
+	mtsdram(SDRAM_MCOPT1, ((mcopt1 & ~SDRAM_MCOPT1_MCHK_MASK) |
+			       SDRAM_MCOPT1_MCHK_CHK));
+}
+#endif /* defined(CONFIG_DDR_ECC) || defined(CONFIG_SDRAM_ECC) */
+#endif /* !defined(CONFIG_440EPX) && !defined(CONFIG_440GRX) */
diff --git a/cpu/ppc4xx/ecc.h b/cpu/ppc4xx/ecc.h
new file mode 100644
index 0000000..da1c4fd
--- /dev/null
+++ b/cpu/ppc4xx/ecc.h
@@ -0,0 +1,42 @@
+/*
+ *    Copyright (c) 2008 Nuovation System Designs, LLC
+ *	Grant Erickson <gerickson@nuovations.com>
+ *
+ *    Copyright (c) 2007 DENX Software Engineering, GmbH
+ *	Stefan Roese <sr@denx.de>
+ *
+ *    See file CREDITS for list of people who contributed to this
+ *    project.
+ *
+ *    This program is free software; you can redistribute it and/or
+ *    modify it under the terms of the GNU General Public License as
+ *    published by the Free Software Foundation; either version 2 of
+ *    the License, or (at your option) any later version.
+ *
+ *    This program is distributed in the hope that it will abe useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *    GNU General Public License for more details.
+ *
+ *    You should have received a copy of the GNU General Public License
+ *    along with this program; if not, write to the Free Software
+ *    Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ *    MA 02111-1307 USA
+ *
+ *    Description:
+ *	This file implements ECC initialization for PowerPC processors
+ *	using the SDRAM DDR2 controller, including the 405EX(r),
+ *	440SP(E), 460EX and 460GT.
+ *
+ */
+
+#ifndef _ECC_H_
+#define _ECC_H_
+
+#if !defined(CFG_ECC_PATTERN)
+#define	CFG_ECC_PATTERN	0x00000000
+#endif /* !defined(CFG_ECC_PATTERN) */
+
+extern void ecc_init(unsigned long * const start, unsigned long size);
+
+#endif /* _ECC_H_ */
diff --git a/cpu/ppc4xx/sdram.c b/cpu/ppc4xx/sdram.c
index 2724d91..901d650 100644
--- a/cpu/ppc4xx/sdram.c
+++ b/cpu/ppc4xx/sdram.c
@@ -31,6 +31,7 @@
 #include <ppc4xx.h>
 #include <asm/processor.h>
 #include "sdram.h"
+#include "ecc.h"
 
 #ifdef CONFIG_SDRAM_BANK0
 
@@ -332,49 +333,6 @@
 	*tr1_value = (first_good + last_bad) / 2;
 }
 
-#ifdef CONFIG_SDRAM_ECC
-static void ecc_init(ulong start, ulong size)
-{
-	ulong	current_addr;		/* current byte address */
-	ulong	end_addr;		/* end of memory region */
-	ulong	addr_inc;		/* address skip between writes */
-	ulong	cfg0_reg;		/* for restoring ECC state */
-
-	/*
-	 * TODO: Enable dcache before running this test (speedup)
-	 */
-
-	mfsdram(mem_cfg0, cfg0_reg);
-	mtsdram(mem_cfg0, (cfg0_reg & ~SDRAM_CFG0_MEMCHK) | SDRAM_CFG0_MEMCHK_GEN);
-
-	/*
-	 * look at geometry of SDRAM (data width) to determine whether we
-	 * can skip words when writing
-	 */
-	if ((cfg0_reg & SDRAM_CFG0_DRAMWDTH) == SDRAM_CFG0_DRAMWDTH_32)
-		addr_inc = 4;
-	else
-		addr_inc = 8;
-
-	current_addr = start;
-	end_addr = start + size;
-
-	while (current_addr < end_addr) {
-		*((ulong *)current_addr) = 0x00000000;
-		current_addr += addr_inc;
-	}
-
-	/*
-	 * TODO: Flush dcache and disable it again
-	 */
-
-	/*
-	 * Enable ecc checking and parity errors
-	 */
-	mtsdram(mem_cfg0, (cfg0_reg & ~SDRAM_CFG0_MEMCHK) | SDRAM_CFG0_MEMCHK_CHK);
-}
-#endif
-
 /*
  * Autodetect onboard DDR SDRAM on 440 platforms
  *
diff --git a/cpu/ppc4xx/start.S b/cpu/ppc4xx/start.S
index 0008170..a5d9ec9 100644
--- a/cpu/ppc4xx/start.S
+++ b/cpu/ppc4xx/start.S
@@ -3,6 +3,8 @@
  *  Copyright (C) 1999	Magnus Damm <kieraypc01.p.y.kie.era.ericsson.se>
  *  Copyright (C) 2000,2001,2002 Wolfgang Denk <wd@denx.de>
  *  Copyright (C) 2007 Stefan Roese <sr@denx.de>, DENX Software Engineering
+ *  Copyright (c) 2008 Nuovation System Designs, LLC
+ *    Grant Erickson <gerickson@nuovations.com>
  *
  * See file CREDITS for list of people who contributed to this
  * project.
@@ -79,34 +81,100 @@
 # if (CFG_INIT_DCACHE_CS == 0)
 #  define PBxAP pb0ap
 #  define PBxCR pb0cr
+#  if (defined(CFG_EBC_PB0AP) && defined(CFG_EBC_PB0CR))
+#   define PBxAP_VAL CFG_EBC_PB0AP
+#   define PBxCR_VAL CFG_EBC_PB0CR
+#  endif
 # endif
 # if (CFG_INIT_DCACHE_CS == 1)
 #  define PBxAP pb1ap
 #  define PBxCR pb1cr
+#  if (defined(CFG_EBC_PB1AP) && defined(CFG_EBC_PB1CR))
+#   define PBxAP_VAL CFG_EBC_PB1AP
+#   define PBxCR_VAL CFG_EBC_PB1CR
+#  endif
 # endif
 # if (CFG_INIT_DCACHE_CS == 2)
 #  define PBxAP pb2ap
 #  define PBxCR pb2cr
+#  if (defined(CFG_EBC_PB2AP) && defined(CFG_EBC_PB2CR))
+#   define PBxAP_VAL CFG_EBC_PB2AP
+#   define PBxCR_VAL CFG_EBC_PB2CR
+#  endif
 # endif
 # if (CFG_INIT_DCACHE_CS == 3)
 #  define PBxAP pb3ap
 #  define PBxCR pb3cr
+#  if (defined(CFG_EBC_PB3AP) && defined(CFG_EBC_PB3CR))
+#   define PBxAP_VAL CFG_EBC_PB3AP
+#   define PBxCR_VAL CFG_EBC_PB3CR
+#  endif
 # endif
 # if (CFG_INIT_DCACHE_CS == 4)
 #  define PBxAP pb4ap
 #  define PBxCR pb4cr
+#  if (defined(CFG_EBC_PB4AP) && defined(CFG_EBC_PB4CR))
+#   define PBxAP_VAL CFG_EBC_PB4AP
+#   define PBxCR_VAL CFG_EBC_PB4CR
+#  endif
 # endif
 # if (CFG_INIT_DCACHE_CS == 5)
 #  define PBxAP pb5ap
 #  define PBxCR pb5cr
+#  if (defined(CFG_EBC_PB5AP) && defined(CFG_EBC_PB5CR))
+#   define PBxAP_VAL CFG_EBC_PB5AP
+#   define PBxCR_VAL CFG_EBC_PB5CR
+#  endif
 # endif
 # if (CFG_INIT_DCACHE_CS == 6)
 #  define PBxAP pb6ap
 #  define PBxCR pb6cr
+#  if (defined(CFG_EBC_PB6AP) && defined(CFG_EBC_PB6CR))
+#   define PBxAP_VAL CFG_EBC_PB6AP
+#   define PBxCR_VAL CFG_EBC_PB6CR
+#  endif
 # endif
 # if (CFG_INIT_DCACHE_CS == 7)
 #  define PBxAP pb7ap
 #  define PBxCR pb7cr
+#  if (defined(CFG_EBC_PB7AP) && defined(CFG_EBC_PB7CR))
+#   define PBxAP_VAL CFG_EBC_PB7AP
+#   define PBxCR_VAL CFG_EBC_PB7CR
+#  endif
+# endif
+# ifndef PBxAP_VAL
+#  define PBxAP_VAL	0
+# endif
+# ifndef PBxCR_VAL
+#  define PBxCR_VAL	0
+# endif
+/*
+ * Memory Bank x (nothingness) initialization CFG_INIT_RAM_ADDR + 64 MiB
+ * used as temporary stack pointer for the primordial stack
+ */
+# ifndef CFG_INIT_DCACHE_PBxAR
+#  define CFG_INIT_DCACHE_PBxAR	(EBC_BXAP_BME_DISABLED			| \
+				 EBC_BXAP_TWT_ENCODE(7)			| \
+				 EBC_BXAP_BCE_DISABLE			| \
+				 EBC_BXAP_BCT_2TRANS			| \
+				 EBC_BXAP_CSN_ENCODE(0)			| \
+				 EBC_BXAP_OEN_ENCODE(0)			| \
+				 EBC_BXAP_WBN_ENCODE(0)			| \
+				 EBC_BXAP_WBF_ENCODE(0)			| \
+				 EBC_BXAP_TH_ENCODE(2)			| \
+				 EBC_BXAP_RE_DISABLED			| \
+				 EBC_BXAP_SOR_NONDELAYED		| \
+				 EBC_BXAP_BEM_WRITEONLY			| \
+				 EBC_BXAP_PEN_DISABLED)
+# endif /* CFG_INIT_DCACHE_PBxAR */
+# ifndef CFG_INIT_DCACHE_PBxCR
+#  define CFG_INIT_DCACHE_PBxCR	(EBC_BXCR_BAS_ENCODE(CFG_INIT_RAM_ADDR)	| \
+				 EBC_BXCR_BS_64MB			| \
+				 EBC_BXCR_BU_RW				| \
+				 EBC_BXCR_BW_16BIT)
+# endif /* CFG_INIT_DCACHE_PBxCR */
+# ifndef CFG_INIT_RAM_PATTERN
+#  define CFG_INIT_RAM_PATTERN	0xDEADDEAD
 # endif
 #endif /* CFG_INIT_DCACHE_CS */
 
@@ -114,6 +182,23 @@
 #error Only 4k of init-ram is supported - please adjust CFG_INIT_RAM_END!
 #endif
 
+/*
+ * Unless otherwise overriden, enable two 128MB cachable instruction regions
+ * at CFG_SDRAM_BASE and another 128MB cacheable instruction region covering
+ * NOR flash at CFG_FLASH_BASE. Disable all cacheable data regions.
+ */
+#if !defined(CFG_ICACHE_SACR_VALUE)
+# define CFG_ICACHE_SACR_VALUE		\
+		(PPC_128MB_SACR_VALUE(CFG_SDRAM_BASE + (  0 << 20)) | \
+		 PPC_128MB_SACR_VALUE(CFG_SDRAM_BASE + (128 << 20)) | \
+		 PPC_128MB_SACR_VALUE(CFG_FLASH_BASE))
+#endif /* !defined(CFG_ICACHE_SACR_VALUE) */
+
+#if !defined(CFG_DCACHE_SACR_VALUE)
+# define CFG_DCACHE_SACR_VALUE		\
+		(0x00000000)
+#endif /* !defined(CFG_DCACHE_SACR_VALUE) */
+
 #define function_prolog(func_name)	.text; \
 					.align 2; \
 					.globl func_name; \
@@ -840,16 +925,16 @@
 	/* make sure above stores all comlete before going on */
 	sync
 
-	/*----------------------------------------------------------------------- */
-	/* Enable two 128MB cachable regions. */
-	/*----------------------------------------------------------------------- */
-	addis	r1,r0,0xc000
-	addi	r1,r1,0x0001
-	mticcr	r1			/* instruction cache */
+	/* Set-up icache cacheability. */
+	lis	r1, CFG_ICACHE_SACR_VALUE@h
+	ori	r1, r1, CFG_ICACHE_SACR_VALUE@l
+	mticcr	r1
+	isync
 
-	addis	r1,r0,0x0000
-	addi	r1,r1,0x0000
-	mtdccr	r1			/* data cache */
+	/* Set-up dcache cacheability. */
+	lis	r1, CFG_DCACHE_SACR_VALUE@h
+	ori	r1, r1, CFG_DCACHE_SACR_VALUE@l
+	mtdccr	r1
 
 	addis	r1,r0,CFG_INIT_RAM_ADDR@h
 	ori	r1,r1,CFG_INIT_SP_OFFSET /* set up the stack to SDRAM */
@@ -892,27 +977,20 @@
 					/* dbsr is cleared by setting bits to 1) */
 	mtdbsr	r4			/* clear/reset the dbsr */
 
-	/*----------------------------------------------------------------------- */
-	/* Invalidate I and D caches. Enable I cache for defined memory regions */
-	/* to speed things up. Leave the D cache disabled for now. It will be */
-	/* enabled/left disabled later based on user selected menu options. */
-	/* Be aware that the I cache may be disabled later based on the menu */
-	/* options as well. See miscLib/main.c. */
-	/*----------------------------------------------------------------------- */
+	/* Invalidate the i- and d-caches. */
 	bl	invalidate_icache
 	bl	invalidate_dcache
 
-	/*----------------------------------------------------------------------- */
-	/* Enable two 128MB cachable regions. */
-	/*----------------------------------------------------------------------- */
-	lis	r4,0xc000
-	ori	r4,r4,0x0001
-	mticcr	r4			/* instruction cache */
+	/* Set-up icache cacheability. */
+	lis	r4, CFG_ICACHE_SACR_VALUE@h
+	ori	r4, r4, CFG_ICACHE_SACR_VALUE@l
+	mticcr	r4
 	isync
 
-	lis	r4,0x0000
-	ori	r4,r4,0x0000
-	mtdccr	r4			/* data cache */
+	/* Set-up dcache cacheability. */
+	lis	r4, CFG_DCACHE_SACR_VALUE@h
+	ori	r4, r4, CFG_DCACHE_SACR_VALUE@l
+	mtdccr	r4
 
 #if !(defined(CFG_EBC_PB0AP) && defined(CFG_EBC_PB0CR)) || defined(CONFIG_405EX)
 	/*----------------------------------------------------------------------- */
@@ -922,9 +1000,9 @@
 #endif
 #if !(defined(CFG_INIT_DCACHE_CS) || defined(CFG_TEMP_STACK_OCM))
 	/*
-	 * Boards like the Kilauea (405EX) don't have OCM and can't use
-	 * DCache for init-ram. So setup stack here directly after the
-	 * SDRAM is initialized.
+	 * For boards that don't have OCM and can't use the data cache
+	 * for their primordial stack, setup stack here directly after the
+	 * SDRAM is initialized in ext_bus_cntlr_init.
 	 */
 	lis	r1, CFG_INIT_RAM_ADDR@h
 	ori	r1,r1,CFG_INIT_SP_OFFSET /* set up the stack in SDRAM */
@@ -1043,47 +1121,86 @@
 	/* Setup temporary stack in DCACHE or OCM if needed for SDRAM SPD. */
 	/*----------------------------------------------------------------------- */
 #ifdef CFG_INIT_DCACHE_CS
-	/*----------------------------------------------------------------------- */
-	/* Memory Bank x (nothingness) initialization 1GB+64MEG */
-	/* used as temporary stack pointer for stage0  */
-	/*----------------------------------------------------------------------- */
-	li	r4,PBxAP
-	mtdcr	ebccfga,r4
-	lis	r4,0x0380
-	ori	r4,r4,0x0480
-	mtdcr	ebccfgd,r4
+	li	r4, PBxAP
+	mtdcr	ebccfga, r4
+	lis	r4, CFG_INIT_DCACHE_PBxAR@h
+	ori	r4, r4, CFG_INIT_DCACHE_PBxAR@l
+	mtdcr	ebccfgd, r4
 
-	addi	r4,0,PBxCR
-	mtdcr	ebccfga,r4
-	lis	r4,0x400D
-	ori	r4,r4,0xa000
-	mtdcr	ebccfgd,r4
+	addi	r4, 0, PBxCR
+	mtdcr	ebccfga, r4
+	lis	r4, CFG_INIT_DCACHE_PBxCR@h
+	ori	r4, r4, CFG_INIT_DCACHE_PBxCR@l
+	mtdcr	ebccfgd, r4
 
-	/* turn on data cache for this region */
-	lis	r4,0x0080
+	/*
+	 * Enable the data cache for the 128MB storage access control region
+	 * at CFG_INIT_RAM_ADDR.
+	 */
+	mfdccr	r4
+	oris	r4, r4, PPC_128MB_SACR_VALUE(CFG_INIT_RAM_ADDR)@h
+	ori	r4, r4, PPC_128MB_SACR_VALUE(CFG_INIT_RAM_ADDR)@l
 	mtdccr	r4
 
+	/*
+	 * Preallocate data cache lines to be used to avoid a subsequent
+	 * cache miss and an ensuing machine check exception when exceptions
+	 * are enabled.
+	 */
+	li	r0, 0
+
-	/* set stack pointer and clear stack to known value */
+	lis	r3, CFG_INIT_RAM_ADDR@h
+	ori	r3, r3, CFG_INIT_RAM_ADDR@l
 
-	lis	r1,CFG_INIT_RAM_ADDR@h
-	ori	r1,r1,CFG_INIT_SP_OFFSET@l
+	lis	r4, CFG_INIT_RAM_END@h
+	ori	r4, r4, CFG_INIT_RAM_END@l
 
-	li	r4,2048			/* we store 2048 words to stack */
+	/*
+	 * Convert the size, in bytes, to the number of cache lines/blocks
+	 * to preallocate.
+	 */
+	clrlwi. r5, r4, (32 - L1_CACHE_SHIFT)
+	srwi	r5, r4, L1_CACHE_SHIFT
+	beq	..load_counter
+	addi	r5, r5, 0x0001
+..load_counter:
+	mtctr	r5
+
+	/* Preallocate the computed number of cache blocks. */
+..alloc_dcache_block:
+	dcba	r0, r3
+	addi	r3, r3, L1_CACHE_BYTES
+	bdnz	..alloc_dcache_block
+	sync
+
+	/*
+	 * Load the initial stack pointer and data area and convert the size,
+	 * in bytes, to the number of words to initialize to a known value.
+	 */
+	lis	r1, CFG_INIT_RAM_ADDR@h
+	ori	r1, r1, CFG_INIT_SP_OFFSET@l
+
+	lis	r4, (CFG_INIT_RAM_END >> 2)@h
+	ori	r4, r4, (CFG_INIT_RAM_END >> 2)@l
 	mtctr	r4
 
-	lis	r2,CFG_INIT_RAM_ADDR@h		/* we also clear data area */
-	ori	r2,r2,CFG_INIT_RAM_END@l	/* so cant copy value from r1 */
+	lis	r2, CFG_INIT_RAM_ADDR@h
+	ori	r2, r2, CFG_INIT_RAM_END@l
 
-	lis	r4,0xdead		/* we store 0xdeaddead in the stack */
-	ori	r4,r4,0xdead
+	lis	r4, CFG_INIT_RAM_PATTERN@h
+	ori	r4, r4, CFG_INIT_RAM_PATTERN@l
 
 ..stackloop:
-	stwu	r4,-4(r2)
+	stwu	r4, -4(r2)
 	bdnz	..stackloop
 
-	li	r0, 0			/* Make room for stack frame header and */
-	stwu	r0, -4(r1)		/* clear final stack frame so that	*/
-	stwu	r0, -4(r1)		/* stack backtraces terminate cleanly	*/
+	/*
+	 * Make room for stack frame header and clear final stack frame so
+	 * that stack backtraces terminate cleanly.
+	 */
+	stwu	r0, -4(r1)
+	stwu	r0, -4(r1)
+
 	/*
 	 * Set up a dummy frame to store reset vector as return address.
 	 * this causes stack underflow to reset board.
@@ -1328,33 +1445,72 @@
  * This "function" does not return, instead it continues in RAM
  * after relocating the monitor code.
  *
- * r3 = dest
- * r4 = src
- * r5 = length in bytes
- * r6 = cachelinesize
+ * r3 = Relocated stack pointer
+ * r4 = Relocated global data pointer
+ * r5 = Relocated text pointer
  */
 	.globl	relocate_code
 relocate_code:
-#ifdef CONFIG_4xx_DCACHE
+#if defined(CONFIG_4xx_DCACHE) || defined(CFG_INIT_DCACHE_CS)
 	/*
-	 * We need to flush the Init Data before the dcache will be
-	 * invalidated
+	 * We need to flush the initial global data (gd_t) before the dcache
+	 * will be invalidated.
 	 */
 
-	/* save regs */
-	mr	r9,r3
-	mr	r10,r4
-	mr	r11,r5
+	/* Save registers */
+	mr	r9, r3
+	mr	r10, r4
+	mr	r11, r5
 
-	mr	r3,r4
-	addi	r4,r4,0x200	/* should be enough for init data */
+	/* Flush initial global data range */
+	mr	r3, r4
+	addi	r4, r4, CFG_GBL_DATA_SIZE@l
 	bl	flush_dcache_range
 
-	/* restore regs */
-	mr	r3,r9
-	mr	r4,r10
-	mr	r5,r11
-#endif
+#if defined(CFG_INIT_DCACHE_CS)
+	/*
+	 * Undo the earlier data cache set-up for the primordial stack and
+	 * data area. First, invalidate the data cache and then disable data
+	 * cacheability for that area. Finally, restore the EBC values, if
+	 * any.
+	 */
+
+	/* Invalidate the primordial stack and data area in cache */
+	lis	r3, CFG_INIT_RAM_ADDR@h
+	ori	r3, r3, CFG_INIT_RAM_ADDR@l
+
+	lis	r4, CFG_INIT_RAM_END@h
+	ori	r4, r4, CFG_INIT_RAM_END@l
+	add	r4, r4, r3
+
+	bl	invalidate_dcache_range
+
+	/* Disable cacheability for the region */
+	mfdccr	r3
+	lis     r4, ~PPC_128MB_SACR_VALUE(CFG_INIT_RAM_ADDR)@h
+	ori     r4, r4, ~PPC_128MB_SACR_VALUE(CFG_INIT_RAM_ADDR)@l
+	and     r3, r3, r4
+	mtdccr  r3
+
+	/* Restore the EBC parameters */
+	li	r3, PBxAP
+	mtdcr	ebccfga, r3
+	lis	r3, PBxAP_VAL@h
+	ori	r3, r3, PBxAP_VAL@l
+	mtdcr	ebccfgd, r3
+
+	li	r3, PBxCR
+	mtdcr	ebccfga, r3
+	lis	r3, PBxCR_VAL@h
+	ori	r3, r3, PBxCR_VAL@l
+	mtdcr	ebccfgd, r3
+#endif /* defined(CFG_INIT_DCACHE_CS) */
+
+	/* Restore registers */
+	mr	r3, r9
+	mr	r4, r10
+	mr	r5, r11
+#endif /* defined(CONFIG_4xx_DCACHE) || defined(CFG_INIT_DCACHE_CS) */
 
 #ifdef CFG_INIT_RAM_DCACHE
 	/*
@@ -1396,13 +1552,13 @@
 	addi	r1,r0,CFG_TLB_FOR_BOOT_FLASH	/* Use defined TLB */
 #else
 	addi	r1,r0,0x0000		/* Default TLB entry is #0 */
-#endif
+#endif /* CFG_TLB_FOR_BOOT_FLASH */
 	tlbre	r0,r1,0x0002		/* Read contents */
 	ori	r0,r0,0x0c00		/* Or in the inhibit, write through bit */
 	tlbwe	r0,r1,0x0002		/* Save it out */
 	sync
 	isync
-#endif
+#endif /* defined(CONFIG_440EP) || ... || defined(CONFIG_460GT) */
 	mr	r1,  r3		/* Set new stack pointer		*/
 	mr	r9,  r4		/* Save copy of Init Data pointer	*/
 	mr	r10, r5		/* Save copy of Destination Address	*/
@@ -1425,7 +1581,7 @@
 
 	/* First our own GOT */
 	add	r14, r14, r15
-	/* the the one used by the C code */
+	/* then the one used by the C code */
 	add	r30, r30, r15
 
 	/*
diff --git a/cpu/ppc4xx/traps.c b/cpu/ppc4xx/traps.c
index 38b6f89..8b7e32a 100644
--- a/cpu/ppc4xx/traps.c
+++ b/cpu/ppc4xx/traps.c
@@ -170,7 +170,7 @@
 
 	val = get_esr();
 
-#if !defined(CONFIG_440)
+#if !defined(CONFIG_440) && !defined(CONFIG_405EX)
 	if (val& ESR_IMCP) {
 		printf("Instruction");
 		mtspr(ESR, val & ~ESR_IMCP);
@@ -179,7 +179,7 @@
 	}
 	printf(" machine check.\n");
 
-#elif defined(CONFIG_440)
+#elif defined(CONFIG_440) || defined(CONFIG_405EX)
 	if (val& ESR_IMCP){
 		printf("Instruction Synchronous Machine Check exception\n");
 		mtspr(SPRN_ESR, val & ~ESR_IMCP);
@@ -187,10 +187,15 @@
 		val = mfspr(MCSR);
 		if (val & MCSR_IB)
 			printf("Instruction Read PLB Error\n");
+#if defined(CONFIG_440)
 		if (val & MCSR_DRB)
 			printf("Data Read PLB Error\n");
 		if (val & MCSR_DWB)
 			printf("Data Write PLB Error\n");
+#else
+		if (val & MCSR_DB)
+			printf("Data PLB Error\n");
+#endif
 		if (val & MCSR_TLBP)
 			printf("TLB Parity Error\n");
 		if (val & MCSR_ICP){