Merge with /home/stefan/git/u-boot/bamboo-nand
diff --git a/MAKEALL b/MAKEALL
index 04653a0..81f5dfc 100755
--- a/MAKEALL
+++ b/MAKEALL
@@ -76,21 +76,21 @@
 
 LIST_4xx="	\
 	acadia		ADCIOP		alpr		AP1000		\
-	AR405		ASH405		bamboo		bubinga		\
-	CANBT		CMS700		CPCI2DP		CPCI405		\
-	CPCI4052	CPCI405AB	CPCI405DT	CPCI440		\
-	CPCIISER4	CRAYL1		csb272		csb472		\
-	DASA_SIM	DP405		DU405		ebony		\
-	ERIC		EXBITGEN	G2000		HH405		\
-	HUB405		JSE		KAREF		katmai		\
-	luan		METROBOX	MIP405		MIP405T		\
-	ML2		ml300		ocotea		OCRTC		\
-	ORSG		p3p440		PCI405		pcs440ep	\
-	PIP405		PLU405		PMC405		PPChameleonEVB	\
-	sbc405		sc3		sequoia		sequoia_nand	\
-	taishan		VOH405		VOM405		W7OLMC		\
-	W7OLMG		walnut		WUH405		XPEDITE1K	\
-	yellowstone	yosemite	yucca				\
+	AR405		ASH405		bamboo		bamboo_nand	\
+	bubinga		CANBT		CMS700		CPCI2DP		\
+	CPCI405		CPCI4052	CPCI405AB	CPCI405DT	\
+	CPCI440		CPCIISER4	CRAYL1		csb272		\
+	csb472		DASA_SIM	DP405		DU405		\
+	ebony		ERIC		EXBITGEN	G2000		\
+	HH405		HUB405		JSE		KAREF		\
+	katmai		luan		METROBOX	MIP405		\
+	MIP405T		ML2		ml300		ocotea		\
+	OCRTC		ORSG		p3p440		PCI405		\
+	pcs440ep	PIP405		PLU405		PMC405		\
+	PPChameleonEVB	sbc405		sc3		sequoia		\
+	sequoia_nand	taishan		VOH405		VOM405		\
+	W7OLMC		W7OLMG		walnut		WUH405		\
+	XPEDITE1K	yellowstone	yosemite	yucca		\
 "
 
 #########################################################################
diff --git a/Makefile b/Makefile
index caa5a0e..463757c 100644
--- a/Makefile
+++ b/Makefile
@@ -1038,6 +1038,15 @@
 bamboo_config:	unconfig
 	@$(MKCONFIG) $(@:_config=) ppc ppc4xx bamboo amcc
 
+bamboo_nand_config:	unconfig
+	@mkdir -p $(obj)include
+	@mkdir -p $(obj)nand_spl
+	@mkdir -p $(obj)board/amcc/bamboo
+	@echo "#define CONFIG_NAND_U_BOOT" > $(obj)include/config.h
+	@$(MKCONFIG) -n $@ -a bamboo ppc ppc4xx bamboo amcc
+	@echo "TEXT_BASE = 0x01000000" > $(obj)board/amcc/bamboo/config.tmp
+	@echo "CONFIG_NAND_U_BOOT = y" >> $(obj)include/config.mk
+
 bubinga_config:	unconfig
 	@$(MKCONFIG) $(@:_config=) ppc ppc4xx bubinga amcc
 
diff --git a/board/amcc/bamboo/Makefile b/board/amcc/bamboo/Makefile
index 5da96e9..d01cc49 100644
--- a/board/amcc/bamboo/Makefile
+++ b/board/amcc/bamboo/Makefile
@@ -1,5 +1,5 @@
 #
-# (C) Copyright 2002-2006
+# (C) Copyright 2002-2007
 # Wolfgang Denk, DENX Software Engineering, wd@denx.de.
 #
 # See file CREDITS for list of people who contributed to this
@@ -33,7 +33,7 @@
 SOBJS	:= $(addprefix $(obj),$(SOBJS))
 
 $(LIB):	$(OBJS) $(SOBJS)
-	$(AR) $(ARFLAGS) $@ $(OBJS)
+	$(AR) $(ARFLAGS) $@ $(OBJS) $(SOBJS)
 
 clean:
 	rm -f $(SOBJS) $(OBJS)
diff --git a/board/amcc/bamboo/bamboo.c b/board/amcc/bamboo/bamboo.c
index 6260b01..2e651df 100644
--- a/board/amcc/bamboo/bamboo.c
+++ b/board/amcc/bamboo/bamboo.c
@@ -1,5 +1,5 @@
 /*
- * (C) Copyright 2005
+ * (C) Copyright 2005-2007
  * Stefan Roese, DENX Software Engineering, sr@denx.de.
  *
  * See file CREDITS for list of people who contributed to this
@@ -291,6 +291,7 @@
 	return (0);
 }
 
+#if !(defined(CONFIG_NAND_U_BOOT) || defined(CONFIG_NAND_SPL))
 /*************************************************************************
  *
  * init_spd_array -- Bamboo has one bank onboard sdram (plus DIMM)
@@ -345,10 +346,12 @@
 	cfg_simulate_spd_eeprom[25]    = 0x00;    /* SDRAM Cycle Time (cas latency 1.5) = N.A */
 	cfg_simulate_spd_eeprom[12]    = 0x82;    /* refresh Rate Type: Normal (15.625us) + Self refresh */
 }
+#endif
 
 long int initdram (int board_type)
 {
-	long dram_size = 0;
+#if !(defined(CONFIG_NAND_U_BOOT) || defined(CONFIG_NAND_SPL))
+	long dram_size;
 
 	/*
 	 * First write simulated values in eeprom array for onboard bank 0
@@ -358,6 +361,9 @@
 	dram_size = spd_sdram();
 
 	return dram_size;
+#else
+	return CFG_MBYTES_SDRAM << 20;
+#endif
 }
 
 #if defined(CFG_DRAM_TEST)
@@ -881,11 +887,11 @@
 		/*------------------------------------------------------------------------- */
 	case BOOT_FROM_NAND_FLASH0:
 		/*------------------------------------------------------------------------- */
-		ebc0_cs0_bnap_value = 0;
-		ebc0_cs0_bncr_value = 0;
+		ebc0_cs0_bnap_value = EBC0_BNAP_NAND_FLASH;
+		ebc0_cs0_bncr_value = EBC0_BNCR_NAND_FLASH_CS1;
 
-		ebc0_cs1_bnap_value = EBC0_BNAP_NAND_FLASH;
-		ebc0_cs1_bncr_value = EBC0_BNCR_NAND_FLASH_CS1;
+		ebc0_cs1_bnap_value = 0;
+		ebc0_cs1_bncr_value = 0;
 		ebc0_cs2_bnap_value = 0;
 		ebc0_cs2_bncr_value = 0;
 		ebc0_cs3_bnap_value = 0;
@@ -1409,10 +1415,10 @@
 	gpio_tab[GPIO0][6].in_out = GPIO_OUT;	    /* EBC_CS_N(1) */
 	gpio_tab[GPIO0][6].alt_nb = GPIO_ALT1;
 
-#if 0
 	gpio_tab[GPIO0][7].in_out = GPIO_OUT;	    /* EBC_CS_N(2) */
 	gpio_tab[GPIO0][7].alt_nb = GPIO_ALT1;
 
+#if 0
 	gpio_tab[GPIO0][7].in_out = GPIO_OUT;	    /* EBC_CS_N(3) */
 	gpio_tab[GPIO0][7].alt_nb = GPIO_ALT1;
 #endif
@@ -1900,12 +1906,21 @@
 	{
 		update_ndfc_ios();
 
+#if !(defined(CONFIG_NAND_U_BOOT) || defined(CONFIG_NAND_SPL))
 		mtsdr(sdr_cust0, SDR0_CUST0_MUX_NDFC_SEL   |
 		      SDR0_CUST0_NDFC_ENABLE	|
 		      SDR0_CUST0_NDFC_BW_8_BIT	|
 		      SDR0_CUST0_NDFC_ARE_MASK	|
 		      SDR0_CUST0_CHIPSELGAT_EN1 |
 		      SDR0_CUST0_CHIPSELGAT_EN2);
+#else
+		mtsdr(sdr_cust0, SDR0_CUST0_MUX_NDFC_SEL   |
+		      SDR0_CUST0_NDFC_ENABLE	|
+		      SDR0_CUST0_NDFC_BW_8_BIT	|
+		      SDR0_CUST0_NDFC_ARE_MASK	|
+		      SDR0_CUST0_CHIPSELGAT_EN0 |
+		      SDR0_CUST0_CHIPSELGAT_EN2);
+#endif
 
 		ndfc_selection_in_fpga();
 	}
diff --git a/board/amcc/bamboo/config.mk b/board/amcc/bamboo/config.mk
index 9d7f4c3..b46527d 100644
--- a/board/amcc/bamboo/config.mk
+++ b/board/amcc/bamboo/config.mk
@@ -1,5 +1,5 @@
 #
-# (C) Copyright 2002-2006
+# (C) Copyright 2002-2007
 # Wolfgang Denk, DENX Software Engineering, wd@denx.de.
 #
 # See file CREDITS for list of people who contributed to this
@@ -21,7 +21,11 @@
 # MA 02111-1307 USA
 #
 
+sinclude $(OBJTREE)/board/$(BOARDDIR)/config.tmp
+
+ifndef TEXT_BASE
 TEXT_BASE = 0xFFFA0000
+endif
 
 PLATFORM_CPPFLAGS += -DCONFIG_440=1
 
diff --git a/board/amcc/bamboo/flash.c b/board/amcc/bamboo/flash.c
index a30ab7a..8a2e832 100644
--- a/board/amcc/bamboo/flash.c
+++ b/board/amcc/bamboo/flash.c
@@ -53,7 +53,7 @@
 static unsigned long flash_addr_table[][CFG_MAX_FLASH_BANKS] = {
 	{0x87800001, 0xFFF00000, 0xFFF80000}, /* 0:boot from small flash */
 	{0x00000000, 0x00000000, 0x00000000}, /* 1:boot from pci 66      */
-	{0x00000000, 0x00000000, 0x00000000}, /* 2:boot from nand flash  */
+	{0x87800001, 0x00000000, 0x00000000}, /* 0:boot from nand flash  */
 	{0x87F00000, 0x87F80000, 0xFFC00001}, /* 3:boot from big flash 33*/
 	{0x87F00000, 0x87F80000, 0xFFC00001}, /* 4:boot from big flash 66*/
 	{0x00000000, 0x00000000, 0x00000000}, /* 5:boot from             */
@@ -134,10 +134,10 @@
 		flash_info[i].size = 0;
 
 		/* check whether the address is 0 */
-		if (flash_addr_table[index][i] == 0) {
+		if (flash_addr_table[index][i] == 0)
 			continue;
-		}
 
+		DEBUGF("Detection bank %d...\n", i);
 		/* call flash_get_size() to initialize sector address */
 		size_b[i] = flash_get_size((vu_long *) flash_addr_table[index][i],
 				   &flash_info[i]);
diff --git a/board/amcc/bamboo/init.S b/board/amcc/bamboo/init.S
index 7820107..1459eec 100644
--- a/board/amcc/bamboo/init.S
+++ b/board/amcc/bamboo/init.S
@@ -1,74 +1,31 @@
 /*
-*
-* See file CREDITS for list of people who contributed to this
-* project.
-*
-* This program is free software; you can redistribute it and/or
-* modify it under the terms of the GNU General Public License as
-* published by the Free Software Foundation; either version 2 of
-* the License, or (at your option) any later version.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
-* GNU General Public License for more details.
-*
-* You should have received a copy of the GNU General Public License
-* along with this program; if not, write to the Free Software
-* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
-* MA 02111-1307 USA
-*/
+ * (C) Copyright 2007
+ * Stefan Roese, DENX Software Engineering, sr@denx.de.
+ *
+ *  Copyright (C) 2002 Scott McNutt <smcnutt@artesyncp.com>
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
 
 #include <ppc_asm.tmpl>
 #include <config.h>
-
-/* General */
-#define TLB_VALID   0x00000200
-
-/* Supported page sizes */
-
-#define SZ_1K	    0x00000000
-#define SZ_4K	    0x00000010
-#define SZ_16K	    0x00000020
-#define SZ_64K	    0x00000030
-#define SZ_256K	    0x00000040
-#define SZ_1M	    0x00000050
-#define SZ_8M       0x00000060
-#define SZ_16M	    0x00000070
-#define SZ_256M	    0x00000090
-
-/* Storage attributes */
-#define SA_W	    0x00000800	    /* Write-through */
-#define SA_I	    0x00000400	    /* Caching inhibited */
-#define SA_M	    0x00000200	    /* Memory coherence */
-#define SA_G	    0x00000100	    /* Guarded */
-#define SA_E	    0x00000080	    /* Endian */
-
-/* Access control */
-#define AC_X	    0x00000024	    /* Execute */
-#define AC_W	    0x00000012	    /* Write */
-#define AC_R	    0x00000009	    /* Read */
-
-/* Some handy macros */
-
-#define EPN(e)		((e) & 0xfffffc00)
-#define TLB0(epn,sz)	( (EPN((epn)) | (sz) | TLB_VALID ) )
-#define TLB1(rpn,erpn)	( ((rpn)&0xfffffc00) | (erpn) )
-#define TLB2(a)		( (a)&0x00000fbf )
-
-#define tlbtab_start\
-	mflr    r1  ;\
-	bl 0f	    ;
-
-#define tlbtab_end\
-	.long 0, 0, 0	;   \
-0:	mflr    r0	;   \
-	mtlr    r1	;   \
-	blr		;
-
-#define tlbentry(epn,sz,rpn,erpn,attr)\
-	.long TLB0(epn,sz),TLB1(rpn,erpn),TLB2(attr)
-
+#include <asm-ppc/mmu.h>
 
 /**************************************************************************
  * TLB TABLE
@@ -80,34 +37,68 @@
  *  Pointer to the table is returned in r1
  *
  *************************************************************************/
-
-    .section .bootpg,"ax"
-    .globl tlbtab
+	.section .bootpg,"ax"
+	.globl tlbtab
 
 tlbtab:
-    tlbtab_start
+	tlbtab_start
+
+	/*
+	 * BOOT_CS (FLASH) must be first. Before relocation SA_I can be off to use the
+	 * speed up boot process. It is patched after relocation to enable SA_I
+	 */
+#ifndef CONFIG_NAND_SPL
+	tlbentry(CFG_BOOT_BASE_ADDR, SZ_256M, CFG_BOOT_BASE_ADDR, 0, AC_R|AC_W|AC_X|SA_G)
+#else
+	tlbentry(CFG_NAND_BOOT_SPL_SRC, SZ_4K, CFG_NAND_BOOT_SPL_SRC, 0, AC_R|AC_W|AC_X|SA_G)
+#endif
+
+	/* TLB-entry for init-ram in dcache (SA_I must be turned off!) */
+	tlbentry(CFG_INIT_RAM_ADDR, SZ_4K, CFG_INIT_RAM_ADDR, 0, AC_R|AC_W|AC_X|SA_G)
 
-    /*
-     * BOOT_CS (FLASH) must be first. Before relocation SA_I can be off to use the
-     * speed up boot process. It is patched after relocation to enable SA_I
-     */
-    tlbentry( CFG_BOOT_BASE_ADDR, SZ_256M, CFG_BOOT_BASE_ADDR, 0, AC_R|AC_W|AC_X|SA_G/*|SA_I*/)
+	tlbentry(CFG_SDRAM_BASE, SZ_256M, CFG_SDRAM_BASE, 0, AC_R|AC_W|AC_X|SA_G|SA_I)
 
-    /* TLB-entry for init-ram in dcache (SA_I must be turned off!) */
-    tlbentry( CFG_INIT_RAM_ADDR, SZ_64K, CFG_INIT_RAM_ADDR, 0, AC_R|AC_W|AC_X|SA_G )
+	/* PCI base & peripherals */
+	tlbentry(CFG_PCI_BASE, SZ_256M, CFG_PCI_BASE, 0, AC_R|AC_W|SA_G|SA_I)
 
-    tlbentry( CFG_SDRAM_BASE, SZ_256M, CFG_SDRAM_BASE, 0, AC_R|AC_W|AC_X|SA_G|SA_I )
-    tlbentry( CFG_PCI_BASE, SZ_256M, CFG_PCI_BASE, 0, AC_R|AC_W|SA_G|SA_I )
-    tlbentry( CFG_NVRAM_BASE_ADDR, SZ_256M, CFG_NVRAM_BASE_ADDR, 0, AC_R|AC_W|AC_X|SA_W|SA_I )
-    tlbentry( CFG_NAND_ADDR, SZ_256M, CFG_NAND_ADDR, 0, AC_R|AC_W|AC_X|SA_W|SA_I )
+	tlbentry(CFG_NVRAM_BASE_ADDR, SZ_256M, CFG_NVRAM_BASE_ADDR, 0, AC_R|AC_W|AC_X|SA_W|SA_I)
+	tlbentry(CFG_NAND_ADDR, SZ_4K, CFG_NAND_ADDR, 0, AC_R|AC_W|AC_X|SA_W|SA_I)
 
-    /* PCI */
-    tlbentry( CFG_PCI_MEMBASE, SZ_256M, CFG_PCI_MEMBASE, 0, AC_R|AC_W|SA_G|SA_I )
-    tlbentry( CFG_PCI_MEMBASE1, SZ_256M, CFG_PCI_MEMBASE1, 0, AC_R|AC_W|SA_G|SA_I )
-    tlbentry( CFG_PCI_MEMBASE2, SZ_256M, CFG_PCI_MEMBASE2, 0, AC_R|AC_W|SA_G|SA_I )
-    tlbentry( CFG_PCI_MEMBASE3, SZ_256M, CFG_PCI_MEMBASE3, 0, AC_R|AC_W|SA_G|SA_I )
+	/* PCI */
+	tlbentry(CFG_PCI_MEMBASE, SZ_256M, CFG_PCI_MEMBASE, 0, AC_R|AC_W|SA_G|SA_I)
+	tlbentry(CFG_PCI_MEMBASE1, SZ_256M, CFG_PCI_MEMBASE1, 0, AC_R|AC_W|SA_G|SA_I)
+	tlbentry(CFG_PCI_MEMBASE2, SZ_256M, CFG_PCI_MEMBASE2, 0, AC_R|AC_W|SA_G|SA_I)
+	tlbentry(CFG_PCI_MEMBASE3, SZ_256M, CFG_PCI_MEMBASE3, 0, AC_R|AC_W|SA_G|SA_I)
 
-    /* USB 2.0 Device */
-    tlbentry( CFG_USB_DEVICE, SZ_1K, CFG_USB_DEVICE, 0, AC_R|AC_W|SA_G|SA_I )
+	/* USB 2.0 Device */
+	tlbentry(CFG_USB_DEVICE, SZ_1K, CFG_USB_DEVICE, 0, AC_R|AC_W|SA_G|SA_I)
+
+	tlbtab_end
+
+#if defined(CONFIG_NAND_U_BOOT) && !defined(CONFIG_NAND_SPL)
+	/*
+	 * For NAND booting the first TLB has to be reconfigured to full size
+	 * and with caching disabled after running from RAM!
+	 */
+#define TLB00	TLB0(CFG_BOOT_BASE_ADDR, SZ_256M)
+#define TLB01	TLB1(CFG_BOOT_BASE_ADDR, 0)
+#define TLB02	TLB2(AC_R|AC_W|AC_X|SA_G|SA_I)
 
-    tlbtab_end
+	.globl	reconfig_tlb0
+reconfig_tlb0:
+	sync
+	isync
+	addi	r4,r0,0x0000		/* TLB entry #0 */
+	lis	r5,TLB00@h
+	ori	r5,r5,TLB00@l
+	tlbwe	r5,r4,0x0000		/* Save it out */
+	lis	r5,TLB01@h
+	ori	r5,r5,TLB01@l
+	tlbwe	r5,r4,0x0001		/* Save it out */
+	lis	r5,TLB02@h
+	ori	r5,r5,TLB02@l
+	tlbwe	r5,r4,0x0002		/* Save it out */
+	sync
+	isync
+	blr
+#endif
diff --git a/board/amcc/sequoia/sdram.c b/board/amcc/sequoia/sdram.c
index d045df1..78e2cb4 100644
--- a/board/amcc/sequoia/sdram.c
+++ b/board/amcc/sequoia/sdram.c
@@ -387,7 +387,11 @@
 long int initdram (int board_type)
 {
 #if !defined(CONFIG_NAND_U_BOOT) || defined(CONFIG_NAND_SPL)
+#if !defined(CONFIG_NAND_SPL)
 	ulong speed = get_bus_freq(0);
+#else
+	ulong speed = 133333333;	/* 133MHz is on the safe side	*/
+#endif
 
 	mtsdram(DDR0_02, 0x00000000);
 
diff --git a/cpu/ppc4xx/44x_spd_ddr.c b/cpu/ppc4xx/44x_spd_ddr.c
index 10b4c18..c500d3f 100644
--- a/cpu/ppc4xx/44x_spd_ddr.c
+++ b/cpu/ppc4xx/44x_spd_ddr.c
@@ -20,7 +20,7 @@
  * Jun Gu, Artesyn Technology, jung@artesyncp.com
  * Support for AMCC 440 based on OpenBIOS draminit.c from IBM.
  *
- * (C) Copyright 2005
+ * (C) Copyright 2005-2007
  * Stefan Roese, DENX Software Engineering, sr@denx.de.
  *
  * See file CREDITS for list of people who contributed to this
@@ -42,6 +42,11 @@
  * MA 02111-1307 USA
  */
 
+/* define DEBUG for debugging output (obviously ;-)) */
+#if 0
+#define DEBUG
+#endif
+
 #include <common.h>
 #include <asm/processor.h>
 #include <i2c.h>
@@ -246,25 +251,6 @@
 #define MY_TLB_WORD2_I_ENABLE	TLB_WORD2_I_ENABLE	/* disable caching on SDRAM */
 #endif
 
-const unsigned long test[NUMMEMTESTS][NUMMEMWORDS] = {
-	{0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000,
-	 0xFFFFFFFF, 0xFFFFFFFF},
-	{0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF,
-	 0x00000000, 0x00000000},
-	{0xAAAAAAAA, 0xAAAAAAAA, 0x55555555, 0x55555555, 0xAAAAAAAA, 0xAAAAAAAA,
-	 0x55555555, 0x55555555},
-	{0x55555555, 0x55555555, 0xAAAAAAAA, 0xAAAAAAAA, 0x55555555, 0x55555555,
-	 0xAAAAAAAA, 0xAAAAAAAA},
-	{0xA5A5A5A5, 0xA5A5A5A5, 0x5A5A5A5A, 0x5A5A5A5A, 0xA5A5A5A5, 0xA5A5A5A5,
-	 0x5A5A5A5A, 0x5A5A5A5A},
-	{0x5A5A5A5A, 0x5A5A5A5A, 0xA5A5A5A5, 0xA5A5A5A5, 0x5A5A5A5A, 0x5A5A5A5A,
-	 0xA5A5A5A5, 0xA5A5A5A5},
-	{0xAA55AA55, 0xAA55AA55, 0x55AA55AA, 0x55AA55AA, 0xAA55AA55, 0xAA55AA55,
-	 0x55AA55AA, 0x55AA55AA},
-	{0x55AA55AA, 0x55AA55AA, 0xAA55AA55, 0xAA55AA55, 0x55AA55AA, 0x55AA55AA,
-	 0xAA55AA55, 0xAA55AA55}
-};
-
 /* bank_parms is used to sort the bank sizes by descending order */
 struct bank_param {
 	unsigned long cr;
@@ -278,46 +264,37 @@
 #endif
 void program_tlb(u32 start, u32 size, u32 tlb_word2_i_value);
 
-unsigned char spd_read(uchar chip, uint addr);
+static unsigned char spd_read(uchar chip, uint addr);
+static void get_spd_info(unsigned long *dimm_populated,
+			 unsigned char *iic0_dimm_addr,
+			 unsigned long num_dimm_banks);
+static void check_mem_type(unsigned long *dimm_populated,
+			   unsigned char *iic0_dimm_addr,
+			   unsigned long num_dimm_banks);
+static void check_volt_type(unsigned long *dimm_populated,
+			    unsigned char *iic0_dimm_addr,
+			    unsigned long num_dimm_banks);
+static void program_cfg0(unsigned long *dimm_populated,
+			 unsigned char *iic0_dimm_addr,
+			 unsigned long  num_dimm_banks);
+static void program_cfg1(unsigned long *dimm_populated,
+			 unsigned char *iic0_dimm_addr,
+			 unsigned long num_dimm_banks);
+static void program_rtr(unsigned long *dimm_populated,
+			unsigned char *iic0_dimm_addr,
+			unsigned long num_dimm_banks);
+static void program_tr0(unsigned long *dimm_populated,
+			unsigned char *iic0_dimm_addr,
+			unsigned long num_dimm_banks);
+static void program_tr1(void);
 
-void get_spd_info(unsigned long* dimm_populated,
-		  unsigned char* iic0_dimm_addr,
-		  unsigned long	 num_dimm_banks);
-
-void check_mem_type
-(unsigned long* dimm_populated,
- unsigned char* iic0_dimm_addr,
- unsigned long	num_dimm_banks);
-
-void check_volt_type
-(unsigned long* dimm_populated,
- unsigned char* iic0_dimm_addr,
- unsigned long	num_dimm_banks);
-
-void program_cfg0(unsigned long* dimm_populated,
-		  unsigned char* iic0_dimm_addr,
-		  unsigned long	 num_dimm_banks);
-
-void program_cfg1(unsigned long* dimm_populated,
-		  unsigned char* iic0_dimm_addr,
-		  unsigned long	 num_dimm_banks);
-
-void program_rtr (unsigned long* dimm_populated,
-		  unsigned char* iic0_dimm_addr,
-		  unsigned long	 num_dimm_banks);
-
-void program_tr0 (unsigned long* dimm_populated,
-		  unsigned char* iic0_dimm_addr,
-		  unsigned long	 num_dimm_banks);
-
-void program_tr1 (void);
-
-void program_ecc (unsigned long	 num_bytes);
+#ifdef CONFIG_DDR_ECC
+static void program_ecc(unsigned long num_bytes);
+#endif
 
-unsigned
-long  program_bxcr(unsigned long* dimm_populated,
-		   unsigned char* iic0_dimm_addr,
-		   unsigned long  num_dimm_banks);
+static unsigned long program_bxcr(unsigned long *dimm_populated,
+				  unsigned char *iic0_dimm_addr,
+				  unsigned long num_dimm_banks);
 
 /*
  * This function is reading data from the DIMM module EEPROM over the SPD bus
@@ -328,7 +305,6 @@
  * BUG: Don't handle ECC memory
  * BUG: A few values in the TR register is currently hardcoded
  */
-
 long int spd_sdram(void) {
 	unsigned char iic0_dimm_addr[] = SPD_EEPROM_ADDRESS;
 	unsigned long dimm_populated[sizeof(iic0_dimm_addr)];
@@ -421,9 +397,8 @@
 	 */
 	while (1) {
 		mfsdram(mem_mcsts, mcsts);
-		if ((mcsts & SDRAM_MCSTS_MRSC) != 0) {
+		if ((mcsts & SDRAM_MCSTS_MRSC) != 0)
 			break;
-		}
 	}
 
 	/*
@@ -431,14 +406,17 @@
 	 */
 	program_tr1();
 
+#ifdef CONFIG_DDR_ECC
 	/*
-	 * if ECC is enabled, initialize parity bits
+	 * If ecc is enabled, initialize the parity bits.
 	 */
+	program_ecc(total_size);
+#endif
 
 	return total_size;
 }
 
-unsigned char spd_read(uchar chip, uint addr)
+static unsigned char spd_read(uchar chip, uint addr)
 {
 	unsigned char data[2];
 
@@ -460,9 +438,9 @@
 	return 0;
 }
 
-void get_spd_info(unsigned long*   dimm_populated,
-		  unsigned char*   iic0_dimm_addr,
-		  unsigned long	   num_dimm_banks)
+static void get_spd_info(unsigned long *dimm_populated,
+			 unsigned char *iic0_dimm_addr,
+			 unsigned long num_dimm_banks)
 {
 	unsigned long dimm_num;
 	unsigned long dimm_found;
@@ -480,14 +458,10 @@
 		if ((num_of_bytes != 0) && (total_size != 0)) {
 			dimm_populated[dimm_num] = TRUE;
 			dimm_found = TRUE;
-#if 0
-			printf("DIMM slot %lu: populated\n", dimm_num);
-#endif
+			debug("DIMM slot %lu: populated\n", dimm_num);
 		} else {
 			dimm_populated[dimm_num] = FALSE;
-#if 0
-			printf("DIMM slot %lu: Not populated\n", dimm_num);
-#endif
+			debug("DIMM slot %lu: Not populated\n", dimm_num);
 		}
 	}
 
@@ -497,9 +471,9 @@
 	}
 }
 
-void check_mem_type(unsigned long*   dimm_populated,
-		    unsigned char*   iic0_dimm_addr,
-		    unsigned long    num_dimm_banks)
+static void check_mem_type(unsigned long *dimm_populated,
+			   unsigned char *iic0_dimm_addr,
+			   unsigned long num_dimm_banks)
 {
 	unsigned long dimm_num;
 	unsigned char dimm_type;
@@ -509,9 +483,7 @@
 			dimm_type = spd_read(iic0_dimm_addr[dimm_num], 2);
 			switch (dimm_type) {
 			case 7:
-#if 0
-				printf("DIMM slot %lu: DDR SDRAM detected\n", dimm_num);
-#endif
+				debug("DIMM slot %lu: DDR SDRAM detected\n", dimm_num);
 				break;
 			default:
 				printf("ERROR: Unsupported DIMM detected in slot %lu.\n",
@@ -525,10 +497,9 @@
 	}
 }
 
-
-void check_volt_type(unsigned long*   dimm_populated,
-		     unsigned char*   iic0_dimm_addr,
-		     unsigned long    num_dimm_banks)
+static void check_volt_type(unsigned long *dimm_populated,
+			    unsigned char *iic0_dimm_addr,
+			    unsigned long num_dimm_banks)
 {
 	unsigned long dimm_num;
 	unsigned long voltage_type;
@@ -541,18 +512,16 @@
 				       dimm_num);
 				hang();
 			} else {
-#if 0
-				printf("DIMM %lu voltage level supported.\n", dimm_num);
-#endif
+				debug("DIMM %lu voltage level supported.\n", dimm_num);
 			}
 			break;
 		}
 	}
 }
 
-void program_cfg0(unsigned long* dimm_populated,
-		  unsigned char* iic0_dimm_addr,
-		  unsigned long	 num_dimm_banks)
+static void program_cfg0(unsigned long *dimm_populated,
+			 unsigned char *iic0_dimm_addr,
+			 unsigned long num_dimm_banks)
 {
 	unsigned long dimm_num;
 	unsigned long cfg0;
@@ -640,9 +609,9 @@
 	mtsdram(mem_cfg0, cfg0);
 }
 
-void program_cfg1(unsigned long* dimm_populated,
-		  unsigned char* iic0_dimm_addr,
-		  unsigned long	 num_dimm_banks)
+static void program_cfg1(unsigned long *dimm_populated,
+			 unsigned char *iic0_dimm_addr,
+			 unsigned long num_dimm_banks)
 {
 	unsigned long cfg1;
 	mfsdram(mem_cfg1, cfg1);
@@ -658,9 +627,9 @@
 	mtsdram(mem_cfg1, cfg1);
 }
 
-void program_rtr (unsigned long* dimm_populated,
-		  unsigned char* iic0_dimm_addr,
-		  unsigned long	 num_dimm_banks)
+static void program_rtr(unsigned long *dimm_populated,
+			unsigned char *iic0_dimm_addr,
+			unsigned long num_dimm_banks)
 {
 	unsigned long dimm_num;
 	unsigned long bus_period_x_10;
@@ -676,7 +645,6 @@
 	get_sys_info(&sys_info);
 	bus_period_x_10 = ONE_BILLION / (sys_info.freqPLB / 10);
 
-
 	for (dimm_num = 0;  dimm_num < num_dimm_banks; dimm_num++) {
 		if (dimm_populated[dimm_num] == TRUE) {
 			refresh_rate_type = 0x7F & spd_read(iic0_dimm_addr[dimm_num], 12);
@@ -719,9 +687,9 @@
 	mtsdram(mem_rtr, sdram_rtr);
 }
 
-void program_tr0 (unsigned long* dimm_populated,
-		  unsigned char* iic0_dimm_addr,
-		  unsigned long	 num_dimm_banks)
+static void program_tr0(unsigned long *dimm_populated,
+			 unsigned char *iic0_dimm_addr,
+			 unsigned long num_dimm_banks)
 {
 	unsigned long dimm_num;
 	unsigned long tr0;
@@ -1001,13 +969,73 @@
 		break;
 	}
 
-#if 0
-	printf("tr0: %x\n", tr0);
-#endif
+	debug("tr0: %x\n", tr0);
 	mtsdram(mem_tr0, tr0);
 }
 
+static int short_mem_test(void)
+{
+	unsigned long i, j;
+	unsigned long bxcr_num;
+	unsigned long *membase;
+	const unsigned long test[NUMMEMTESTS][NUMMEMWORDS] = {
+		{0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF,
+		 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF},
+		{0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000,
+		 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000},
+		{0xAAAAAAAA, 0xAAAAAAAA, 0x55555555, 0x55555555,
+		 0xAAAAAAAA, 0xAAAAAAAA, 0x55555555, 0x55555555},
+		{0x55555555, 0x55555555, 0xAAAAAAAA, 0xAAAAAAAA,
+		 0x55555555, 0x55555555, 0xAAAAAAAA, 0xAAAAAAAA},
+		{0xA5A5A5A5, 0xA5A5A5A5, 0x5A5A5A5A, 0x5A5A5A5A,
+		 0xA5A5A5A5, 0xA5A5A5A5, 0x5A5A5A5A, 0x5A5A5A5A},
+		{0x5A5A5A5A, 0x5A5A5A5A, 0xA5A5A5A5, 0xA5A5A5A5,
+		 0x5A5A5A5A, 0x5A5A5A5A, 0xA5A5A5A5, 0xA5A5A5A5},
+		{0xAA55AA55, 0xAA55AA55, 0x55AA55AA, 0x55AA55AA,
+		 0xAA55AA55, 0xAA55AA55, 0x55AA55AA, 0x55AA55AA},
+		{0x55AA55AA, 0x55AA55AA, 0xAA55AA55, 0xAA55AA55,
+		 0x55AA55AA, 0x55AA55AA, 0xAA55AA55, 0xAA55AA55}};
+
+	for (bxcr_num = 0; bxcr_num < MAXBXCR; bxcr_num++) {
+		mtdcr(memcfga, mem_b0cr + (bxcr_num << 2));
+		if ((mfdcr(memcfgd) & SDRAM_BXCR_SDBE) == SDRAM_BXCR_SDBE) {
+			/* Bank is enabled */
+			membase = (unsigned long*)
+				(mfdcr(memcfgd) & SDRAM_BXCR_SDBA_MASK);
+
+			/*
+			 * Run the short memory test
+			 */
+			for (i = 0; i < NUMMEMTESTS; i++) {
+				for (j = 0; j < NUMMEMWORDS; j++) {
+					membase[j] = test[i][j];
+					ppcDcbf((unsigned long)&(membase[j]));
+				}
+
+				for (j = 0; j < NUMMEMWORDS; j++) {
+					if (membase[j] != test[i][j]) {
+						ppcDcbf((unsigned long)&(membase[j]));
+						return 0;
+					}
+					ppcDcbf((unsigned long)&(membase[j]));
+				}
+
+				if (j < NUMMEMWORDS)
+					return 0;
+			}
+
+			/*
+			 * see if the rdclt value passed
+			 */
+			if (i < NUMMEMTESTS)
+				return 0;
+		}
+	}
+
+	return 1;
+}
+
-void program_tr1 (void)
+static void program_tr1(void)
 {
 	unsigned long tr0;
 	unsigned long tr1;
@@ -1015,8 +1043,7 @@
 	unsigned long ecc_temp;
 	unsigned long dlycal;
 	unsigned long dly_val;
-	unsigned long i, j, k;
-	unsigned long bxcr_num;
+	unsigned long k;
 	unsigned long max_pass_length;
 	unsigned long current_pass_length;
 	unsigned long current_fail_length;
@@ -1029,7 +1056,6 @@
 	unsigned char window_found;
 	unsigned char fail_found;
 	unsigned char pass_found;
-	unsigned long * membase;
 	PPC440_SYS_INFO sys_info;
 
 	/*
@@ -1079,55 +1105,16 @@
 	window_found = FALSE;
 	fail_found = FALSE;
 	pass_found = FALSE;
-#ifdef DEBUG
-	printf("Starting memory test ");
-#endif
+	debug("Starting memory test ");
+
 	for (k = 0; k < NUMHALFCYCLES; k++) {
-		for (rdclt = 0; rdclt < dly_val; rdclt++)  {
+		for (rdclt = 0; rdclt < dly_val; rdclt++) {
 			/*
 			 * Set the timing reg for the test.
 			 */
 			mtsdram(mem_tr1, (tr1 | SDRAM_TR1_RDCT_ENCODE(rdclt)));
 
-			for (bxcr_num = 0; bxcr_num < MAXBXCR; bxcr_num++) {
-				mtdcr(memcfga, mem_b0cr + (bxcr_num<<2));
-				if ((mfdcr(memcfgd) & SDRAM_BXCR_SDBE) == SDRAM_BXCR_SDBE) {
-					/* Bank is enabled */
-					membase = (unsigned long*)
-						(mfdcr(memcfgd) & SDRAM_BXCR_SDBA_MASK);
-
-					/*
-					 * Run the short memory test
-					 */
-					for (i = 0; i < NUMMEMTESTS; i++) {
-						for (j = 0; j < NUMMEMWORDS; j++) {
-							membase[j] = test[i][j];
-							ppcDcbf((unsigned long)&(membase[j]));
-						}
-
-						for (j = 0; j < NUMMEMWORDS; j++) {
-							if (membase[j] != test[i][j]) {
-								ppcDcbf((unsigned long)&(membase[j]));
-								break;
-							}
-							ppcDcbf((unsigned long)&(membase[j]));
-						}
-
-						if (j < NUMMEMWORDS) {
-							break;
-						}
-					}
-
-					/*
-					 * see if the rdclt value passed
-					 */
-					if (i < NUMMEMTESTS) {
-						break;
-					}
-				}
-			}
-
-			if (bxcr_num == MAXBXCR) {
+			if (short_mem_test()) {
 				if (fail_found == TRUE) {
 					pass_found = TRUE;
 					if (current_pass_length == 0) {
@@ -1157,9 +1144,8 @@
 				}
 			}
 		}
-#ifdef DEBUG
-		printf(".");
-#endif
+		debug(".");
+
 		if (window_found == TRUE) {
 			break;
 		}
@@ -1167,9 +1153,7 @@
 		tr1 = tr1 ^ SDRAM_TR1_RDCD_MASK;
 		rdclt_offset += dly_val;
 	}
-#ifdef DEBUG
-	printf("\n");
-#endif
+	debug("\n");
 
 	/*
 	 * make sure we find the window
@@ -1218,18 +1202,17 @@
 	}
 	tr1 |= SDRAM_TR1_RDCT_ENCODE(rdclt_average);
 
-#if 0
-	printf("tr1: %x\n", tr1);
-#endif
+	debug("tr1: %x\n", tr1);
+
 	/*
 	 * program SDRAM Timing Register 1 TR1
 	 */
 	mtsdram(mem_tr1, tr1);
 }
 
-unsigned long program_bxcr(unsigned long* dimm_populated,
-			   unsigned char* iic0_dimm_addr,
-			   unsigned long  num_dimm_banks)
+static unsigned long program_bxcr(unsigned long *dimm_populated,
+				  unsigned char *iic0_dimm_addr,
+				  unsigned long num_dimm_banks)
 {
 	unsigned long dimm_num;
 	unsigned long bank_base_addr;
@@ -1262,8 +1245,8 @@
 #ifdef CONFIG_BAMBOO
 	/*
 	 * This next section is hardware dependent and must be programmed
-	 * to match the hardware.  For bammboo, the following holds...
-	 * 1. SDRAM0_B0CR: Bank 0 of dimm 0 ctrl_bank_num : 0
+	 * to match the hardware.  For bamboo, the following holds...
+	 * 1. SDRAM0_B0CR: Bank 0 of dimm 0 ctrl_bank_num : 0 (soldered onboard)
 	 * 2. SDRAM0_B1CR: Bank 0 of dimm 1 ctrl_bank_num : 1
 	 * 3. SDRAM0_B2CR: Bank 1 of dimm 1 ctrl_bank_num : 1
 	 * 4. SDRAM0_B3CR: Bank 0 of dimm 2 ctrl_bank_num : 3
@@ -1273,10 +1256,12 @@
 	ctrl_bank_num[1] = 1;
 	ctrl_bank_num[2] = 3;
 #else
+	/*
+	 * Ocotea, Ebony and the other IBM/AMCC eval boards have
+	 * 2 DIMM slots with each max 2 banks
+	 */
 	ctrl_bank_num[0] = 0;
-	ctrl_bank_num[1] = 1;
-	ctrl_bank_num[2] = 2;
-	ctrl_bank_num[3] = 3;
+	ctrl_bank_num[1] = 2;
 #endif
 
 	/*
@@ -1290,6 +1275,8 @@
 			num_col_addr = spd_read(iic0_dimm_addr[dimm_num], 4);
 			num_banks    = spd_read(iic0_dimm_addr[dimm_num], 5);
 			bank_size_id = spd_read(iic0_dimm_addr[dimm_num], 31);
+			debug("DIMM%d: row=%d col=%d banks=%d\n", dimm_num,
+			      num_row_addr, num_col_addr, num_banks);
 
 			/*
 			 * Set the SDRAM0_BxCR regs
@@ -1354,9 +1341,12 @@
 			cr |= SDRAM_BXCR_SDBE;
 
  			for (i = 0; i < num_banks; i++) {
-				bank_parms[ctrl_bank_num[dimm_num]+i+dimm_num].bank_size_bytes =
-					(4 * 1024 * 1024) * bank_size_id;
-				bank_parms[ctrl_bank_num[dimm_num]+i+dimm_num].cr = cr;
+				bank_parms[ctrl_bank_num[dimm_num]+i].bank_size_bytes =
+					(4 << 20) * bank_size_id;
+				bank_parms[ctrl_bank_num[dimm_num]+i].cr = cr;
+				debug("DIMM%d-bank %d (SDRAM0_B%dCR): bank_size_bytes=%d\n",
+				      dimm_num, i, ctrl_bank_num[dimm_num]+i,
+				      bank_parms[ctrl_bank_num[dimm_num]+i].bank_size_bytes);
  			}
 		}
 	}
@@ -1400,13 +1390,15 @@
 				bank_parms[sorted_bank_num[bx_cr_num]].cr;
 			mtdcr(memcfgd, temp);
 			bank_base_addr += bank_parms[sorted_bank_num[bx_cr_num]].bank_size_bytes;
+			debug("SDRAM0_B%dCR=0x%08lx\n", sorted_bank_num[bx_cr_num], temp);
 		}
 	}
 
 	return(bank_base_addr);
 }
 
-void program_ecc (unsigned long	 num_bytes)
+#ifdef CONFIG_DDR_ECC
+static void program_ecc(unsigned long num_bytes)
 {
 	unsigned long bank_base_addr;
 	unsigned long current_address;
@@ -1425,14 +1417,12 @@
 	bank_base_addr = CFG_SDRAM_BASE;
 
 	if ((cfg0 & SDRAM_CFG0_MCHK_MASK) != SDRAM_CFG0_MCHK_NON) {
-		mtsdram(mem_cfg0, (cfg0 & ~SDRAM_CFG0_MCHK_MASK) |
-			SDRAM_CFG0_MCHK_GEN);
+		mtsdram(mem_cfg0, (cfg0 & ~SDRAM_CFG0_MCHK_MASK) | SDRAM_CFG0_MCHK_GEN);
 
-		if ((cfg0 & SDRAM_CFG0_DMWD_MASK) == SDRAM_CFG0_DMWD_32) {
+		if ((cfg0 & SDRAM_CFG0_DMWD_MASK) == SDRAM_CFG0_DMWD_32)
 			address_increment = 4;
-		} else {
+		else
 			address_increment = 8;
-		}
 
 		current_address = (unsigned long)(bank_base_addr);
 		end_address = (unsigned long)(bank_base_addr) + num_bytes;
@@ -1446,4 +1436,5 @@
 			SDRAM_CFG0_MCHK_CHK);
 	}
 }
+#endif /* CONFIG_DDR_ECC */
 #endif /* CONFIG_SPD_EEPROM */
diff --git a/cpu/ppc4xx/ndfc.c b/cpu/ppc4xx/ndfc.c
index 08dfc32..f63fc79 100644
--- a/cpu/ppc4xx/ndfc.c
+++ b/cpu/ppc4xx/ndfc.c
@@ -3,7 +3,7 @@
  *   Platform independend driver for NDFC (NanD Flash Controller)
  *   integrated into EP440 cores
  *
- * (C) Copyright 2006
+ * (C) Copyright 2006-2007
  * Stefan Roese, DENX Software Engineering, sr@denx.de.
  *
  * Based on original work by
@@ -38,7 +38,9 @@
 
 #include <nand.h>
 #include <linux/mtd/ndfc.h>
+#include <linux/mtd/nand_ecc.h>
 #include <asm/processor.h>
+#include <asm/io.h>
 #include <ppc4xx.h>
 
 static u8 hwctl = 0;
@@ -70,11 +72,11 @@
 	ulong base = (ulong) this->IO_ADDR_W & 0xfffffffc;
 
 	if (hwctl & 0x1)
-		out8(base + NDFC_CMD, byte);
+		out_8((u8 *)(base + NDFC_CMD), byte);
 	else if (hwctl & 0x2)
-		out8(base + NDFC_ALE, byte);
+		out_8((u8 *)(base + NDFC_ALE), byte);
 	else
-		out8(base + NDFC_DATA, byte);
+		out_8((u8 *)(base + NDFC_DATA), byte);
 }
 
 static u_char ndfc_read_byte(struct mtd_info *mtdinfo)
@@ -82,7 +84,7 @@
 	struct nand_chip *this = mtdinfo->priv;
 	ulong base = (ulong) this->IO_ADDR_W & 0xfffffffc;
 
-	return (in8(base + NDFC_DATA));
+	return (in_8((u8 *)(base + NDFC_DATA)));
 }
 
 static int ndfc_dev_ready(struct mtd_info *mtdinfo)
@@ -90,17 +92,41 @@
 	struct nand_chip *this = mtdinfo->priv;
 	ulong base = (ulong) this->IO_ADDR_W & 0xfffffffc;
 
-	while (!(in32(base + NDFC_STAT) & NDFC_STAT_IS_READY))
+	while (!(in_be32((u32 *)(base + NDFC_STAT)) & NDFC_STAT_IS_READY))
 		;
 
 	return 1;
 }
 
-#ifndef CONFIG_NAND_SPL
-/*
- * Don't use these speedup functions in NAND boot image, since the image
- * has to fit into 4kByte.
- */
+static void ndfc_enable_hwecc(struct mtd_info *mtdinfo, int mode)
+{
+	struct nand_chip *this = mtdinfo->priv;
+	ulong base = (ulong) this->IO_ADDR_W & 0xfffffffc;
+	u32 ccr;
+
+	ccr = in_be32((u32 *)(base + NDFC_CCR));
+	ccr |= NDFC_CCR_RESET_ECC;
+	out_be32((u32 *)(base + NDFC_CCR), ccr);
+}
+
+static int ndfc_calculate_ecc(struct mtd_info *mtdinfo,
+			      const u_char *dat, u_char *ecc_code)
+{
+	struct nand_chip *this = mtdinfo->priv;
+	ulong base = (ulong) this->IO_ADDR_W & 0xfffffffc;
+	u32 ecc;
+	u8 *p = (u8 *)&ecc;
+
+	ecc = in_be32((u32 *)(base + NDFC_ECC));
+
+	/* The NDFC uses Smart Media (SMC) bytes order
+	 */
+	ecc_code[0] = p[2];
+	ecc_code[1] = p[1];
+	ecc_code[2] = p[3];
+
+	return 0;
+}
 
 /*
  * Speedups for buffer read/write/verify
@@ -116,9 +142,14 @@
 	uint32_t *p = (uint32_t *) buf;
 
 	for (;len > 0; len -= 4)
-		*p++ = in32(base + NDFC_DATA);
+		*p++ = in_be32((u32 *)(base + NDFC_DATA));
 }
 
+#ifndef CONFIG_NAND_SPL
+/*
+ * Don't use these speedup functions in NAND boot image, since the image
+ * has to fit into 4kByte.
+ */
 static void ndfc_write_buf(struct mtd_info *mtdinfo, const uint8_t *buf, int len)
 {
 	struct nand_chip *this = mtdinfo->priv;
@@ -126,7 +157,7 @@
 	uint32_t *p = (uint32_t *) buf;
 
 	for (; len > 0; len -= 4)
-		out32(base + NDFC_DATA, *p++);
+		out_be32((u32 *)(base + NDFC_DATA), *p++);
 }
 
 static int ndfc_verify_buf(struct mtd_info *mtdinfo, const uint8_t *buf, int len)
@@ -136,7 +167,7 @@
 	uint32_t *p = (uint32_t *) buf;
 
 	for (; len > 0; len -= 4)
-		if (*p++ != in32(base + NDFC_DATA))
+		if (*p++ != in_be32((u32 *)(base + NDFC_DATA)))
 			return -1;
 
 	return 0;
@@ -153,8 +184,8 @@
 	ulong base = (ulong)nand->IO_ADDR_W & 0xfffffffc;
 
 	/* Set NandFlash Core Configuration Register */
-	/* 1col x 2 rows */
-	out32(base + NDFC_CCR, 0x00000000 | (cs << 24));
+	/* 1 col x 2 rows */
+	out_be32((u32 *)(base + NDFC_CCR), 0x00000000 | (cs << 24));
 }
 
 int board_nand_init(struct nand_chip *nand)
@@ -162,16 +193,19 @@
 	int cs = (ulong)nand->IO_ADDR_W & 0x00000003;
 	ulong base = (ulong)nand->IO_ADDR_W & 0xfffffffc;
 
-	nand->eccmode = NAND_ECC_SOFT;
-
 	nand->hwcontrol  = ndfc_hwcontrol;
 	nand->read_byte  = ndfc_read_byte;
+	nand->read_buf   = ndfc_read_buf;
 	nand->write_byte = ndfc_write_byte;
 	nand->dev_ready  = ndfc_dev_ready;
 
+	nand->eccmode = NAND_ECC_HW3_256;
+	nand->enable_hwecc = ndfc_enable_hwecc;
+	nand->calculate_ecc = ndfc_calculate_ecc;
+	nand->correct_data = nand_correct_data;
+
 #ifndef CONFIG_NAND_SPL
 	nand->write_buf  = ndfc_write_buf;
-	nand->read_buf   = ndfc_read_buf;
 	nand->verify_buf = ndfc_verify_buf;
 #else
 	/*
@@ -187,7 +221,7 @@
 	 * Select required NAND chip in NDFC
 	 */
 	board_nand_select_device(nand, cs);
-	out32(base + NDFC_BCFG0 + (cs << 2), 0x80002222);
+	out_be32((u32 *)(base + NDFC_BCFG0 + (cs << 2)), 0x80002222);
 	return 0;
 }
 
diff --git a/cpu/ppc4xx/start.S b/cpu/ppc4xx/start.S
index 85660b4..78de300 100644
--- a/cpu/ppc4xx/start.S
+++ b/cpu/ppc4xx/start.S
@@ -110,6 +110,13 @@
 # endif
 #endif /* CFG_INIT_DCACHE_CS */
 
+#define function_prolog(func_name)      .text; \
+					.align 2; \
+					.globl func_name; \
+					func_name:
+#define function_epilog(func_name)      .type func_name,@function; \
+					.size func_name,.-func_name
+
 /* We don't want the  MMU yet.
 */
 #undef	MSR_KERNEL
@@ -388,8 +395,9 @@
 2:
 
 #if defined(CONFIG_NAND_SPL)
+#if defined(CONFIG_440EPX) || defined(CONFIG_440GRX)
 	/*
-	 * Enable internal SRAM
+	 * Enable internal SRAM (only on 440EPx/GRx, 440EP/GR have no OCM)
 	 */
 	lis	r2,0x7fff
 	ori	r2,r2,0xffff
@@ -399,6 +407,45 @@
 	mfdcr	r1,isram0_pmeg
 	and	r1,r1,r2		/* Disable pwr mgmt */
 	mtdcr	isram0_pmeg,r1
+#endif
+#if defined(CONFIG_440EP)
+	/*
+	 * On 440EP with no internal SRAM, we setup SDRAM very early
+	 * and copy the NAND_SPL to SDRAM and jump to it
+	 */
+	/* Clear Dcache to use as RAM */
+	addis	r3,r0,CFG_INIT_RAM_ADDR@h
+	ori	r3,r3,CFG_INIT_RAM_ADDR@l
+	addis	r4,r0,CFG_INIT_RAM_END@h
+	ori	r4,r4,CFG_INIT_RAM_END@l
+	rlwinm. r5,r4,0,27,31
+	rlwinm	r5,r4,27,5,31
+	beq	..d_ran3
+	addi	r5,r5,0x0001
+..d_ran3:
+	mtctr	r5
+..d_ag3:
+	dcbz	r0,r3
+	addi	r3,r3,32
+	bdnz	..d_ag3
+	/*----------------------------------------------------------------*/
+	/* Setup the stack in internal SRAM */
+	/*----------------------------------------------------------------*/
+	lis	r1,CFG_INIT_RAM_ADDR@h
+	ori	r1,r1,CFG_INIT_SP_OFFSET@l
+	li	r0,0
+	stwu	r0,-4(r1)
+	stwu	r0,-4(r1)		/* Terminate call chain */
+
+	stwu	r1,-8(r1)		/* Save back chain and move SP */
+	lis	r0,RESET_VECTOR@h	/* Address of reset vector */
+	ori	r0,r0, RESET_VECTOR@l
+	stwu	r1,-8(r1)		/* Save back chain and move SP */
+	stw	r0,+12(r1)		/* Save return addr (underflow vect) */
+	sync
+	bl	early_sdram_init
+	sync
+#endif /* CONFIG_440EP */
 
 	/*
 	 * Copy SPL from cache into internal SRAM
@@ -429,7 +476,7 @@
 start_ram:
 	sync
 	isync
-#endif
+#endif /* CONFIG_NAND_SPL */
 
 	bl	3f
 	b	_start
@@ -1137,7 +1184,6 @@
 	lwz	r1,GPR1(r1)
 	SYNC
 	rfci
-#endif /* CONFIG_NAND_SPL */
 
 /* Cache functions.
 */
@@ -1255,24 +1301,6 @@
 	blr
 
 /*------------------------------------------------------------------------------- */
-/* Function:	 in8 */
-/* Description:	 Input 8 bits */
-/*------------------------------------------------------------------------------- */
-	.globl	in8
-in8:
-	lbz	r3,0x0000(r3)
-	blr
-
-/*------------------------------------------------------------------------------- */
-/* Function:	 out8 */
-/* Description:	 Output 8 bits */
-/*------------------------------------------------------------------------------- */
-	.globl	out8
-out8:
-	stb	r4,0x0000(r3)
-	blr
-
-/*------------------------------------------------------------------------------- */
 /* Function:	 out16 */
 /* Description:	 Output 16 bits */
 /*------------------------------------------------------------------------------- */
@@ -1291,15 +1319,6 @@
 	blr
 
 /*------------------------------------------------------------------------------- */
-/* Function:	 out32 */
-/* Description:	 Output 32 bits */
-/*------------------------------------------------------------------------------- */
-	.globl	out32
-out32:
-	stw	r4,0x0000(r3)
-	blr
-
-/*------------------------------------------------------------------------------- */
 /* Function:	 out32r */
 /* Description:	 Byte reverse and output 32 bits */
 /*------------------------------------------------------------------------------- */
@@ -1327,15 +1346,6 @@
 	blr
 
 /*------------------------------------------------------------------------------- */
-/* Function:	 in32 */
-/* Description:	 Input 32 bits */
-/*------------------------------------------------------------------------------- */
-	.globl	in32
-in32:
-	lwz	3,0x0000(3)
-	blr
-
-/*------------------------------------------------------------------------------- */
 /* Function:	 in32r */
 /* Description:	 Input 32 bits and byte reverse */
 /*------------------------------------------------------------------------------- */
@@ -1377,9 +1387,6 @@
 	sync
 	blr
 
-/*------------------------------------------------------------------------------*/
-
-#ifndef CONFIG_NAND_SPL
 /*
  * void relocate_code (addr_sp, gd, addr_moni)
  *
@@ -1644,8 +1651,88 @@
 	stw	r0, 4(r7)
 
 	blr
+
+#if defined(CONFIG_440)
+/*----------------------------------------------------------------------------+
+| dcbz_area.
++----------------------------------------------------------------------------*/
+	function_prolog(dcbz_area)
+	rlwinm. r5,r4,0,27,31
+	rlwinm  r5,r4,27,5,31
+	beq     ..d_ra2
+	addi    r5,r5,0x0001
+..d_ra2:mtctr   r5
+..d_ag2:dcbz    r0,r3
+	addi    r3,r3,32
+	bdnz    ..d_ag2
+	sync
+	blr
+	function_epilog(dcbz_area)
+
+/*----------------------------------------------------------------------------+
+| dflush.  Assume 32K at vector address is cachable.
++----------------------------------------------------------------------------*/
+	function_prolog(dflush)
+	mfmsr   r9
+	rlwinm  r8,r9,0,15,13
+	rlwinm  r8,r8,0,17,15
+	mtmsr   r8
+	addi    r3,r0,0x0000
+	mtspr   dvlim,r3
+	mfspr   r3,ivpr
+	addi    r4,r0,1024
+	mtctr   r4
+..dflush_loop:
+	lwz     r6,0x0(r3)
+	addi    r3,r3,32
+	bdnz    ..dflush_loop
+	addi    r3,r3,-32
+	mtctr   r4
+..ag:   dcbf    r0,r3
+	addi    r3,r3,-32
+	bdnz    ..ag
+	sync
+	mtmsr   r9
+	blr
+	function_epilog(dflush)
+#endif /* CONFIG_440 */
 #endif /* CONFIG_NAND_SPL */
 
+/*------------------------------------------------------------------------------- */
+/* Function:	 in8 */
+/* Description:	 Input 8 bits */
+/*------------------------------------------------------------------------------- */
+	.globl	in8
+in8:
+	lbz	r3,0x0000(r3)
+	blr
+
+/*------------------------------------------------------------------------------- */
+/* Function:	 out8 */
+/* Description:	 Output 8 bits */
+/*------------------------------------------------------------------------------- */
+	.globl	out8
+out8:
+	stb	r4,0x0000(r3)
+	blr
+
+/*------------------------------------------------------------------------------- */
+/* Function:	 out32 */
+/* Description:	 Output 32 bits */
+/*------------------------------------------------------------------------------- */
+	.globl	out32
+out32:
+	stw	r4,0x0000(r3)
+	blr
+
+/*------------------------------------------------------------------------------- */
+/* Function:	 in32 */
+/* Description:	 Input 32 bits */
+/*------------------------------------------------------------------------------- */
+	.globl	in32
+in32:
+	lwz	3,0x0000(3)
+	blr
 
 /**************************************************************************/
 /* PPC405EP specific stuff						  */
@@ -1892,13 +1979,6 @@
 #endif /* CONFIG_405EP */
 
 #if defined(CONFIG_440)
-#define function_prolog(func_name)	.text; \
-					.align 2; \
-					.globl func_name; \
-					func_name:
-#define function_epilog(func_name)	.type func_name,@function; \
-					.size func_name,.-func_name
-
 /*----------------------------------------------------------------------------+
 | mttlb3.
 +----------------------------------------------------------------------------*/
@@ -1946,47 +2026,4 @@
 	TLBRE(3,3,0)
 	blr
 	function_epilog(mftlb1)
-
-/*----------------------------------------------------------------------------+
-| dcbz_area.
-+----------------------------------------------------------------------------*/
-	function_prolog(dcbz_area)
-	rlwinm. r5,r4,0,27,31
-	rlwinm	r5,r4,27,5,31
-	beq	..d_ra2
-	addi	r5,r5,0x0001
-..d_ra2:mtctr	r5
-..d_ag2:dcbz	r0,r3
-	addi	r3,r3,32
-	bdnz	..d_ag2
-	sync
-	blr
-	function_epilog(dcbz_area)
-
-/*----------------------------------------------------------------------------+
-| dflush.  Assume 32K at vector address is cachable.
-+----------------------------------------------------------------------------*/
-	function_prolog(dflush)
-	mfmsr	r9
-	rlwinm	r8,r9,0,15,13
-	rlwinm	r8,r8,0,17,15
-	mtmsr	r8
-	addi	r3,r0,0x0000
-	mtspr	dvlim,r3
-	mfspr	r3,ivpr
-	addi	r4,r0,1024
-	mtctr	r4
-..dflush_loop:
-	lwz	r6,0x0(r3)
-	addi	r3,r3,32
-	bdnz	..dflush_loop
-	addi	r3,r3,-32
-	mtctr	r4
-..ag:	dcbf	r0,r3
-	addi	r3,r3,-32
-	bdnz	..ag
-	sync
-	mtmsr	r9
-	blr
-	function_epilog(dflush)
 #endif /* CONFIG_440 */
diff --git a/drivers/nand/nand_ecc.c b/drivers/nand/nand_ecc.c
index f33be96..90274e6 100644
--- a/drivers/nand/nand_ecc.c
+++ b/drivers/nand/nand_ecc.c
@@ -40,6 +40,13 @@
 #if (CONFIG_COMMANDS & CFG_CMD_NAND) && !defined(CFG_NAND_LEGACY)
 
 #include<linux/mtd/mtd.h>
+
+/*
+ * NAND-SPL has no sofware ECC for now, so don't include nand_calculate_ecc(),
+ * only nand_correct_data() is needed
+ */
+
+#ifndef CONFIG_NAND_SPL
 /*
  * Pre-calculated 256-way 1 byte column parity
  */
@@ -62,90 +69,75 @@
 	0x00, 0x55, 0x56, 0x03, 0x59, 0x0c, 0x0f, 0x5a, 0x5a, 0x0f, 0x0c, 0x59, 0x03, 0x56, 0x55, 0x00
 };
 
-
-/**
- * nand_trans_result - [GENERIC] create non-inverted ECC
- * @reg2:	line parity reg 2
- * @reg3:	line parity reg 3
- * @ecc_code:	ecc
- *
- * Creates non-inverted ECC code from line parity
- */
-static void nand_trans_result(u_char reg2, u_char reg3,
-	u_char *ecc_code)
-{
-	u_char a, b, i, tmp1, tmp2;
-
-	/* Initialize variables */
-	a = b = 0x80;
-	tmp1 = tmp2 = 0;
-
-	/* Calculate first ECC byte */
-	for (i = 0; i < 4; i++) {
-		if (reg3 & a)		/* LP15,13,11,9 --> ecc_code[0] */
-			tmp1 |= b;
-		b >>= 1;
-		if (reg2 & a)		/* LP14,12,10,8 --> ecc_code[0] */
-			tmp1 |= b;
-		b >>= 1;
-		a >>= 1;
-	}
-
-	/* Calculate second ECC byte */
-	b = 0x80;
-	for (i = 0; i < 4; i++) {
-		if (reg3 & a)		/* LP7,5,3,1 --> ecc_code[1] */
-			tmp2 |= b;
-		b >>= 1;
-		if (reg2 & a)		/* LP6,4,2,0 --> ecc_code[1] */
-			tmp2 |= b;
-		b >>= 1;
-		a >>= 1;
-	}
-
-	/* Store two of the ECC bytes */
-	ecc_code[0] = tmp1;
-	ecc_code[1] = tmp2;
-}
-
 /**
- * nand_calculate_ecc - [NAND Interface] Calculate 3 byte ECC code for 256 byte block
+ * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256-byte block
  * @mtd:	MTD block structure
  * @dat:	raw data
  * @ecc_code:	buffer for ECC
  */
-int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat, u_char *ecc_code)
+int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
+		       u_char *ecc_code)
 {
-	u_char idx, reg1, reg2, reg3;
-	int j;
+	uint8_t idx, reg1, reg2, reg3, tmp1, tmp2;
+	int i;
 
 	/* Initialize variables */
 	reg1 = reg2 = reg3 = 0;
-	ecc_code[0] = ecc_code[1] = ecc_code[2] = 0;
 
 	/* Build up column parity */
-	for(j = 0; j < 256; j++) {
-
+	for(i = 0; i < 256; i++) {
 		/* Get CP0 - CP5 from table */
-		idx = nand_ecc_precalc_table[dat[j]];
+		idx = nand_ecc_precalc_table[*dat++];
 		reg1 ^= (idx & 0x3f);
 
 		/* All bit XOR = 1 ? */
 		if (idx & 0x40) {
-			reg3 ^= (u_char) j;
-			reg2 ^= ~((u_char) j);
+			reg3 ^= (uint8_t) i;
+			reg2 ^= ~((uint8_t) i);
 		}
 	}
 
 	/* Create non-inverted ECC code from line parity */
-	nand_trans_result(reg2, reg3, ecc_code);
+	tmp1  = (reg3 & 0x80) >> 0; /* B7 -> B7 */
+	tmp1 |= (reg2 & 0x80) >> 1; /* B7 -> B6 */
+	tmp1 |= (reg3 & 0x40) >> 1; /* B6 -> B5 */
+	tmp1 |= (reg2 & 0x40) >> 2; /* B6 -> B4 */
+	tmp1 |= (reg3 & 0x20) >> 2; /* B5 -> B3 */
+	tmp1 |= (reg2 & 0x20) >> 3; /* B5 -> B2 */
+	tmp1 |= (reg3 & 0x10) >> 3; /* B4 -> B1 */
+	tmp1 |= (reg2 & 0x10) >> 4; /* B4 -> B0 */
+
+	tmp2  = (reg3 & 0x08) << 4; /* B3 -> B7 */
+	tmp2 |= (reg2 & 0x08) << 3; /* B3 -> B6 */
+	tmp2 |= (reg3 & 0x04) << 3; /* B2 -> B5 */
+	tmp2 |= (reg2 & 0x04) << 2; /* B2 -> B4 */
+	tmp2 |= (reg3 & 0x02) << 2; /* B1 -> B3 */
+	tmp2 |= (reg2 & 0x02) << 1; /* B1 -> B2 */
+	tmp2 |= (reg3 & 0x01) << 1; /* B0 -> B1 */
+	tmp2 |= (reg2 & 0x01) << 0; /* B7 -> B0 */
 
 	/* Calculate final ECC code */
-	ecc_code[0] = ~ecc_code[0];
-	ecc_code[1] = ~ecc_code[1];
+#ifdef CONFIG_MTD_NAND_ECC_SMC
+	ecc_code[0] = ~tmp2;
+	ecc_code[1] = ~tmp1;
+#else
+	ecc_code[0] = ~tmp1;
+	ecc_code[1] = ~tmp2;
+#endif
 	ecc_code[2] = ((~reg1) << 2) | 0x03;
+
 	return 0;
 }
+#endif /* CONFIG_NAND_SPL */
+
+static inline int countbits(uint32_t byte)
+{
+	int res = 0;
+
+	for (;byte; byte >>= 1)
+		res += byte & 0x01;
+	return res;
+}
 
 /**
  * nand_correct_data - [NAND Interface] Detect and correct bit error(s)
@@ -156,88 +148,52 @@
  *
  * Detect and correct a 1 bit error for 256 byte block
  */
-int nand_correct_data(struct mtd_info *mtd, u_char *dat, u_char *read_ecc, u_char *calc_ecc)
+int nand_correct_data(struct mtd_info *mtd, u_char *dat,
+		      u_char *read_ecc, u_char *calc_ecc)
 {
-	u_char a, b, c, d1, d2, d3, add, bit, i;
+	uint8_t s0, s1, s2;
 
-	/* Do error detection */
-	d1 = calc_ecc[0] ^ read_ecc[0];
-	d2 = calc_ecc[1] ^ read_ecc[1];
-	d3 = calc_ecc[2] ^ read_ecc[2];
-
-	if ((d1 | d2 | d3) == 0) {
-		/* No errors */
+#ifdef CONFIG_MTD_NAND_ECC_SMC
+	s0 = calc_ecc[0] ^ read_ecc[0];
+	s1 = calc_ecc[1] ^ read_ecc[1];
+	s2 = calc_ecc[2] ^ read_ecc[2];
+#else
+	s1 = calc_ecc[0] ^ read_ecc[0];
+	s0 = calc_ecc[1] ^ read_ecc[1];
+	s2 = calc_ecc[2] ^ read_ecc[2];
+#endif
+	if ((s0 | s1 | s2) == 0)
 		return 0;
-	}
-	else {
-		a = (d1 ^ (d1 >> 1)) & 0x55;
-		b = (d2 ^ (d2 >> 1)) & 0x55;
-		c = (d3 ^ (d3 >> 1)) & 0x54;
 
-		/* Found and will correct single bit error in the data */
-		if ((a == 0x55) && (b == 0x55) && (c == 0x54)) {
-			c = 0x80;
-			add = 0;
-			a = 0x80;
-			for (i=0; i<4; i++) {
-				if (d1 & c)
-					add |= a;
-				c >>= 2;
-				a >>= 1;
-			}
-			c = 0x80;
-			for (i=0; i<4; i++) {
-				if (d2 & c)
-					add |= a;
-				c >>= 2;
-				a >>= 1;
-			}
-			bit = 0;
-			b = 0x04;
-			c = 0x80;
-			for (i=0; i<3; i++) {
-				if (d3 & c)
-					bit |= b;
-				c >>= 2;
-				b >>= 1;
-			}
-			b = 0x01;
-			a = dat[add];
-			a ^= (b << bit);
-			dat[add] = a;
-			return 1;
-		} else {
-			i = 0;
-			while (d1) {
-				if (d1 & 0x01)
-					++i;
-				d1 >>= 1;
-			}
-			while (d2) {
-				if (d2 & 0x01)
-					++i;
-				d2 >>= 1;
-			}
-			while (d3) {
-				if (d3 & 0x01)
-					++i;
-				d3 >>= 1;
-			}
-			if (i == 1) {
-				/* ECC Code Error Correction */
-				read_ecc[0] = calc_ecc[0];
-				read_ecc[1] = calc_ecc[1];
-				read_ecc[2] = calc_ecc[2];
-				return 2;
-			}
-			else {
-				/* Uncorrectable Error */
-				return -1;
-			}
-		}
+	/* Check for a single bit error */
+	if( ((s0 ^ (s0 >> 1)) & 0x55) == 0x55 &&
+	    ((s1 ^ (s1 >> 1)) & 0x55) == 0x55 &&
+	    ((s2 ^ (s2 >> 1)) & 0x54) == 0x54) {
+
+		uint32_t byteoffs, bitnum;
+
+		byteoffs = (s1 << 0) & 0x80;
+		byteoffs |= (s1 << 1) & 0x40;
+		byteoffs |= (s1 << 2) & 0x20;
+		byteoffs |= (s1 << 3) & 0x10;
+
+		byteoffs |= (s0 >> 4) & 0x08;
+		byteoffs |= (s0 >> 3) & 0x04;
+		byteoffs |= (s0 >> 2) & 0x02;
+		byteoffs |= (s0 >> 1) & 0x01;
+
+		bitnum = (s2 >> 5) & 0x04;
+		bitnum |= (s2 >> 4) & 0x02;
+		bitnum |= (s2 >> 3) & 0x01;
+
+		dat[byteoffs] ^= (1 << bitnum);
+
+		return 1;
 	}
 
-	/* Should never happen */
+	if(countbits(s0 | ((uint32_t)s1 << 8) | ((uint32_t)s2 <<16)) == 1)
+		return 1;
+
 	return -1;
 }
 
diff --git a/include/asm-ppc/io.h b/include/asm-ppc/io.h
index bbc9ba0..03289bc 100644
--- a/include/asm-ppc/io.h
+++ b/include/asm-ppc/io.h
@@ -105,6 +105,11 @@
 	__asm__ __volatile__ ("sync" : : : "memory");
 }
 
+static inline void isync(void)
+{
+	__asm__ __volatile__ ("isync" : : : "memory");
+}
+
 /* Enforce in-order execution of data I/O.
  * No distinction between read/write on PPC; use eieio for all three.
  */
@@ -114,74 +119,90 @@
 
 /*
  * 8, 16 and 32 bit, big and little endian I/O operations, with barrier.
+ *
+ * Read operations have additional twi & isync to make sure the read
+ * is actually performed (i.e. the data has come back) before we start
+ * executing any following instructions.
  */
-extern inline int in_8(volatile u8 *addr)
+#define __iomem
+extern inline int in_8(const volatile unsigned char __iomem *addr)
 {
-    int ret;
+	int ret;
 
-    __asm__ __volatile__("lbz%U1%X1 %0,%1; eieio" : "=r" (ret) : "m" (*addr));
-    return ret;
+	__asm__ __volatile__(
+		"sync; lbz%U1%X1 %0,%1;\n"
+		"twi 0,%0,0;\n"
+		"isync" : "=r" (ret) : "m" (*addr));
+	return ret;
 }
 
-extern inline void out_8(volatile u8 *addr, int val)
+extern inline void out_8(volatile unsigned char __iomem *addr, int val)
 {
-    __asm__ __volatile__("stb%U0%X0 %1,%0; eieio" : "=m" (*addr) : "r" (val));
+	__asm__ __volatile__("stb%U0%X0 %1,%0; eieio" : "=m" (*addr) : "r" (val));
 }
 
-extern inline int in_le16(volatile u16 *addr)
+extern inline int in_le16(const volatile unsigned short __iomem *addr)
 {
-    int ret;
+	int ret;
 
-    __asm__ __volatile__("lhbrx %0,0,%1; eieio" : "=r" (ret) :
-		  "r" (addr), "m" (*addr));
-    return ret;
+	__asm__ __volatile__("sync; lhbrx %0,0,%1;\n"
+			     "twi 0,%0,0;\n"
+			     "isync" : "=r" (ret) :
+			      "r" (addr), "m" (*addr));
+	return ret;
 }
 
-extern inline int in_be16(volatile u16 *addr)
+extern inline int in_be16(const volatile unsigned short __iomem *addr)
 {
-    int ret;
+	int ret;
 
-    __asm__ __volatile__("lhz%U1%X1 %0,%1; eieio" : "=r" (ret) : "m" (*addr));
-    return ret;
+	__asm__ __volatile__("sync; lhz%U1%X1 %0,%1;\n"
+			     "twi 0,%0,0;\n"
+			     "isync" : "=r" (ret) : "m" (*addr));
+	return ret;
 }
 
-extern inline void out_le16(volatile u16 *addr, int val)
+extern inline void out_le16(volatile unsigned short __iomem *addr, int val)
 {
-    __asm__ __volatile__("sthbrx %1,0,%2; eieio" : "=m" (*addr) :
-		  "r" (val), "r" (addr));
+	__asm__ __volatile__("sync; sthbrx %1,0,%2" : "=m" (*addr) :
+			      "r" (val), "r" (addr));
 }
 
-extern inline void out_be16(volatile u16 *addr, int val)
+extern inline void out_be16(volatile unsigned short __iomem *addr, int val)
 {
-    __asm__ __volatile__("sth%U0%X0 %1,%0; eieio" : "=m" (*addr) : "r" (val));
+	__asm__ __volatile__("sync; sth%U0%X0 %1,%0" : "=m" (*addr) : "r" (val));
 }
 
-extern inline unsigned in_le32(volatile u32 *addr)
+extern inline unsigned in_le32(const volatile unsigned __iomem *addr)
 {
-    unsigned ret;
+	unsigned ret;
 
-    __asm__ __volatile__("lwbrx %0,0,%1; eieio" : "=r" (ret) :
-		 "r" (addr), "m" (*addr));
-    return ret;
+	__asm__ __volatile__("sync; lwbrx %0,0,%1;\n"
+			     "twi 0,%0,0;\n"
+			     "isync" : "=r" (ret) :
+			     "r" (addr), "m" (*addr));
+	return ret;
 }
 
-extern inline unsigned in_be32(volatile u32 *addr)
+extern inline unsigned in_be32(const volatile unsigned __iomem *addr)
 {
-    unsigned ret;
+	unsigned ret;
 
-    __asm__ __volatile__("lwz%U1%X1 %0,%1; eieio" : "=r" (ret) : "m" (*addr));
-    return ret;
+	__asm__ __volatile__("sync; lwz%U1%X1 %0,%1;\n"
+			     "twi 0,%0,0;\n"
+			     "isync" : "=r" (ret) : "m" (*addr));
+	return ret;
 }
 
-extern inline void out_le32(volatile unsigned *addr, int val)
+extern inline void out_le32(volatile unsigned __iomem *addr, int val)
 {
-    __asm__ __volatile__("stwbrx %1,0,%2; eieio" : "=m" (*addr) :
-		 "r" (val), "r" (addr));
+	__asm__ __volatile__("sync; stwbrx %1,0,%2" : "=m" (*addr) :
+			     "r" (val), "r" (addr));
 }
 
-extern inline void out_be32(volatile unsigned *addr, int val)
+extern inline void out_be32(volatile unsigned __iomem *addr, int val)
 {
-    __asm__ __volatile__("stw%U0%X0 %1,%0; eieio" : "=m" (*addr) : "r" (val));
+	__asm__ __volatile__("sync; stw%U0%X0 %1,%0" : "=m" (*addr) : "r" (val));
 }
 
 #endif
diff --git a/include/configs/bamboo.h b/include/configs/bamboo.h
index db58a9f..763d1c7 100644
--- a/include/configs/bamboo.h
+++ b/include/configs/bamboo.h
@@ -50,7 +50,7 @@
  *----------------------------------------------------------------------*/
 #define CFG_MONITOR_LEN		(384 * 1024)	/* Reserve 384 kB for Monitor	*/
 #define CFG_MALLOC_LEN		(256 * 1024)	/* Reserve 256 kB for malloc()	*/
-#define CFG_MONITOR_BASE	(-CFG_MONITOR_LEN)
+#define CFG_MONITOR_BASE	TEXT_BASE
 #define CFG_SDRAM_BASE	        0x00000000	    /* _must_ be 0	*/
 #define CFG_FLASH_BASE	        0xfff00000	    /* start of FLASH	*/
 #define CFG_PCI_MEMBASE	        0xa0000000	    /* mapped pci memory*/
@@ -104,14 +104,11 @@
 /*-----------------------------------------------------------------------
  * Environment
  *----------------------------------------------------------------------*/
-/*
- * Define here the location of the environment variables (FLASH or EEPROM).
- * Note: DENX encourages to use redundant environment in FLASH.
- */
-#if 1
+#if !defined(CONFIG_NAND_U_BOOT) && !defined(CONFIG_NAND_SPL)
 #define CFG_ENV_IS_IN_FLASH     1	/* use FLASH for environment vars	*/
 #else
-#define CFG_ENV_IS_IN_EEPROM	1	/* use EEPROM for environment vars	*/
+#define CFG_ENV_IS_IN_NAND	1	/* use NAND for environment vars	*/
+#define CFG_ENV_IS_EMBEDDED	1	/* use embedded environment */
 #endif
 
 /*-----------------------------------------------------------------------
@@ -133,7 +130,7 @@
 
 #ifdef CFG_ENV_IS_IN_FLASH
 #define CFG_ENV_SECT_SIZE	0x10000 	/* size of one complete sector	*/
-#define CFG_ENV_ADDR		(CFG_MONITOR_BASE-CFG_ENV_SECT_SIZE)
+#define CFG_ENV_ADDR		((-CFG_MONITOR_LEN)-CFG_ENV_SECT_SIZE)
 #define	CFG_ENV_SIZE		0x2000	/* Total Size of Environment Sector	*/
 
 /* Address and size of Redundant Environment Sector	*/
@@ -141,22 +138,89 @@
 #define CFG_ENV_SIZE_REDUND	(CFG_ENV_SIZE)
 #endif /* CFG_ENV_IS_IN_FLASH */
 
+/*
+ * IPL (Initial Program Loader, integrated inside CPU)
+ * Will load first 4k from NAND (SPL) into cache and execute it from there.
+ *
+ * SPL (Secondary Program Loader)
+ * Will load special U-Boot version (NUB) from NAND and execute it. This SPL
+ * has to fit into 4kByte. It sets up the CPU and configures the SDRAM
+ * controller and the NAND controller so that the special U-Boot image can be
+ * loaded from NAND to SDRAM.
+ *
+ * NUB (NAND U-Boot)
+ * This NAND U-Boot (NUB) is a special U-Boot version which can be started
+ * from RAM. Therefore it mustn't (re-)configure the SDRAM controller.
+ *
+ * On 440EPx the SPL is copied to SDRAM before the NAND controller is
+ * set up. While still running from cache, I experienced problems accessing
+ * the NAND controller.	sr - 2006-08-25
+ */
+#define CFG_NAND_BOOT_SPL_SRC	0xfffff000	/* SPL location			*/
+#define CFG_NAND_BOOT_SPL_SIZE	(4 << 10)	/* SPL size			*/
+#define CFG_NAND_BOOT_SPL_DST	0x00800000	/* Copy SPL here		*/
+#define CFG_NAND_U_BOOT_DST	0x01000000	/* Load NUB to this addr	*/
+#define CFG_NAND_U_BOOT_START	CFG_NAND_U_BOOT_DST /* Start NUB from this addr	*/
+#define CFG_NAND_BOOT_SPL_DELTA	(CFG_NAND_BOOT_SPL_SRC - CFG_NAND_BOOT_SPL_DST)
+
+/*
+ * Define the partitioning of the NAND chip (only RAM U-Boot is needed here)
+ */
+#define CFG_NAND_U_BOOT_OFFS	(16 << 10)	/* Offset to RAM U-Boot image	*/
+#define CFG_NAND_U_BOOT_SIZE	(384 << 10)	/* Size of RAM U-Boot image	*/
+
+/*
+ * Now the NAND chip has to be defined (no autodetection used!)
+ */
+#define CFG_NAND_PAGE_SIZE	512		/* NAND chip page size		*/
+#define CFG_NAND_BLOCK_SIZE	(16 << 10)	/* NAND chip block size		*/
+#define CFG_NAND_PAGE_COUNT	32		/* NAND chip page count		*/
+#define CFG_NAND_BAD_BLOCK_POS	5		/* Location of bad block marker	*/
+#define CFG_NAND_4_ADDR_CYCLE	1		/* Fourth addr used (>32MB)	*/
+
+#define CFG_NAND_ECCSIZE	256
+#define CFG_NAND_ECCBYTES	3
+#define CFG_NAND_ECCSTEPS	(CFG_NAND_PAGE_SIZE / CFG_NAND_ECCSIZE)
+#define CFG_NAND_OOBSIZE	16
+#define CFG_NAND_ECCTOTAL	(CFG_NAND_ECCBYTES * CFG_NAND_ECCSTEPS)
+#define CFG_NAND_ECCPOS		{0, 1, 2, 3, 6, 7}
+
+#ifdef CFG_ENV_IS_IN_NAND
+/*
+ * For NAND booting the environment is embedded in the U-Boot image. Please take
+ * look at the file board/amcc/sequoia/u-boot-nand.lds for details.
+ */
+#define CFG_ENV_SIZE		CFG_NAND_BLOCK_SIZE
+#define CFG_ENV_OFFSET		(CFG_NAND_U_BOOT_OFFS + CFG_ENV_SIZE)
+#define CFG_ENV_OFFSET_REDUND	(CFG_ENV_OFFSET + CFG_ENV_SIZE)
+#endif
+
 /*-----------------------------------------------------------------------
  * NAND FLASH
  *----------------------------------------------------------------------*/
-#define CFG_MAX_NAND_DEVICE	1
-#define NAND_MAX_CHIPS		1
-#define CFG_NAND_CS		1
+#define CFG_MAX_NAND_DEVICE	2
+#define NAND_MAX_CHIPS		CFG_MAX_NAND_DEVICE
 #define CFG_NAND_BASE		(CFG_NAND_ADDR + CFG_NAND_CS)
+#define CFG_NAND_BASE_LIST	{ CFG_NAND_BASE, CFG_NAND_ADDR + 2 }
 #define CFG_NAND_SELECT_DEVICE  1	/* nand driver supports mutipl. chips	*/
 
+#if !defined(CONFIG_NAND_U_BOOT) && !defined(CONFIG_NAND_SPL)
+#define CFG_NAND_CS		1
+#else
+#define CFG_NAND_CS		0		/* NAND chip connected to CSx	*/
+/* Memory Bank 0 (NAND-FLASH) initialization					*/
+#define CFG_EBC_PB0AP		0x018003c0
+#define CFG_EBC_PB0CR		(CFG_NAND_ADDR | 0x1c000)
+#endif
+
 /*-----------------------------------------------------------------------
  * DDR SDRAM
  *----------------------------------------------------------------------------- */
 #define CONFIG_SPD_EEPROM               /* Use SPD EEPROM for setup             */
 #undef CONFIG_DDR_ECC			/* don't use ECC			*/
 #define CFG_SIMULATE_SPD_EEPROM	0xff	/* simulate spd eeprom on this address	*/
-#define SPD_EEPROM_ADDRESS      {CFG_SIMULATE_SPD_EEPROM, 0x50, 0x51}
+#define SPD_EEPROM_ADDRESS	{CFG_SIMULATE_SPD_EEPROM, 0x50, 0x51}
+#define CFG_MBYTES_SDRAM	(64)	/* 64MB fixed size for early-sdram-init */
 
 /*-----------------------------------------------------------------------
  * I2C
diff --git a/include/configs/sequoia.h b/include/configs/sequoia.h
index e1572ba..23243a4 100644
--- a/include/configs/sequoia.h
+++ b/include/configs/sequoia.h
@@ -168,12 +168,19 @@
 /*
  * Now the NAND chip has to be defined (no autodetection used!)
  */
-#define CFG_NAND_PAGE_SIZE	(512)		/* NAND chip page size		*/
+#define CFG_NAND_PAGE_SIZE	512		/* NAND chip page size		*/
 #define CFG_NAND_BLOCK_SIZE	(16 << 10)	/* NAND chip block size		*/
-#define CFG_NAND_PAGE_COUNT	(32)		/* NAND chip page count		*/
-#define CFG_NAND_BAD_BLOCK_POS	(5)		/* Location of bad block marker	*/
+#define CFG_NAND_PAGE_COUNT	32		/* NAND chip page count		*/
+#define CFG_NAND_BAD_BLOCK_POS	5		/* Location of bad block marker	*/
 #undef CFG_NAND_4_ADDR_CYCLE			/* No fourth addr used (<=32MB)	*/
 
+#define CFG_NAND_ECCSIZE	256
+#define CFG_NAND_ECCBYTES	3
+#define CFG_NAND_ECCSTEPS	(CFG_NAND_PAGE_SIZE / CFG_NAND_ECCSIZE)
+#define CFG_NAND_OOBSIZE	16
+#define CFG_NAND_ECCTOTAL	(CFG_NAND_ECCBYTES * CFG_NAND_ECCSTEPS)
+#define CFG_NAND_ECCPOS		{0, 1, 2, 3, 6, 7}
+
 #ifdef CFG_ENV_IS_IN_NAND
 /*
  * For NAND booting the environment is embedded in the U-Boot image. Please take
diff --git a/nand_spl/board/amcc/bamboo/Makefile b/nand_spl/board/amcc/bamboo/Makefile
new file mode 100644
index 0000000..0df86f9
--- /dev/null
+++ b/nand_spl/board/amcc/bamboo/Makefile
@@ -0,0 +1,100 @@
+#
+# (C) Copyright 2007
+# Stefan Roese, DENX Software Engineering, sr@denx.de.
+#
+# See file CREDITS for list of people who contributed to this
+# project.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA
+#
+
+include $(TOPDIR)/config.mk
+include $(TOPDIR)/nand_spl/board/$(BOARDDIR)/config.mk
+
+LDSCRIPT= $(TOPDIR)/nand_spl/board/$(BOARDDIR)/u-boot.lds
+LDFLAGS	= -Bstatic -T $(LDSCRIPT) -Ttext $(TEXT_BASE) $(PLATFORM_LDFLAGS)
+AFLAGS	+= -DCONFIG_NAND_SPL
+CFLAGS	+= -DCONFIG_NAND_SPL
+
+SOBJS	= start.o init.o resetvec.o
+COBJS	= nand_boot.o nand_ecc.o ndfc.o sdram.o
+
+SRCS	:= $(addprefix $(obj),$(SOBJS:.o=.S) $(COBJS:.o=.c))
+OBJS	:= $(addprefix $(obj),$(SOBJS) $(COBJS))
+__OBJS	:= $(SOBJS) $(COBJS)
+LNDIR	:= $(OBJTREE)/nand_spl/board/$(BOARDDIR)
+
+nandobj	:= $(OBJTREE)/nand_spl/
+
+ALL	= $(nandobj)u-boot-spl $(nandobj)u-boot-spl.bin $(nandobj)u-boot-spl-16k.bin
+
+all:	$(obj).depend $(ALL)
+
+$(nandobj)u-boot-spl-16k.bin: $(nandobj)u-boot-spl
+	$(OBJCOPY) ${OBJCFLAGS} --pad-to=$(PAD_TO) -O binary $< $@
+
+$(nandobj)u-boot-spl.bin:	$(nandobj)u-boot-spl
+	$(OBJCOPY) ${OBJCFLAGS} -O binary $< $@
+
+$(nandobj)u-boot-spl:	$(OBJS)
+	cd $(LNDIR) && $(LD) $(LDFLAGS) $$UNDEF_SYM $(__OBJS) \
+		-Map $(nandobj)u-boot-spl.map \
+		-o $(nandobj)u-boot-spl
+
+# create symbolic links for common files
+
+# from cpu directory
+$(obj)ndfc.c:
+	@rm -f $(obj)ndfc.c
+	ln -s $(SRCTREE)/cpu/ppc4xx/ndfc.c $(obj)ndfc.c
+
+$(obj)resetvec.S:
+	@rm -f $(obj)resetvec.S
+	ln -s $(SRCTREE)/cpu/ppc4xx/resetvec.S $(obj)resetvec.S
+
+$(obj)start.S:
+	@rm -f $(obj)start.S
+	ln -s $(SRCTREE)/cpu/ppc4xx/start.S $(obj)start.S
+
+# from board directory
+$(obj)init.S:
+	@rm -f $(obj)init.S
+	ln -s $(SRCTREE)/board/amcc/bamboo/init.S $(obj)init.S
+
+# from nand_spl directory
+$(obj)nand_boot.c:
+	@rm -f $(obj)nand_boot.c
+	ln -s $(SRCTREE)/nand_spl/nand_boot.c $(obj)nand_boot.c
+
+# from drivers/nand directory
+$(obj)nand_ecc.c:
+	@rm -f $(obj)nand_ecc.c
+	ln -s $(SRCTREE)/drivers/nand/nand_ecc.c $(obj)nand_ecc.c
+
+#########################################################################
+
+$(obj)%.o:	$(obj)%.S
+	$(CC) $(AFLAGS) -c -o $@ $<
+
+$(obj)%.o:	$(obj)%.c
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+# defines $(obj).depend target
+include $(SRCTREE)/rules.mk
+
+sinclude $(obj).depend
+
+#########################################################################
diff --git a/nand_spl/board/amcc/bamboo/config.mk b/nand_spl/board/amcc/bamboo/config.mk
new file mode 100644
index 0000000..f7ec751
--- /dev/null
+++ b/nand_spl/board/amcc/bamboo/config.mk
@@ -0,0 +1,49 @@
+#
+# (C) Copyright 2007
+# Stefan Roese, DENX Software Engineering, sr@denx.de.
+#
+# See file CREDITS for list of people who contributed to this
+# project.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307 USA
+#
+#
+# AMCC 440EP Reference Platform (Bamboo) board
+#
+
+#
+# TEXT_BASE for SPL:
+#
+# On 440EP(x) platforms the SPL is located at 0xfffff000...0xffffffff,
+# in the last 4kBytes of memory space in cache.
+# We will copy this SPL into instruction-cache in start.S. So we set
+# TEXT_BASE to starting address in i-cache here.
+#
+TEXT_BASE = 0x00800000
+
+# PAD_TO used to generate a 16kByte binary needed for the combined image
+# -> PAD_TO = TEXT_BASE + 0x4000
+PAD_TO	= 0x00804000
+
+PLATFORM_CPPFLAGS += -DCONFIG_440=1
+
+ifeq ($(debug),1)
+PLATFORM_CPPFLAGS += -DDEBUG
+endif
+
+ifeq ($(dbcr),1)
+PLATFORM_CPPFLAGS += -DCFG_INIT_DBCR=0x8cff0000
+endif
diff --git a/nand_spl/board/amcc/bamboo/sdram.c b/nand_spl/board/amcc/bamboo/sdram.c
new file mode 100644
index 0000000..4f09072
--- /dev/null
+++ b/nand_spl/board/amcc/bamboo/sdram.c
@@ -0,0 +1,92 @@
+/*
+ * (C) Copyright 2007
+ * Stefan Roese, DENX Software Engineering, sr@denx.de.
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+#include <common.h>
+#include <ppc4xx.h>
+#include <asm/processor.h>
+#include <asm/io.h>
+
+static void wait_init_complete(void)
+{
+	u32 val;
+
+	do {
+		mfsdram(mem_mcsts, val);
+	} while (!(val & 0x80000000));
+}
+
+/*
+ * early_sdram_init()
+ *
+ * As the name already indicates, this function is called very early
+ * from start.S and configures the SDRAM with fixed values. This is needed,
+ * since the 440EP has no internal SRAM and the 4kB NAND_SPL loader has
+ * not enough free space to implement the complete I2C SPD DDR autodetection
+ * routines. Therefore the Bamboo only supports the onboard 64MBytes of SDRAM
+ * when booting from NAND flash.
+ */
+void early_sdram_init(void)
+{
+	/*
+	 * Soft-reset SDRAM controller.
+	 */
+	mtsdr(sdr_srst, SDR0_SRST_DMC);
+	mtsdr(sdr_srst, 0x00000000);
+
+	/*
+	 * Disable memory controller.
+	 */
+	mtsdram(mem_cfg0, 0x00000000);
+
+	/*
+	 * Setup some default
+	 */
+	mtsdram(mem_uabba, 0x00000000); /* ubba=0 (default)		*/
+	mtsdram(mem_slio, 0x00000000);	/* rdre=0 wrre=0 rarw=0		*/
+	mtsdram(mem_devopt, 0x00000000); /* dll=0 ds=0 (normal)		*/
+	mtsdram(mem_wddctr, 0x00000000); /* wrcp=0 dcd=0		*/
+	mtsdram(mem_clktr, 0x40000000); /* clkp=1 (90 deg wr) dcdt=0	*/
+
+	/*
+	 * Following for CAS Latency = 2.5 @ 133 MHz PLB
+	 */
+	mtsdram(mem_b0cr, 0x00082001);
+	mtsdram(mem_tr0, 0x41094012);
+	mtsdram(mem_tr1, 0x8080083d);	/* SS=T2 SL=STAGE 3 CD=1 CT=0x00*/
+	mtsdram(mem_rtr, 0x04100000);	/* Interval 7.8µs @ 133MHz PLB	*/
+	mtsdram(mem_cfg1, 0x00000000);	/* Self-refresh exit, disable PM*/
+
+	/*
+	 * Enable the controller, then wait for DCEN to complete
+	 */
+	mtsdram(mem_cfg0, 0x80000000); /* DCEN=1, PMUD=0*/
+	wait_init_complete();
+}
+
+long int initdram(int board_type)
+{
+	/*
+	 * Nothing to do here, just return size of fixed SDRAM setup
+	 */
+	return CFG_MBYTES_SDRAM << 20;
+}
diff --git a/nand_spl/board/amcc/bamboo/u-boot.lds b/nand_spl/board/amcc/bamboo/u-boot.lds
new file mode 100644
index 0000000..28228f8
--- /dev/null
+++ b/nand_spl/board/amcc/bamboo/u-boot.lds
@@ -0,0 +1,65 @@
+/*
+ * (C) Copyright 2007
+ * Stefan Roese, DENX Software Engineering, sr@denx.de.
+ *
+ * See file CREDITS for list of people who contributed to this
+ * project.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ */
+
+OUTPUT_ARCH(powerpc:common)
+SECTIONS
+{
+  .resetvec 0x00800FFC :
+  {
+    *(.resetvec)
+  } = 0xffff
+
+  .text      :
+  {
+    start.o	(.text)
+    init.o	(.text)
+    nand_boot.o	(.text)
+    sdram.o	(.text)
+    ndfc.o	(.text)
+
+    *(.text)
+    *(.fixup)
+  }
+  _etext = .;
+
+  .data    :
+  {
+    *(.rodata*)
+    *(.data*)
+    *(.sdata*)
+    __got2_start = .;
+    *(.got2)
+    __got2_end = .;
+  }
+
+  _edata  =  .;
+
+  __bss_start = .;
+  .bss       :
+  {
+   *(.sbss)
+   *(.bss)
+  }
+
+  _end = . ;
+}
diff --git a/nand_spl/board/amcc/sequoia/Makefile b/nand_spl/board/amcc/sequoia/Makefile
index b42da8c..ec1be5a 100644
--- a/nand_spl/board/amcc/sequoia/Makefile
+++ b/nand_spl/board/amcc/sequoia/Makefile
@@ -1,5 +1,5 @@
 #
-# (C) Copyright 2006
+# (C) Copyright 2006-2007
 # Stefan Roese, DENX Software Engineering, sr@denx.de.
 #
 # See file CREDITS for list of people who contributed to this
@@ -30,7 +30,7 @@
 CFLAGS	+= -DCONFIG_NAND_SPL
 
 SOBJS	= start.o init.o resetvec.o
-COBJS	= nand_boot.o ndfc.o sdram.o
+COBJS	= nand_boot.o nand_ecc.o ndfc.o sdram.o
 
 SRCS	:= $(addprefix $(obj),$(SOBJS:.o=.S) $(COBJS:.o=.c))
 OBJS	:= $(addprefix $(obj),$(SOBJS) $(COBJS))
@@ -85,6 +85,11 @@
 	@rm -f $(obj)nand_boot.c
 	ln -s $(SRCTREE)/nand_spl/nand_boot.c $(obj)nand_boot.c
 
+# from drivers/nand directory
+$(obj)nand_ecc.c:
+	@rm -f $(obj)nand_ecc.c
+	ln -s $(SRCTREE)/drivers/nand/nand_ecc.c $(obj)nand_ecc.c
+
 #########################################################################
 
 $(obj)%.o:	$(obj)%.S
diff --git a/nand_spl/nand_boot.c b/nand_spl/nand_boot.c
index a136fb7..840a596 100644
--- a/nand_spl/nand_boot.c
+++ b/nand_spl/nand_boot.c
@@ -1,5 +1,5 @@
 /*
- * (C) Copyright 2006
+ * (C) Copyright 2006-2007
  * Stefan Roese, DENX Software Engineering, sr@denx.de.
  *
  * This program is free software; you can redistribute it and/or
@@ -24,27 +24,28 @@
 #define CFG_NAND_READ_DELAY \
 	{ volatile int dummy; int i; for (i=0; i<10000; i++) dummy = i; }
 
+static int nand_ecc_pos[] = CFG_NAND_ECCPOS;
+
 extern void board_nand_init(struct nand_chip *nand);
-extern void ndfc_hwcontrol(struct mtd_info *mtdinfo, int cmd);
-extern void ndfc_write_byte(struct mtd_info *mtdinfo, u_char byte);
-extern u_char ndfc_read_byte(struct mtd_info *mtdinfo);
-extern int ndfc_dev_ready(struct mtd_info *mtdinfo);
-extern int jump_to_ram(ulong delta);
-extern int jump_to_uboot(ulong addr);
 
-static int nand_is_bad_block(struct mtd_info *mtd, int block)
+static int nand_command(struct mtd_info *mtd, int block, int page, int offs, u8 cmd)
 {
 	struct nand_chip *this = mtd->priv;
-	int page_addr = block * CFG_NAND_PAGE_COUNT;
+	int page_addr = page + block * CFG_NAND_PAGE_COUNT;
+
+	if (this->dev_ready)
+		this->dev_ready(mtd);
+	else
+		CFG_NAND_READ_DELAY;
 
 	/* Begin command latch cycle */
 	this->hwcontrol(mtd, NAND_CTL_SETCLE);
-	this->write_byte(mtd, NAND_CMD_READOOB);
+	this->write_byte(mtd, cmd);
 	/* Set ALE and clear CLE to start address cycle */
 	this->hwcontrol(mtd, NAND_CTL_CLRCLE);
 	this->hwcontrol(mtd, NAND_CTL_SETALE);
 	/* Column address */
-	this->write_byte(mtd, CFG_NAND_BAD_BLOCK_POS);			/* A[7:0] */
+	this->write_byte(mtd, offs);					/* A[7:0] */
 	this->write_byte(mtd, (uchar)(page_addr & 0xff));		/* A[16:9] */
 	this->write_byte(mtd, (uchar)((page_addr >> 8) & 0xff));	/* A[24:17] */
 #ifdef CFG_NAND_4_ADDR_CYCLE
@@ -62,6 +63,15 @@
 	else
 		CFG_NAND_READ_DELAY;
 
+	return 0;
+}
+
+static int nand_is_bad_block(struct mtd_info *mtd, int block)
+{
+	struct nand_chip *this = mtd->priv;
+
+	nand_command(mtd, block, 0, CFG_NAND_BAD_BLOCK_POS, NAND_CMD_READOOB);
+
 	/*
 	 * Read on byte
 	 */
@@ -74,39 +84,46 @@
 static int nand_read_page(struct mtd_info *mtd, int block, int page, uchar *dst)
 {
 	struct nand_chip *this = mtd->priv;
-	int page_addr = page + block * CFG_NAND_PAGE_COUNT;
+	u_char *ecc_calc;
+	u_char *ecc_code;
+	u_char *oob_data;
 	int i;
+	int eccsize = CFG_NAND_ECCSIZE;
+	int eccbytes = CFG_NAND_ECCBYTES;
+	int eccsteps = CFG_NAND_ECCSTEPS;
+	uint8_t *p = dst;
+	int stat;
 
-	/* Begin command latch cycle */
-	this->hwcontrol(mtd, NAND_CTL_SETCLE);
-	this->write_byte(mtd, NAND_CMD_READ0);
-	/* Set ALE and clear CLE to start address cycle */
-	this->hwcontrol(mtd, NAND_CTL_CLRCLE);
-	this->hwcontrol(mtd, NAND_CTL_SETALE);
-	/* Column address */
-	this->write_byte(mtd, 0);					/* A[7:0] */
-	this->write_byte(mtd, (uchar)(page_addr & 0xff));		/* A[16:9] */
-	this->write_byte(mtd, (uchar)((page_addr >> 8) & 0xff));	/* A[24:17] */
-#ifdef CFG_NAND_4_ADDR_CYCLE
-	/* One more address cycle for devices > 32MiB */
-	this->write_byte(mtd, (uchar)((page_addr >> 16) & 0x0f));	/* A[xx:25] */
-#endif
-	/* Latch in address */
-	this->hwcontrol(mtd, NAND_CTL_CLRALE);
+	nand_command(mtd, block, page, 0, NAND_CMD_READ0);
 
-	/*
-	 * Wait a while for the data to be ready
+	/* No malloc available for now, just use some temporary locations
+	 * in SDRAM
 	 */
-	if (this->dev_ready)
-		this->dev_ready(mtd);
-	else
-		CFG_NAND_READ_DELAY;
+	ecc_calc = (u_char *)(CFG_SDRAM_BASE + 0x10000);
+	ecc_code = ecc_calc + 0x100;
+	oob_data = ecc_calc + 0x200;
 
-	/*
-	 * Read page into buffer
-	 */
-	for (i=0; i<CFG_NAND_PAGE_SIZE; i++)
-		*dst++ = this->read_byte(mtd);
+	for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
+		this->enable_hwecc(mtd, NAND_ECC_READ);
+		this->read_buf(mtd, p, eccsize);
+		this->calculate_ecc(mtd, p, &ecc_calc[i]);
+	}
+	this->read_buf(mtd, oob_data, CFG_NAND_OOBSIZE);
+
+	/* Pick the ECC bytes out of the oob data */
+	for (i = 0; i < CFG_NAND_ECCTOTAL; i++)
+		ecc_code[i] = oob_data[nand_ecc_pos[i]];
+
+	eccsteps = CFG_NAND_ECCSTEPS;
+	p = dst;
+
+	for (i = 0 ; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
+		/* No chance to do something with the possible error message
+		 * from correct_data(). We just hope that all possible errors
+		 * are corrected by this routine.
+		 */
+		stat = this->correct_data(mtd, p, &ecc_code[i], &ecc_calc[i]);
+	}
 
 	return 0;
 }