85xx: Update multicore boot mechanism to ePAPR v0.81 spec

The following changes are needed to be in line with ePAPR v0.81:

* r4, r5 are now always set to 0 on boot release
* r7 is used to pass the size of the initial map area (IMA)
* EPAPR_MAGIC value changed for book-e processors
* changes in the spin table layout
* spin table supports a 64-bit physical release address

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
diff --git a/common/cmd_mp.c b/common/cmd_mp.c
index d96e6a3..26a57c5 100644
--- a/common/cmd_mp.c
+++ b/common/cmd_mp.c
@@ -26,7 +26,7 @@
 int
 cpu_cmd(cmd_tbl_t *cmdtp, int flag, int argc, char *argv[])
 {
-	unsigned long cpuid, val = 0;
+	unsigned long cpuid;
 
 	if (argc < 3) {
 		printf ("Usage:\n%s\n", cmdtp->usage);
@@ -59,9 +59,7 @@
 		return 1;
 	}
 
-	val = simple_strtoul(argv[3], NULL, 16);
-
-	if (cpu_release(cpuid, val, argc - 4, argv + 4)) {
+	if (cpu_release(cpuid, argc - 3, argv + 3)) {
 		printf ("Usage:\n%s\n", cmdtp->usage);
 		return 1;
 	}
@@ -71,17 +69,16 @@
 
 #ifdef CONFIG_PPC
 #define CPU_ARCH_HELP \
-	"                         [args] : <pir> <r3> <r4> <r6> <r7>\n" \
+	"                         [args] : <pir> <r3> <r6>\n" \
 	"                                   pir - processor id (if writeable)\n" \
 	"                                    r3 - value for gpr 3\n" \
-	"                                    r4 - value for gpr 4\n" \
 	"                                    r6 - value for gpr 6\n" \
-	"                                    r7 - value for gpr 7\n" \
 	"\n" \
 	"     Use '-' for any arg if you want the default value.\n" \
-	"     Default for r3, r4, r7 is 0, r6 is 0x65504150\n" \
+	"     Default for r3 is <num> and r6 is 0\n" \
 	"\n" \
-	"     When cpu <num> is released r5 = 0 per the ePAPR spec.\n"
+	"     When cpu <num> is released r4 and r5 = 0.\n" \
+	"     r7 will contain the size of the initial mapped area\n"
 #endif
 
 U_BOOT_CMD(
diff --git a/cpu/mpc85xx/fdt.c b/cpu/mpc85xx/fdt.c
index 43df1c7..bde6d1e 100644
--- a/cpu/mpc85xx/fdt.c
+++ b/cpu/mpc85xx/fdt.c
@@ -52,7 +52,7 @@
 			if (*reg == id) {
 				fdt_setprop_string(blob, off, "status", "okay");
 			} else {
-				u32 val = *reg * 24 + spin_tbl_addr;
+				u32 val = *reg * SIZE_BOOT_ENTRY + spin_tbl_addr;
 				val = cpu_to_fdt32(val);
 				fdt_setprop_string(blob, off, "status",
 								"disabled");
diff --git a/cpu/mpc85xx/mp.c b/cpu/mpc85xx/mp.c
index aa91cea..d3727b0 100644
--- a/cpu/mpc85xx/mp.c
+++ b/cpu/mpc85xx/mp.c
@@ -28,14 +28,6 @@
 
 DECLARE_GLOBAL_DATA_PTR;
 
-#define BOOT_ENTRY_ADDR	0
-#define BOOT_ENTRY_PIR	1
-#define BOOT_ENTRY_R3	2
-#define BOOT_ENTRY_R4	3
-#define BOOT_ENTRY_R6	4
-#define BOOT_ENTRY_R7	5
-#define NUM_BOOT_ENTRY	6
-
 u32 get_my_id()
 {
 	return mfspr(SPRN_PIR);
@@ -63,40 +55,54 @@
 		printf("Running on cpu %d\n", id);
 		printf("\n");
 		printf("table @ 0x%08x:\n", table);
-		printf("   addr - 0x%08x\n", table[BOOT_ENTRY_ADDR]);
+		printf("   addr - 0x%08x\n", table[BOOT_ENTRY_ADDR_LOWER]);
 		printf("   pir  - 0x%08x\n", table[BOOT_ENTRY_PIR]);
-		printf("   r3   - 0x%08x\n", table[BOOT_ENTRY_R3]);
-		printf("   r4   - 0x%08x\n", table[BOOT_ENTRY_R4]);
-		printf("   r6   - 0x%08x\n", table[BOOT_ENTRY_R6]);
-		printf("   r7   - 0x%08x\n", table[BOOT_ENTRY_R7]);
+		printf("   r3   - 0x%08x\n", table[BOOT_ENTRY_R3_LOWER]);
+		printf("   r6   - 0x%08x\n", table[BOOT_ENTRY_R6_LOWER]);
 	}
 
 	return 0;
 }
 
-int cpu_release(int nr, unsigned long boot_addr, int argc, char *argv[])
+static u8 boot_entry_map[4] = {
+	0,
+	BOOT_ENTRY_PIR,
+	BOOT_ENTRY_R3_LOWER,
+	BOOT_ENTRY_R6_LOWER,
+};
+
+int cpu_release(int nr, int argc, char *argv[])
 {
 	u32 i, val, *table = (u32 *)get_spin_addr() + nr * NUM_BOOT_ENTRY;
+	u64 boot_addr;
 
 	if (nr == get_my_id()) {
 		printf("Invalid to release the boot core.\n\n");
 		return 1;
 	}
 
-	if (argc != 5) {
+	if (argc != 4) {
 		printf("Invalid number of arguments to release.\n\n");
 		return 1;
 	}
 
-	/* handle pir, r3, r4, r6, r7 */
-	for (i = 0; i < 5; i++) {
+#ifdef CFG_64BIT_STRTOUL
+	boot_addr = simple_strtoull(argv[0], NULL, 16);
+#else
+	boot_addr = simple_strtoul(argv[0], NULL, 16);
+#endif
+
+	/* handle pir, r3, r6 */
+	for (i = 1; i < 4; i++) {
 		if (argv[i][0] != '-') {
+			u8 entry = boot_entry_map[i];
 			val = simple_strtoul(argv[i], NULL, 16);
-			table[i+BOOT_ENTRY_PIR] = val;
+			table[entry] = val;
 		}
 	}
 
-	table[BOOT_ENTRY_ADDR] = boot_addr;
+	table[BOOT_ENTRY_ADDR_UPPER] = (u32)(boot_addr >> 32);
+	table[BOOT_ENTRY_ADDR_LOWER] = (u32)(boot_addr & 0xffffffff);
 
 	return 0;
 }
diff --git a/cpu/mpc85xx/mp.h b/cpu/mpc85xx/mp.h
index d9fbb82..b762ee2 100644
--- a/cpu/mpc85xx/mp.h
+++ b/cpu/mpc85xx/mp.h
@@ -5,4 +5,15 @@
 void setup_mp(void);
 u32 get_my_id(void);
 
+#define BOOT_ENTRY_ADDR_UPPER	0
+#define BOOT_ENTRY_ADDR_LOWER	1
+#define BOOT_ENTRY_R3_UPPER	2
+#define BOOT_ENTRY_R3_LOWER	3
+#define BOOT_ENTRY_RESV		4
+#define BOOT_ENTRY_PIR		5
+#define BOOT_ENTRY_R6_UPPER	6
+#define BOOT_ENTRY_R6_LOWER	7
+#define NUM_BOOT_ENTRY		8
+#define SIZE_BOOT_ENTRY		(NUM_BOOT_ENTRY * sizeof(u32))
+
 #endif
diff --git a/cpu/mpc85xx/release.S b/cpu/mpc85xx/release.S
index fe1775c..3b7366f 100644
--- a/cpu/mpc85xx/release.S
+++ b/cpu/mpc85xx/release.S
@@ -57,61 +57,91 @@
 	lis	r3,toreset(__spin_table)@h
 	ori	r3,r3,toreset(__spin_table)@l
 
-	/* r9 has the base address for the entry */
+	/* r10 has the base address for the entry */
 	mfspr	r0,SPRN_PIR
 	mr	r4,r0
-	slwi	r8,r4,4
-	slwi	r9,r4,3
-	add	r8,r8,r9
-	add	r9,r3,r8
+	slwi	r8,r4,5
+	add	r10,r3,r8
 
-#define EPAPR_MAGIC	(0x65504150)
-#define ENTRY_ADDR	0
-#define ENTRY_PIR	4
-#define ENTRY_R3	8
-#define ENTRY_R4	12
-#define ENTRY_R6	16
-#define ENTRY_R7	20
+#define EPAPR_MAGIC		(0x45504150)
+#define ENTRY_ADDR_UPPER	0
+#define ENTRY_ADDR_LOWER	4
+#define ENTRY_R3_UPPER		8
+#define ENTRY_R3_LOWER		12
+#define ENTRY_RESV		16
+#define ENTRY_PIR		20
+#define ENTRY_R6_UPPER		24
+#define ENTRY_R6_LOWER		28
+#define ENTRY_SIZE		32
 
 	/* setup the entry */
-	li	r4,0
+	li	r3,0
 	li	r8,1
-	lis	r6,EPAPR_MAGIC@h
-	ori	r6,r6,EPAPR_MAGIC@l
-	stw	r0,ENTRY_PIR(r9)
-	stw	r8,ENTRY_ADDR(r9)
-	stw	r4,ENTRY_R3(r9)
-	stw	r4,ENTRY_R4(r9)
-	stw	r6,ENTRY_R6(r9)
-	stw	r4,ENTRY_R7(r9)
+	stw	r0,ENTRY_PIR(r10)
+	stw	r3,ENTRY_ADDR_UPPER(r10)
+	stw	r8,ENTRY_ADDR_LOWER(r10)
+	stw	r3,ENTRY_R3_UPPER(r10)
+	stw	r4,ENTRY_R3_LOWER(r10)
+	stw	r3,ENTRY_R6_UPPER(r10)
+	stw	r3,ENTRY_R6_LOWER(r10)
+
+	/* setup mapping for AS = 1, and jump there */
+	lis	r11,(MAS0_TLBSEL(1)|MAS0_ESEL(1))@h
+	mtspr	SPRN_MAS0,r11
+	lis	r11,(MAS1_VALID|MAS1_IPROT)@h
+	ori	r11,r11,(MAS1_TS|MAS1_TSIZE(BOOKE_PAGESZ_4K))@l
+	mtspr	SPRN_MAS1,r11
+	lis	r11,(0xfffff000|MAS2_I)@h
+	ori	r11,r11,(0xfffff000|MAS2_I)@l
+	mtspr	SPRN_MAS2,r11
+	lis	r11,(0xfffff000|MAS3_SX|MAS3_SW|MAS3_SR)@h
+	ori	r11,r11,(0xfffff000|MAS3_SX|MAS3_SW|MAS3_SR)@l
+	mtspr	SPRN_MAS3,r11
+	tlbwe
+
+	bl	1f
+1:	mflr	r11
+	addi	r11,r11,28
+	mfmsr	r13
+	ori	r12,r13,MSR_IS|MSR_DS@l
+
+	mtspr	SPRN_SRR0,r11
+	mtspr	SPRN_SRR1,r12
+	rfi
 
 	/* spin waiting for addr */
-1:	lwz	r4,ENTRY_ADDR(r9)
+2:
+	lwz	r4,ENTRY_ADDR_LOWER(r10)
 	andi.	r11,r4,1
-	bne	1b
+	bne	2b
+
+	/* get the upper bits of the addr */
+	lwz	r11,ENTRY_ADDR_UPPER(r10)
 
 	/* setup branch addr */
-	mtctr	r4
+	mtspr	SPRN_SRR0,r4
 
 	/* mark the entry as released */
 	li	r8,3
-	stw	r8,ENTRY_ADDR(r9)
+	stw	r8,ENTRY_ADDR_LOWER(r10)
 
 	/* mask by ~64M to setup our tlb we will jump to */
-	rlwinm	r8,r4,0,0,5
+	rlwinm	r12,r4,0,0,5
 
-	/* setup r3, r5, r6, r7 */
-	lwz	r3,ENTRY_R3(r9)
-	lwz	r4,ENTRY_R4(r9)
+	/* setup r3, r4, r5, r6, r7, r8, r9 */
+	lwz	r3,ENTRY_R3_LOWER(r10)
+	li	r4,0
 	li	r5,0
-	lwz	r6,ENTRY_R6(r9)
-	lwz	r7,ENTRY_R7(r9)
+	lwz	r6,ENTRY_R6_LOWER(r10)
+	lis	r7,(64*1024*1024)@h
+	li	r8,0
+	li	r9,0
 
 	/* load up the pir */
-	lwz	r0,ENTRY_PIR(r9)
+	lwz	r0,ENTRY_PIR(r10)
 	mtspr	SPRN_PIR,r0
 	mfspr	r0,SPRN_PIR
-	stw	r0,ENTRY_PIR(r9)
+	stw	r0,ENTRY_PIR(r10)
 
 /*
  * Coming here, we know the cpu has one TLB mapping in TLB1[0]
@@ -119,26 +149,30 @@
  * second mapping that maps addr 1:1 for 64M, and then we jump to
  * addr
  */
-	lis	r9,(MAS0_TLBSEL(1)|MAS0_ESEL(1))@h
-	mtspr	SPRN_MAS0,r9
-	lis	r9,(MAS1_VALID|MAS1_IPROT)@h
-	ori	r9,r9,(MAS1_TSIZE(BOOKE_PAGESZ_64M))@l
-	mtspr	SPRN_MAS1,r9
+	lis	r10,(MAS0_TLBSEL(1)|MAS0_ESEL(0))@h
+	mtspr	SPRN_MAS0,r10
+	lis	r10,(MAS1_VALID|MAS1_IPROT)@h
+	ori	r10,r10,(MAS1_TSIZE(BOOKE_PAGESZ_64M))@l
+	mtspr	SPRN_MAS1,r10
 	/* WIMGE = 0b00000 for now */
-	mtspr	SPRN_MAS2,r8
-	ori	r8,r8,(MAS3_SX|MAS3_SW|MAS3_SR)
-	mtspr	SPRN_MAS3,r8
+	mtspr	SPRN_MAS2,r12
+	ori	r12,r12,(MAS3_SX|MAS3_SW|MAS3_SR)
+	mtspr	SPRN_MAS3,r12
+#ifdef CONFIG_ENABLE_36BIT_PHYS
+	mtspr	SPRN_MAS7,r11
+#endif
 	tlbwe
 
 /* Now we have another mapping for this page, so we jump to that
  * mapping
  */
-	bctr
+	mtspr	SPRN_SRR1,r13
+	rfi
 
 	.align 3
 	.globl __spin_table
 __spin_table:
-	.space CONFIG_NR_CPUS*24
+	.space CONFIG_NR_CPUS*ENTRY_SIZE
 
 	/* Fill in the empty space.  The actual reset vector is
 	 * the last word of the page */
diff --git a/include/common.h b/include/common.h
index f496073..39bcd30 100644
--- a/include/common.h
+++ b/include/common.h
@@ -673,7 +673,7 @@
 #ifdef CONFIG_MP
 int cpu_status(int nr);
 int cpu_reset(int nr);
-int cpu_release(int nr, unsigned long boot_addr, int argc, char *argv[]);
+int cpu_release(int nr, int argc, char *argv[]);
 #endif
 
 #endif	/* __COMMON_H_ */