feat(ccidx): update the do_dcsw_op function to support FEAT_CCIDX

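FEAT_CCIDX changes the layout of the Associativity and NumSets fields
in CCSIDR/CCSIDR2 (aarch32) and CCSIDR_EL1 (aarch64). This patch makes
the do_dcsw_op function check whether FEAT_CCIDX is implemented and
use the matching register format, rather than assuming that it is not
implemented. It also converts space alignment to tabs in a few
existing aarch32 arch.h definitions.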

Signed-off-by: John Powell <john.powell@arm.com>
Change-Id: I12cd00cd7b5889525d4d2750281a751dd74ef5dc
diff --git a/include/arch/aarch32/arch.h b/include/arch/aarch32/arch.h
index a1bd942..bdff25b 100644
--- a/include/arch/aarch32/arch.h
+++ b/include/arch/aarch32/arch.h
@@ -122,6 +122,10 @@
 #define ID_MMFR4_CNP_LENGTH	U(4)
 #define ID_MMFR4_CNP_MASK	U(0xf)
 
+#define ID_MMFR4_CCIDX_SHIFT	U(24)
+#define ID_MMFR4_CCIDX_LENGTH	U(4)
+#define ID_MMFR4_CCIDX_MASK	U(0xf)
+
 /* ID_PFR0 definitions */
 #define ID_PFR0_AMU_SHIFT	U(20)
 #define ID_PFR0_AMU_LENGTH	U(4)
@@ -174,7 +178,7 @@
 #define SCTLR_AFE_BIT		(U(1) << 29)
 #define SCTLR_TE_BIT		(U(1) << 30)
 #define SCTLR_DSSBS_BIT		(U(1) << 31)
-#define SCTLR_RESET_VAL         (SCTLR_RES1 | SCTLR_NTWE_BIT |		\
+#define SCTLR_RESET_VAL		(SCTLR_RES1 | SCTLR_NTWE_BIT |		\
 				SCTLR_NTWI_BIT | SCTLR_CP15BEN_BIT)
 
 /* SDCR definitions */
@@ -295,7 +299,7 @@
 #define CPACR_CP10_SHIFT	U(20)
 #define CPACR_ENABLE_FP_ACCESS	((U(0x3) << CPACR_CP11_SHIFT) |\
 				 (U(0x3) << CPACR_CP10_SHIFT))
-#define CPACR_RESET_VAL         U(0x0)
+#define CPACR_RESET_VAL		U(0x0)
 
 /* FPEXC definitions */
 #define FPEXC_RES1		((U(1) << 10) | (U(1) << 9) | (U(1) << 8))
@@ -495,13 +499,13 @@
 #define CNTP_CTL		U(0x2c)
 
 /* Physical timer control register bit fields shifts and masks */
-#define CNTP_CTL_ENABLE_SHIFT   0
-#define CNTP_CTL_IMASK_SHIFT    1
-#define CNTP_CTL_ISTATUS_SHIFT  2
+#define CNTP_CTL_ENABLE_SHIFT	0
+#define CNTP_CTL_IMASK_SHIFT	1
+#define CNTP_CTL_ISTATUS_SHIFT	2
 
-#define CNTP_CTL_ENABLE_MASK    U(1)
-#define CNTP_CTL_IMASK_MASK     U(1)
-#define CNTP_CTL_ISTATUS_MASK   U(1)
+#define CNTP_CTL_ENABLE_MASK	U(1)
+#define CNTP_CTL_IMASK_MASK	U(1)
+#define CNTP_CTL_ISTATUS_MASK	U(1)
 
 /* MAIR macros */
 #define MAIR0_ATTR_SET(attr, index)	((attr) << ((index) << U(3)))
@@ -559,6 +563,7 @@
 #define CLIDR		p15, 1, c0, c0, 1
 #define CSSELR		p15, 2, c0, c0, 0
 #define CCSIDR		p15, 1, c0, c0, 0
+#define CCSIDR2		p15, 1, c0, c0, 2
 #define HTCR		p15, 4, c2, c0, 2
 #define HMAIR0		p15, 4, c10, c2, 0
 #define ATS1CPR		p15, 0, c7, c8, 0
diff --git a/include/arch/aarch64/arch.h b/include/arch/aarch64/arch.h
index 0fb4e74..29da33c 100644
--- a/include/arch/aarch64/arch.h
+++ b/include/arch/aarch64/arch.h
@@ -297,6 +297,10 @@
 #define ID_AA64MMFR2_EL1_ST_SHIFT	U(28)
 #define ID_AA64MMFR2_EL1_ST_MASK	ULL(0xf)
 
+#define ID_AA64MMFR2_EL1_CCIDX_SHIFT	U(20)
+#define ID_AA64MMFR2_EL1_CCIDX_MASK	ULL(0xf)
+#define ID_AA64MMFR2_EL1_CCIDX_LENGTH	U(4)
+
 #define ID_AA64MMFR2_EL1_CNP_SHIFT	U(0)
 #define ID_AA64MMFR2_EL1_CNP_MASK	ULL(0xf)
 
diff --git a/lib/aarch32/cache_helpers.S b/lib/aarch32/cache_helpers.S
index 7cbefe6..13d1872 100644
--- a/lib/aarch32/cache_helpers.S
+++ b/lib/aarch32/cache_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2016-2021, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -91,6 +91,8 @@
 
 func do_dcsw_op
 	push	{r4-r12, lr}
+	ldcopr	r8, ID_MMFR4		// stash FEAT_CCIDX identifier in r8
+	ubfx	r8, r8, #ID_MMFR4_CCIDX_SHIFT, #ID_MMFR4_CCIDX_LENGTH
 	adr	r11, dcsw_loop_table	// compute cache op based on the operation type
 	add	r6, r11, r0, lsl #3	// cache op is 2x32-bit instructions
 loop1:
@@ -105,13 +107,25 @@
 	ldcopr	r12, CCSIDR		// read the new ccsidr
 	and	r10, r12, #7		// extract the length of the cache lines
 	add	r10, r10, #4		// add 4 (r10 = line length offset)
-	ubfx	r4, r12, #3, #10	// r4 = maximum way number (right aligned)
+
+	cmp	r8, #0			// check for FEAT_CCIDX for Associativity
+	beq	1f
+	ubfx	r4, r12, #3, #21 	// r4 = associativity CCSIDR[23:3]
+	b	2f
+1:
+	ubfx	r4, r12, #3, #10 	// r4 = associativity CCSIDR[12:3]
+2:
 	clz	r5, r4			// r5 = the bit position of the way size increment
 	mov	r9, r4			// r9 working copy of the aligned max way number
 
 loop2:
-	ubfx	r7, r12, #13, #15	// r7 = max set number (right aligned)
-
+	cmp	r8, #0			// check for FEAT_CCIDX for NumSets
+	beq	3f
+	ldcopr	r12, CCSIDR2		// FEAT_CCIDX numsets is in CCSIDR2
+	ubfx	r7, r12, #0, #24	// r7 = numsets CCSIDR2[23:0]
+	b	loop3
+3:
+	ubfx	r7, r12, #13, #15	// r7 = numsets CCSIDR[27:13]
 loop3:
 	orr	r0, r1, r9, LSL r5	// factor in the way number and cache level into r0
 	orr	r0, r0, r7, LSL r10	// factor in the set number
diff --git a/lib/aarch64/cache_helpers.S b/lib/aarch64/cache_helpers.S
index de9c8e4..d1f3847 100644
--- a/lib/aarch64/cache_helpers.S
+++ b/lib/aarch64/cache_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2021, Arm Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -89,6 +89,8 @@
 
 func do_dcsw_op
 	cbz	x3, exit
+	mrs	x12, ID_AA64MMFR2_EL1	// stash FEAT_CCIDX identifier in x12
+	ubfx	x12, x12, #ID_AA64MMFR2_EL1_CCIDX_SHIFT, #ID_AA64MMFR2_EL1_CCIDX_LENGTH
 	adr	x14, dcsw_loop_table	// compute inner loop address
 	add	x14, x14, x0, lsl #5	// inner loop is 8x32-bit instructions
 #if ENABLE_BTI
@@ -108,12 +110,25 @@
 	mrs	x1, ccsidr_el1		// read the new ccsidr
 	and	x2, x1, #7		// extract the length of the cache lines
 	add	x2, x2, #4		// add 4 (line length offset)
-	ubfx	x4, x1, #3, #10		// maximum way number
+
+	cbz	x12, 1f			// check for FEAT_CCIDX for Associativity
+	ubfx	x4, x1, #3, #21 	// x4 = associativity CCSIDR_EL1[23:3]
+	b 	2f
+1:
+	ubfx	x4, x1, #3, #10 	// x4 = associativity CCSIDR_EL1[12:3]
+2:
 	clz	w5, w4			// bit position of way size increment
 	lsl	w9, w4, w5		// w9 = aligned max way number
 	lsl	w16, w8, w5		// w16 = way number loop decrement
 	orr	w9, w10, w9		// w9 = combine way and cache number
-	ubfx	w6, w1, #13, #15	// w6 = max set number
+
+	cbz	x12, 3f			// check for FEAT_CCIDX for NumSets
+	ubfx	x6, x1, #32, #24	// x6 (w6) = numsets CCSIDR_EL1[55:32]
+					// ISA will not allow x->w ubfx
+	b	4f
+3:
+	ubfx	w6, w1, #13, #15	// w6 = numsets CCSIDR_EL1[27:13]
+4:
 	lsl	w17, w8, w2		// w17 = set number loop decrement
 	dsb	sy			// barrier before we start this level
 	br	x14			// jump to DC operation specific loop