Merge "fix(gpt): use DC CIGDPAPA when MTE2 is implemented" into integration
diff --git a/include/arch/aarch64/arch_helpers.h b/include/arch/aarch64/arch_helpers.h
index 2d97018..c205f2c 100644
--- a/include/arch/aarch64/arch_helpers.h
+++ b/include/arch/aarch64/arch_helpers.h
@@ -241,6 +241,7 @@
 
 void flush_dcache_range(uintptr_t addr, size_t size);
 void flush_dcache_to_popa_range(uintptr_t addr, size_t size);
+void flush_dcache_to_popa_range_mte2(uintptr_t addr, size_t size);
 void clean_dcache_range(uintptr_t addr, size_t size);
 void inv_dcache_range(uintptr_t addr, size_t size);
 bool is_dcache_enabled(void);
diff --git a/lib/aarch64/cache_helpers.S b/lib/aarch64/cache_helpers.S
index 314ed6e..ff9a4e6 100644
--- a/lib/aarch64/cache_helpers.S
+++ b/lib/aarch64/cache_helpers.S
@@ -9,6 +9,7 @@
 
 	.globl	flush_dcache_range
 	.globl	flush_dcache_to_popa_range
+	.globl	flush_dcache_to_popa_range_mte2
 	.globl	clean_dcache_range
 	.globl	inv_dcache_range
 	.globl	dcsw_op_louis
@@ -17,6 +18,20 @@
 	.globl	dcsw_op_level2
 	.globl	dcsw_op_level3
 
+/* Opcodes for data cache maintenance by PA instructions. */
+
+/*
+ * sys #6, c7, c14, #1, x0
+ * DC CIPAPA, X0
+ */
+#define dc_cipapa_x0	0xd50e7e20
+
+/*
+ * sys #6, c7, c14, #5, x0
+ * DC CIGDPAPA, X0
+ */
+#define dc_cigdpapa_x0	0xd50e7ea0
+
 /*
  * This macro can be used for implementing various data cache operations `op`
  */
@@ -37,6 +52,24 @@
 	ret
 .endm
 
+/* op: hex opcode of the cache op on x0. x0 = addr, x1 = size. Clobbers x0-x3 */
+.macro do_dcache_maintenance_instr op
+	/* Exit early if size is zero */
+	cbz	x1, exit_loop_\op
+	dcache_line_size x2, x3		/* x2 = line size (x3: temp) */
+	add	x1, x1, x0		/* x1 = end, from the unaligned addr */
+	sub	x3, x2, #1		/* x3 = line-offset mask */
+	bic	x0, x0, x3		/* align x0 down to a cache line */
+loop_\op:
+	.inst	\op			/* cache op on the line at x0 */
+	add	x0, x0, x2		/* step to the next line */
+	cmp	x0, x1
+	b.lo	loop_\op		/* repeat while x0 < end (unsigned) */
+	dsb	osh			/* complete the ops before returning */
+exit_loop_\op:
+	ret
+.endm
+
 .macro check_plat_can_cmo
 #if CONDITIONAL_CMO
 	mov	x3, x30
@@ -49,10 +82,11 @@
 	mov	 x0, x2
 #endif
 .endm
-	/* ------------------------------------------
-	 * Clean+Invalidate from base address till
-	 * size. 'x0' = addr, 'x1' = size
-	 * ------------------------------------------
+
+	/* -------------------------------------------
+	 * DCache Clean+Invalidate by MVA from base
+	 * address till size. 'x0' = addr, 'x1' = size
+	 * -------------------------------------------
 	 */
 func flush_dcache_range
 	check_plat_can_cmo
@@ -60,8 +94,8 @@
 endfunc flush_dcache_range
 
 	/* ------------------------------------------
-	 * Clean from base address till size.
-	 * 'x0' = addr, 'x1' = size
+	 * DCache Clean by MVA from base address till
+	 * size. 'x0' = addr, 'x1' = size
 	 * ------------------------------------------
 	 */
 func clean_dcache_range
@@ -70,8 +104,8 @@
 endfunc clean_dcache_range
 
 	/* ------------------------------------------
-	 * Invalidate from base address till
-	 * size. 'x0' = addr, 'x1' = size
+	 * DCache Invalidate by MVA from base address
+	 * till size. 'x0' = addr, 'x1' = size
 	 * ------------------------------------------
 	 */
 func inv_dcache_range
@@ -79,37 +113,36 @@
 	do_dcache_maintenance_by_mva ivac
 endfunc inv_dcache_range
 
-
 	/*
-	 * On implementations with FEAT_MTE2,
-	 * Root firmware must issue DC_CIGDPAPA instead of DC_CIPAPA ,
-	 * in order to additionally clean and invalidate Allocation Tags
-	 * associated with the affected locations.
-	 *
 	 * ------------------------------------------
-	 * Clean+Invalidate by PA to POPA
-	 * from base address till size.
+	 * DCache Clean+Invalidate by PA to POPA from
+	 * base address till size.
 	 * 'x0' = addr, 'x1' = size
 	 * ------------------------------------------
 	 */
 func flush_dcache_to_popa_range
-	/* Exit early if size is zero */
-	cbz	x1, exit_loop_dc_cipapa
 	check_plat_can_cmo
-	dcache_line_size x2, x3
-	sub	x3, x2, #1
-	bic	x0, x0, x3
-	add	x1, x1, x0
-loop_dc_cipapa:
-	sys	#6, c7, c14, #1, x0 /* DC CIPAPA,<Xt> */
-	add	x0, x0, x2
-	cmp	x0, x1
-	b.lo	loop_dc_cipapa
-	dsb	osh
-exit_loop_dc_cipapa:
-	ret
+	/* DC CIPAPA, x0 on each cache line, emitted as a raw opcode */
+	do_dcache_maintenance_instr dc_cipapa_x0
 endfunc	flush_dcache_to_popa_range
 
+	/*
+	 * ------------------------------------------
+	 * DCache Clean+Invalidate of data and tags
+	 * by PA to POPA from base address till size.
+	 * 'x0' = addr, 'x1' = size
+	 * ------------------------------------------
+	 * On implementations with FEAT_MTE2, Root firmware must issue
+	 * DC CIGDPAPA instead of DC CIPAPA, in order to additionally
+	 * clean and invalidate Allocation Tags associated with the
+	 * affected locations.
+	 */
+func flush_dcache_to_popa_range_mte2
+	check_plat_can_cmo
+	/* DC CIGDPAPA, x0 on each cache line, emitted as a raw opcode */
+	do_dcache_maintenance_instr dc_cigdpapa_x0
+endfunc	flush_dcache_to_popa_range_mte2
+
 	/* ---------------------------------------------------------------
 	 * Data cache operations by set/way to the level specified
 	 *
diff --git a/lib/gpt_rme/gpt_rme.c b/lib/gpt_rme/gpt_rme.c
index f5353cb..36f7a51 100644
--- a/lib/gpt_rme/gpt_rme.c
+++ b/lib/gpt_rme/gpt_rme.c
@@ -11,6 +11,7 @@
 #include <stdint.h>
 
 #include <arch.h>
+#include <arch_features.h>
 #include <arch_helpers.h>
 #include <common/debug.h>
 #include "gpt_rme_private.h"
@@ -1095,8 +1096,13 @@
 	 * states, remove any data speculatively fetched into the target
 	 * physical address space. Issue DC CIPAPA over address range
 	 */
-	flush_dcache_to_popa_range(nse | base,
-				   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
+	if (is_feat_mte2_supported()) {
+		flush_dcache_to_popa_range_mte2(nse | base,
+					GPT_PGS_ACTUAL_SIZE(gpt_config.p));
+	} else {
+		flush_dcache_to_popa_range(nse | base,
+					   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
+	}
 
 	write_gpt(&gpi_info.gpt_l1_desc, gpi_info.gpt_l1_addr,
 		  gpi_info.gpi_shift, gpi_info.idx, target_pas);
@@ -1107,8 +1113,13 @@
 
 	nse = (uint64_t)GPT_NSE_NS << GPT_NSE_SHIFT;
 
-	flush_dcache_to_popa_range(nse | base,
-				   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
+	if (is_feat_mte2_supported()) {
+		flush_dcache_to_popa_range_mte2(nse | base,
+					   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
+	} else {
+		flush_dcache_to_popa_range(nse | base,
+					   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
+	}
 
 	/* Unlock access to the L1 tables. */
 	spin_unlock(&gpt_lock);
@@ -1225,8 +1236,13 @@
 	}
 
 	/* Ensure that the scrubbed data has made it past the PoPA */
-	flush_dcache_to_popa_range(nse | base,
-				   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
+	if (is_feat_mte2_supported()) {
+		flush_dcache_to_popa_range_mte2(nse | base,
+					   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
+	} else {
+		flush_dcache_to_popa_range(nse | base,
+					   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
+	}
 
 	/*
 	 * Remove any data loaded speculatively
@@ -1234,8 +1250,13 @@
 	 */
 	nse = (uint64_t)GPT_NSE_NS << GPT_NSE_SHIFT;
 
-	flush_dcache_to_popa_range(nse | base,
-				   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
+	if (is_feat_mte2_supported()) {
+		flush_dcache_to_popa_range_mte2(nse | base,
+					   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
+	} else {
+		flush_dcache_to_popa_range(nse | base,
+					   GPT_PGS_ACTUAL_SIZE(gpt_config.p));
+	}
 
 	/* Clear existing GPI encoding and transition granule. */
 	write_gpt(&gpi_info.gpt_l1_desc, gpi_info.gpt_l1_addr,