Merge "doc: Document E and W build options" into integration
diff --git a/Makefile b/Makefile
index ef3b042..d2e7b58 100644
--- a/Makefile
+++ b/Makefile
@@ -937,7 +937,7 @@
 
 .PHONY: libraries
 romlib.bin: libraries
-	${Q}${MAKE} PLAT_DIR=${PLAT_DIR} BUILD_PLAT=${BUILD_PLAT} INCLUDES='${INCLUDES}' DEFINES='${DEFINES}' --no-print-directory -C ${ROMLIBPATH} all
+	${Q}${MAKE} PLAT_DIR=${PLAT_DIR} BUILD_PLAT=${BUILD_PLAT} ENABLE_BTI=${ENABLE_BTI} ARM_ARCH_MINOR=${ARM_ARCH_MINOR} INCLUDES='${INCLUDES}' DEFINES='${DEFINES}' --no-print-directory -C ${ROMLIBPATH} all
 
 cscope:
 	@echo "  CSCOPE"
diff --git a/docs/design/cpu-specific-build-macros.rst b/docs/design/cpu-specific-build-macros.rst
index 07983a9..6b524c2 100644
--- a/docs/design/cpu-specific-build-macros.rst
+++ b/docs/design/cpu-specific-build-macros.rst
@@ -226,6 +226,9 @@
 -  ``ERRATA_A76_1275112``: This applies errata 1275112 workaround to Cortex-A76
    CPU. This needs to be enabled only for revision <= r3p0 of the CPU.
 
+-  ``ERRATA_N1_1315703``: This applies errata 1315703 workaround to Neoverse-N1
+   CPU. This needs to be enabled only for revision <= r3p0 of the CPU.
+
 DSU Errata Workarounds
 ----------------------
 
diff --git a/drivers/arm/gic/v3/gicv3_main.c b/drivers/arm/gic/v3/gicv3_main.c
index cf92f10..a94dbf6 100644
--- a/drivers/arm/gic/v3/gicv3_main.c
+++ b/drivers/arm/gic/v3/gicv3_main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2018, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2015-2019, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -265,6 +265,10 @@
 	write_scr_el3(scr_el3 & (~SCR_NS_BIT));
 	isb();
 
+	/* Write the secure ICC_SRE_EL1 register */
+	write_icc_sre_el1(ICC_SRE_SRE_BIT);
+	isb();
+
 	/* Program the idle priority in the PMR */
 	write_icc_pmr_el1(GIC_PRI_MASK);
 
@@ -274,9 +278,6 @@
 	/* Enable Group1 Secure interrupts */
 	write_icc_igrpen1_el3(read_icc_igrpen1_el3() |
 				IGRPEN1_EL3_ENABLE_G1S_BIT);
-
-	/* Write the secure ICC_SRE_EL1 register */
-	write_icc_sre_el1(ICC_SRE_SRE_BIT);
 	isb();
 }
 
diff --git a/include/lib/cpus/aarch64/neoverse_n1.h b/include/lib/cpus/aarch64/neoverse_n1.h
index ed5f136..b66aeb8 100644
--- a/include/lib/cpus/aarch64/neoverse_n1.h
+++ b/include/lib/cpus/aarch64/neoverse_n1.h
@@ -35,7 +35,8 @@
  ******************************************************************************/
 #define NEOVERSE_N1_CPUACTLR2_EL1	S3_0_C15_C1_1
 
-#define NEOVERSE_N1_CPUACTLR2_EL1_BIT_2	(ULL(1) << 2)
+#define NEOVERSE_N1_CPUACTLR2_EL1_BIT_2		(ULL(1) << 2)
+#define NEOVERSE_N1_CPUACTLR2_EL1_BIT_16	(ULL(1) << 16)
 
 /* Instruction patching registers */
 #define CPUPSELR_EL3	S3_6_C15_C8_0
diff --git a/lib/cpus/aarch64/cortex_a53.S b/lib/cpus/aarch64/cortex_a53.S
index 6fd3c53..b105de2 100644
--- a/lib/cpus/aarch64/cortex_a53.S
+++ b/lib/cpus/aarch64/cortex_a53.S
@@ -279,13 +279,11 @@
 func cortex_a53_core_pwr_dwn
 	mov	x18, x30
 
-#if !TI_AM65X_WORKAROUND
 	/* ---------------------------------------------
 	 * Turn off caches.
 	 * ---------------------------------------------
 	 */
 	bl	cortex_a53_disable_dcache
-#endif
 
 	/* ---------------------------------------------
 	 * Flush L1 caches.
@@ -305,13 +303,11 @@
 func cortex_a53_cluster_pwr_dwn
 	mov	x18, x30
 
-#if !TI_AM65X_WORKAROUND
 	/* ---------------------------------------------
 	 * Turn off caches.
 	 * ---------------------------------------------
 	 */
 	bl	cortex_a53_disable_dcache
-#endif
 
 	/* ---------------------------------------------
 	 * Flush L1 caches.
diff --git a/lib/cpus/aarch64/cortex_a76.S b/lib/cpus/aarch64/cortex_a76.S
index b48283c..868667e 100644
--- a/lib/cpus/aarch64/cortex_a76.S
+++ b/lib/cpus/aarch64/cortex_a76.S
@@ -18,6 +18,11 @@
 #error "Cortex-A76 must be compiled with HW_ASSISTED_COHERENCY enabled"
 #endif
 
+/* 64-bit only core */
+#if CTX_INCLUDE_AARCH32_REGS == 1
+#error "Cortex-A76 supports only AArch64. Compile with CTX_INCLUDE_AARCH32_REGS=0"
+#endif
+
 #define ESR_EL3_A64_SMC0	0x5e000000
 #define ESR_EL3_A32_SMC0	0x4e000000
 
diff --git a/lib/cpus/aarch64/cortex_a76ae.S b/lib/cpus/aarch64/cortex_a76ae.S
index 46e9450..888f98b 100644
--- a/lib/cpus/aarch64/cortex_a76ae.S
+++ b/lib/cpus/aarch64/cortex_a76ae.S
@@ -13,6 +13,11 @@
 #error "Cortex-A76AE must be compiled with HW_ASSISTED_COHERENCY enabled"
 #endif
 
+/* 64-bit only core */
+#if CTX_INCLUDE_AARCH32_REGS == 1
+#error "Cortex-A76AE supports only AArch64. Compile with CTX_INCLUDE_AARCH32_REGS=0"
+#endif
+
 	/* ---------------------------------------------
 	 * HW will do the cache maintenance while powering down
 	 * ---------------------------------------------
diff --git a/lib/cpus/aarch64/cortex_deimos.S b/lib/cpus/aarch64/cortex_deimos.S
index e73e89f..df4c128 100644
--- a/lib/cpus/aarch64/cortex_deimos.S
+++ b/lib/cpus/aarch64/cortex_deimos.S
@@ -16,6 +16,11 @@
 #error "Deimos must be compiled with HW_ASSISTED_COHERENCY enabled"
 #endif
 
+/* 64-bit only core */
+#if CTX_INCLUDE_AARCH32_REGS == 1
+#error "Cortex-Deimos supports only AArch64. Compile with CTX_INCLUDE_AARCH32_REGS=0"
+#endif
+
 	/* ---------------------------------------------
 	 * HW will do the cache maintenance while powering down
 	 * ---------------------------------------------
diff --git a/lib/cpus/aarch64/neoverse_e1.S b/lib/cpus/aarch64/neoverse_e1.S
index 71e7b51..d840da8 100644
--- a/lib/cpus/aarch64/neoverse_e1.S
+++ b/lib/cpus/aarch64/neoverse_e1.S
@@ -16,6 +16,11 @@
 #error "Neoverse E1 must be compiled with HW_ASSISTED_COHERENCY enabled"
 #endif
 
+/* 64-bit only core */
+#if CTX_INCLUDE_AARCH32_REGS == 1
+#error "Neoverse-E1 supports only AArch64. Compile with CTX_INCLUDE_AARCH32_REGS=0"
+#endif
+
 func neoverse_e1_cpu_pwr_dwn
 	mrs	x0, NEOVERSE_E1_CPUPWRCTLR_EL1
 	orr	x0, x0, #NEOVERSE_E1_CPUPWRCTLR_EL1_CORE_PWRDN_BIT
diff --git a/lib/cpus/aarch64/neoverse_n1.S b/lib/cpus/aarch64/neoverse_n1.S
index a0babb0..d685b7e 100644
--- a/lib/cpus/aarch64/neoverse_n1.S
+++ b/lib/cpus/aarch64/neoverse_n1.S
@@ -15,8 +15,13 @@
 #error "Neoverse N1 must be compiled with HW_ASSISTED_COHERENCY enabled"
 #endif
 
+/* 64-bit only core */
+#if CTX_INCLUDE_AARCH32_REGS == 1
+#error "Neoverse-N1 supports only AArch64. Compile with CTX_INCLUDE_AARCH32_REGS=0"
+#endif
+
 /* --------------------------------------------------
- * Errata Workaround for Neoverse N1 Errata
+ * Errata Workaround for Neoverse N1 Erratum 1043202.
  * This applies to revision r0p0 and r1p0 of Neoverse N1.
  * Inputs:
  * x0: variant[4:7] and revision[0:3] of current cpu.
@@ -70,6 +75,35 @@
 	ret
 endfunc neoverse_n1_disable_speculative_loads
 
+/* --------------------------------------------------
+ * Errata Workaround for Neoverse N1 Erratum 1315703.
+ * This applies to revision <= r3p0 of Neoverse N1.
+ * Inputs:
+ * x0: variant[4:7] and revision[0:3] of current cpu.
+ * Shall clobber: x0-x17
+ * --------------------------------------------------
+ */
+func errata_n1_1315703_wa
+	/* Save the return address, then test x0 against revision r3p0 */
+	mov	x17, x30
+	bl	check_errata_1315703
+	cbz	x0, 1f
+	/* Check returned non-zero: set bit 16 of CPUACTLR2_EL1 */
+	mrs	x0, NEOVERSE_N1_CPUACTLR2_EL1
+	orr	x0, x0, #NEOVERSE_N1_CPUACTLR2_EL1_BIT_16
+	msr	NEOVERSE_N1_CPUACTLR2_EL1, x0
+	isb
+
+1:
+	ret	x17
+endfunc errata_n1_1315703_wa
+
+func check_errata_1315703
+	/* Applies to everything <= r3p0 (0x30: variant 3, revision 0). */
+	mov	x1, #0x30
+	b	cpu_rev_var_ls	/* plain branch: the helper returns to our caller */
+endfunc check_errata_1315703
+
 func neoverse_n1_reset_func
 	mov	x19, x30
 
@@ -89,6 +123,11 @@
 	bl	errata_n1_1043202_wa
 #endif
 
+#if ERRATA_N1_1315703
+	mov	x0, x18
+	bl	errata_n1_1315703_wa
+#endif
+
 #if ENABLE_AMU
 	/* Make sure accesses from EL0/EL1 and EL2 are not trapped to EL3 */
 	mrs	x0, actlr_el3
@@ -141,6 +180,7 @@
 	 * checking functions of each errata.
 	 */
 	report_errata ERRATA_N1_1043202, neoverse_n1, 1043202
+	report_errata ERRATA_N1_1315703, neoverse_n1, 1315703
 
 	ldp	x8, x30, [sp], #16
 	ret
diff --git a/lib/cpus/aarch64/neoverse_zeus.S b/lib/cpus/aarch64/neoverse_zeus.S
index c5241af..3d85013 100644
--- a/lib/cpus/aarch64/neoverse_zeus.S
+++ b/lib/cpus/aarch64/neoverse_zeus.S
@@ -16,6 +16,11 @@
 #error "Neoverse Zeus must be compiled with HW_ASSISTED_COHERENCY enabled"
 #endif
 
+/* 64-bit only core */
+#if CTX_INCLUDE_AARCH32_REGS == 1
+#error "Neoverse-Zeus supports only AArch64. Compile with CTX_INCLUDE_AARCH32_REGS=0"
+#endif
+
 	/* ---------------------------------------------
 	 * HW will do the cache maintenance while powering down
 	 * ---------------------------------------------
diff --git a/lib/cpus/cpu-ops.mk b/lib/cpus/cpu-ops.mk
index 599e11e..db45375 100644
--- a/lib/cpus/cpu-ops.mk
+++ b/lib/cpus/cpu-ops.mk
@@ -238,6 +238,10 @@
 # only to r0p0 and r1p0 of the Neoverse N1 cpu.
 ERRATA_N1_1043202	?=1
 
+# Flag to apply erratum 1315703 workaround during reset. This erratum applies
+# to revisions before r3p1 of the Neoverse N1 cpu.
+ERRATA_N1_1315703	?=1
+
 # Flag to apply DSU erratum 798953. This erratum applies to DSUs revision r0p0.
 # Applying the workaround results in higher DSU power consumption on idle.
 ERRATA_DSU_798953	?=0
@@ -427,6 +431,10 @@
 $(eval $(call assert_boolean,ERRATA_N1_1043202))
 $(eval $(call add_define,ERRATA_N1_1043202))
 
+# Process ERRATA_N1_1315703 flag
+$(eval $(call assert_boolean,ERRATA_N1_1315703))
+$(eval $(call add_define,ERRATA_N1_1315703))
+
 # Process ERRATA_DSU_798953 flag
 $(eval $(call assert_boolean,ERRATA_DSU_798953))
 $(eval $(call add_define,ERRATA_DSU_798953))
diff --git a/lib/psci/psci_common.c b/lib/psci/psci_common.c
index 2928c33..3f5e989 100644
--- a/lib/psci/psci_common.c
+++ b/lib/psci/psci_common.c
@@ -568,35 +568,35 @@
 }
 
 /*******************************************************************************
- * This function is passed a cpu_index and the highest level in the topology
- * tree that the operation should be applied to. It picks up locks in order of
- * increasing power domain level in the range specified.
+ * This function is passed the highest level in the topology tree that the
+ * operation should be applied to and a list of node indexes. It picks up locks
+ * from the node index list in order of increasing power domain level in the
+ * range specified.
  ******************************************************************************/
-void psci_acquire_pwr_domain_locks(unsigned int end_pwrlvl, int cpu_idx)
+void psci_acquire_pwr_domain_locks(unsigned int end_pwrlvl,
+				   const unsigned int *parent_nodes)
 {
-	unsigned int parent_idx = psci_cpu_pd_nodes[cpu_idx].parent_node;
+	unsigned int parent_idx;
 	unsigned int level;
 
 	/* No locking required for level 0. Hence start locking from level 1 */
 	for (level = PSCI_CPU_PWR_LVL + 1U; level <= end_pwrlvl; level++) {
+		parent_idx = parent_nodes[level - 1U];
 		psci_lock_get(&psci_non_cpu_pd_nodes[parent_idx]);
-		parent_idx = psci_non_cpu_pd_nodes[parent_idx].parent_node;
 	}
 }
 
 /*******************************************************************************
- * This function is passed a cpu_index and the highest level in the topology
- * tree that the operation should be applied to. It releases the locks in order
- * of decreasing power domain level in the range specified.
+ * This function is passed the highest level in the topology tree that the
+ * operation should be applied to and a list of node indexes. It releases the
+ * locks in order of decreasing power domain level in the range specified.
  ******************************************************************************/
-void psci_release_pwr_domain_locks(unsigned int end_pwrlvl, int cpu_idx)
+void psci_release_pwr_domain_locks(unsigned int end_pwrlvl,
+				   const unsigned int *parent_nodes)
 {
-	unsigned int parent_idx, parent_nodes[PLAT_MAX_PWR_LVL] = {0};
+	unsigned int parent_idx;
 	unsigned int level;
 
-	/* Get the parent nodes */
-	psci_get_parent_pwr_domain_nodes(cpu_idx, end_pwrlvl, parent_nodes);
-
 	/* Unlock top down. No unlocking required for level 0. */
 	for (level = end_pwrlvl; level >= PSCI_CPU_PWR_LVL + 1U; level--) {
 		parent_idx = parent_nodes[level - 1U];
@@ -764,6 +764,7 @@
 {
 	unsigned int end_pwrlvl;
 	int cpu_idx = (int) plat_my_core_pos();
+	unsigned int parent_nodes[PLAT_MAX_PWR_LVL] = {0};
 	psci_power_state_t state_info = { {PSCI_LOCAL_STATE_RUN} };
 
 	/*
@@ -781,12 +782,15 @@
 	 */
 	end_pwrlvl = get_power_on_target_pwrlvl();
 
+	/* Get the parent nodes */
+	psci_get_parent_pwr_domain_nodes(cpu_idx, end_pwrlvl, parent_nodes);
+
 	/*
 	 * This function acquires the lock corresponding to each power level so
 	 * that by the time all locks are taken, the system topology is snapshot
 	 * and state management can be done safely.
 	 */
-	psci_acquire_pwr_domain_locks(end_pwrlvl, cpu_idx);
+	psci_acquire_pwr_domain_locks(end_pwrlvl, parent_nodes);
 
 	psci_get_target_local_pwr_states(end_pwrlvl, &state_info);
 
@@ -831,7 +835,7 @@
 	 * This loop releases the lock corresponding to each power level
 	 * in the reverse order to which they were acquired.
 	 */
-	psci_release_pwr_domain_locks(end_pwrlvl, cpu_idx);
+	psci_release_pwr_domain_locks(end_pwrlvl, parent_nodes);
 }
 
 /*******************************************************************************
diff --git a/lib/psci/psci_off.c b/lib/psci/psci_off.c
index ac03e05..e8cd8fe 100644
--- a/lib/psci/psci_off.c
+++ b/lib/psci/psci_off.c
@@ -45,6 +45,7 @@
 	int rc = PSCI_E_SUCCESS;
 	int idx = (int) plat_my_core_pos();
 	psci_power_state_t state_info;
+	unsigned int parent_nodes[PLAT_MAX_PWR_LVL] = {0};
 
 	/*
 	 * This function must only be called on platforms where the
@@ -56,11 +57,20 @@
 	psci_set_power_off_state(&state_info);
 
 	/*
+	 * Get the parent nodes here, this is important to do before we
+	 * initiate the power down sequence as after that point the core may
+	 * have exited coherency and its cache may be disabled, any access to
+	 * shared memory after that (such as the parent node lookup in
+	 * psci_cpu_pd_nodes) can cause coherency issues on some platforms.
+	 */
+	psci_get_parent_pwr_domain_nodes(idx, end_pwrlvl, parent_nodes);
+
+	/*
 	 * This function acquires the lock corresponding to each power
 	 * level so that by the time all locks are taken, the system topology
 	 * is snapshot and state management can be done safely.
 	 */
-	psci_acquire_pwr_domain_locks(end_pwrlvl, idx);
+	psci_acquire_pwr_domain_locks(end_pwrlvl, parent_nodes);
 
 	/*
 	 * Call the cpu off handler registered by the Secure Payload Dispatcher
@@ -122,7 +132,7 @@
 	 * Release the locks corresponding to each power level in the
 	 * reverse order to which they were acquired.
 	 */
-	psci_release_pwr_domain_locks(end_pwrlvl, idx);
+	psci_release_pwr_domain_locks(end_pwrlvl, parent_nodes);
 
 	/*
 	 * Check if all actions needed to safely power down this cpu have
diff --git a/lib/psci/psci_private.h b/lib/psci/psci_private.h
index 68ec7fb..bbcc5cf 100644
--- a/lib/psci/psci_private.h
+++ b/lib/psci/psci_private.h
@@ -274,8 +274,10 @@
 				      unsigned int *node_index);
 void psci_do_state_coordination(unsigned int end_pwrlvl,
 				psci_power_state_t *state_info);
-void psci_acquire_pwr_domain_locks(unsigned int end_pwrlvl, int cpu_idx);
-void psci_release_pwr_domain_locks(unsigned int end_pwrlvl, int cpu_idx);
+void psci_acquire_pwr_domain_locks(unsigned int end_pwrlvl,
+				   const unsigned int *parent_nodes);
+void psci_release_pwr_domain_locks(unsigned int end_pwrlvl,
+				   const unsigned int *parent_nodes);
 int psci_validate_suspend_req(const psci_power_state_t *state_info,
 			      unsigned int is_power_down_state);
 unsigned int psci_find_max_off_lvl(const psci_power_state_t *state_info);
diff --git a/lib/psci/psci_suspend.c b/lib/psci/psci_suspend.c
index 8a752c1..6d5c099 100644
--- a/lib/psci/psci_suspend.c
+++ b/lib/psci/psci_suspend.c
@@ -28,10 +28,13 @@
 static void psci_suspend_to_standby_finisher(int cpu_idx,
 					     unsigned int end_pwrlvl)
 {
+	unsigned int parent_nodes[PLAT_MAX_PWR_LVL] = {0};
 	psci_power_state_t state_info;
 
-	psci_acquire_pwr_domain_locks(end_pwrlvl,
-				cpu_idx);
+	/* Get the parent nodes */
+	psci_get_parent_pwr_domain_nodes(cpu_idx, end_pwrlvl, parent_nodes);
+
+	psci_acquire_pwr_domain_locks(end_pwrlvl, parent_nodes);
 
 	/*
 	 * Find out which retention states this CPU has exited from until the
@@ -57,8 +60,7 @@
 	 */
 	psci_set_pwr_domains_to_run(end_pwrlvl);
 
-	psci_release_pwr_domain_locks(end_pwrlvl,
-				cpu_idx);
+	psci_release_pwr_domain_locks(end_pwrlvl, parent_nodes);
 }
 
 /*******************************************************************************
@@ -156,6 +158,7 @@
 {
 	int skip_wfi = 0;
 	int idx = (int) plat_my_core_pos();
+	unsigned int parent_nodes[PLAT_MAX_PWR_LVL] = {0};
 
 	/*
 	 * This function must only be called on platforms where the
@@ -164,13 +167,15 @@
 	assert((psci_plat_pm_ops->pwr_domain_suspend != NULL) &&
 	       (psci_plat_pm_ops->pwr_domain_suspend_finish != NULL));
 
+	/* Get the parent nodes */
+	psci_get_parent_pwr_domain_nodes(idx, end_pwrlvl, parent_nodes);
+
 	/*
 	 * This function acquires the lock corresponding to each power
 	 * level so that by the time all locks are taken, the system topology
 	 * is snapshot and state management can be done safely.
 	 */
-	psci_acquire_pwr_domain_locks(end_pwrlvl,
-				      idx);
+	psci_acquire_pwr_domain_locks(end_pwrlvl, parent_nodes);
 
 	/*
 	 * We check if there are any pending interrupts after the delay
@@ -214,8 +219,8 @@
 	 * Release the locks corresponding to each power level in the
 	 * reverse order to which they were acquired.
 	 */
-	psci_release_pwr_domain_locks(end_pwrlvl,
-				  idx);
+	psci_release_pwr_domain_locks(end_pwrlvl, parent_nodes);
+
 	if (skip_wfi == 1)
 		return;
 
diff --git a/lib/romlib/Makefile b/lib/romlib/Makefile
index 7a3a51e..bc05d0f 100644
--- a/lib/romlib/Makefile
+++ b/lib/romlib/Makefile
@@ -29,6 +29,11 @@
    LDFLAGS += -Map=$(MAPFILE)
 endif
 
+ifeq ($(filter-out 0,${ARM_ARCH_MINOR}),) # unset or 0: baseline ARMv8.0
+   ASFLAGS := -march=armv8-a
+else # ARMv8.x: carry the minor revision into -march
+   ASFLAGS := -march=armv8.${ARM_ARCH_MINOR}-a
+endif
 
 .PHONY: all clean distclean
 
@@ -60,13 +65,13 @@
 
 $(LIB_DIR)/libwrappers.a: $(BUILD_DIR)/jmptbl.i $(WRAPPER_DIR)/jmpvar.o
 	@echo "  AR      $@"
-	$(Q)./genwrappers.sh -b $(WRAPPER_DIR) -o $@ $(BUILD_DIR)/jmptbl.i
+	$(Q)./genwrappers.sh -b $(WRAPPER_DIR) -o $@ --bti=$(ENABLE_BTI) --asflags=$(ASFLAGS) $(BUILD_DIR)/jmptbl.i
 
 $(BUILD_DIR)/jmptbl.i: $(BUILD_DIR)/jmptbl.s
 
 $(BUILD_DIR)/jmptbl.s: ../../$(PLAT_DIR)/jmptbl.i
 	@echo "  TBL     $@"
-	$(Q)./gentbl.sh -o $@ -b $(BUILD_DIR) ../../$(PLAT_DIR)/jmptbl.i
+	$(Q)./gentbl.sh -o $@ -b $(BUILD_DIR) --bti=$(ENABLE_BTI) ../../$(PLAT_DIR)/jmptbl.i
 
 clean:
 	@rm -f $(BUILD_DIR)/*
diff --git a/lib/romlib/gentbl.sh b/lib/romlib/gentbl.sh
index e64cfe2..bfb1ec3cf 100755
--- a/lib/romlib/gentbl.sh
+++ b/lib/romlib/gentbl.sh
@@ -1,5 +1,5 @@
 #!/bin/sh
-# Copyright (c) 2018, ARM Limited and Contributors. All rights reserved.
+# Copyright (c) 2018-2019, ARM Limited and Contributors. All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 
@@ -19,6 +19,10 @@
 		build=$2
 		shift 2
 		;;
+	--bti=*)
+		enable_bti=$(echo $1 | sed 's/--bti=\(.*\)/\1/')
+		shift 1
+		;;
 	--)
 		shift
 		break
@@ -47,12 +51,15 @@
 awk -v OFS="\t" '
 BEGIN{print "#index\tlib\tfunction\t[patch]"}
 {print NR-1, $0}' | tee $build/jmptbl.i |
-awk -v OFS="\n" '
+awk -v OFS="\n" -v BTI=$enable_bti '
 BEGIN {print "\t.text",
              "\t.globl\tjmptbl",
              "jmptbl:"}
       {sub(/[:blank:]*#.*/,"")}
-!/^$/ {if ($3 == "reserved")
+!/^$/ {
+	if (BTI == 1)
+		print "\tbti\tj"
+	if ($3 == "reserved")
 		print "\t.word\t0x0"
 	else
 		print "\tb\t" $3}' > $$.tmp &&
diff --git a/lib/romlib/genwrappers.sh b/lib/romlib/genwrappers.sh
index 07d59ac..e092548 100755
--- a/lib/romlib/genwrappers.sh
+++ b/lib/romlib/genwrappers.sh
@@ -19,6 +19,14 @@
 		build=$2
 		shift 2
 		;;
+	--bti=*)
+		enable_bti=$(echo $1 | sed 's/--bti=\(.*\)/\1/')
+		shift 1
+		;;
+	--asflags=*)
+		asflags=$(echo $1 | sed 's/--asflags=\(.*\)/\1/')
+		shift 1
+		;;
 	--)
 		shift
 		break
@@ -30,8 +38,13 @@
 	esac
 done
 
-awk  '{sub(/[:blank:]*#.*/,"")}
-!/^$/ && $NF != "patch" && $NF != "reserved" {print $1*4, $2, $3}' "$@" |
+# Turn each jmptbl.i row into "<byte offset> <lib> <symbol>" for the loop below.
+# A table slot is 8 bytes when BTI is on (gentbl.sh adds "bti j" before each
+# entry) and 4 bytes otherwise; "patch" and "reserved" rows get no wrapper.
+awk -v BTI="${enable_bti:-0}" '
+{sub(/[[:blank:]]*#.*/,"")}
+!/^$/ && $NF != "patch" && $NF != "reserved" {
+		print $1 * (BTI == 1 ? 8 : 4), $2, $3}' "$@" |
 while read idx lib sym
 do
 	file=$build/${lib}_$sym
@@ -39,14 +52,20 @@
 	cat <<EOF > $file.s
 	.globl	$sym
 $sym:
+EOF
+if [ "${enable_bti:-0}" = 1 ]
+then	# wrapper starts with a BTI landing pad; printf because echo "\t" is not portable
+	printf '\tbti\tjc\n' >> "$file.s"
+fi
+	cat <<EOF >> $file.s
 	ldr	x17, =jmptbl
-	ldr	x17, [x17]
 	mov	x16, #$idx
+	ldr	x17, [x17]
 	add	x16, x16, x17
 	br	x16
 EOF
 
-	${CROSS_COMPILE}as -o $file.o $file.s
+	${CROSS_COMPILE}as ${asflags} -o $file.o $file.s
 done
 
 ${CROSS_COMPILE}ar -rc $out $build/*.o
diff --git a/plat/arm/board/fvp/aarch64/fvp_helpers.S b/plat/arm/board/fvp/aarch64/fvp_helpers.S
index 09f19f6..8efc238 100644
--- a/plat/arm/board/fvp/aarch64/fvp_helpers.S
+++ b/plat/arm/board/fvp/aarch64/fvp_helpers.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013-2017, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2013-2019, ARM Limited and Contributors. All rights reserved.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  */
@@ -16,14 +16,6 @@
 	.globl	plat_is_my_cpu_primary
 	.globl	plat_arm_calc_core_pos
 
-	.macro	fvp_choose_gicmmap  param1, param2, x_tmp, w_tmp, res
-	mov_imm	\x_tmp, V2M_SYSREGS_BASE + V2M_SYS_ID
-	ldr	\w_tmp, [\x_tmp]
-	ubfx	\w_tmp, \w_tmp, #V2M_SYS_ID_BLD_SHIFT, #V2M_SYS_ID_BLD_LENGTH
-	cmp	\w_tmp, #BLD_GIC_VE_MMAP
-	csel	\res, \param1, \param2, eq
-	.endm
-
 	/* -----------------------------------------------------
 	 * void plat_secondary_cold_boot_setup (void);
 	 *
@@ -49,35 +41,6 @@
 	mov_imm	x1, PWRC_BASE
 	str	w0, [x1, #PPOFFR_OFF]
 
-	/* ---------------------------------------------
-	 * Disable GIC bypass as well
-	 * ---------------------------------------------
-	 */
-	/* Check for GICv3 system register access */
-	mrs	x0, id_aa64pfr0_el1
-	ubfx	x0, x0, #ID_AA64PFR0_GIC_SHIFT, #ID_AA64PFR0_GIC_WIDTH
-	cmp	x0, #1
-	b.ne	gicv2_bypass_disable
-
-	/* Check for SRE enable */
-	mrs	x1, ICC_SRE_EL3
-	tst	x1, #ICC_SRE_SRE_BIT
-	b.eq	gicv2_bypass_disable
-
-	mrs	x2, ICC_SRE_EL3
-	orr	x2, x2, #(ICC_SRE_DIB_BIT | ICC_SRE_DFB_BIT)
-	msr	ICC_SRE_EL3, x2
-	b	secondary_cold_boot_wait
-
-gicv2_bypass_disable:
-	mov_imm	x0, VE_GICC_BASE
-	mov_imm	x1, BASE_GICC_BASE
-	fvp_choose_gicmmap	x0, x1, x2, w2, x1
-	mov	w0, #(IRQ_BYP_DIS_GRP1 | FIQ_BYP_DIS_GRP1)
-	orr	w0, w0, #(IRQ_BYP_DIS_GRP0 | FIQ_BYP_DIS_GRP0)
-	str	w0, [x1, #GICC_CTLR]
-
-secondary_cold_boot_wait:
 	/* ---------------------------------------------
 	 * There is no sane reason to come out of this
 	 * wfi so panic if we do. This cpu will be pow-
diff --git a/plat/arm/board/fvp/platform.mk b/plat/arm/board/fvp/platform.mk
index dbc5c21..3cbdfbc 100644
--- a/plat/arm/board/fvp/platform.mk
+++ b/plat/arm/board/fvp/platform.mk
@@ -96,8 +96,8 @@
 
 ifeq (${ARCH}, aarch64)
 
-# select a different set of CPU files, depending on whether we compile with
-# hardware assisted coherency configurations or not
+# select a different set of CPU files, depending on whether we compile for
+# hardware assisted coherency cores or not
 ifeq (${HW_ASSISTED_COHERENCY}, 0)
 	FVP_CPU_LIBS	+=	lib/cpus/aarch64/cortex_a35.S			\
 				lib/cpus/aarch64/cortex_a53.S			\
@@ -105,14 +105,19 @@
 				lib/cpus/aarch64/cortex_a72.S			\
 				lib/cpus/aarch64/cortex_a73.S
 else
-	FVP_CPU_LIBS	+=	lib/cpus/aarch64/cortex_a55.S			\
-				lib/cpus/aarch64/cortex_a75.S			\
-				lib/cpus/aarch64/cortex_a76.S			\
-				lib/cpus/aarch64/cortex_a76ae.S			\
-				lib/cpus/aarch64/neoverse_n1.S			\
-				lib/cpus/aarch64/neoverse_e1.S			\
-				lib/cpus/aarch64/cortex_deimos.S		\
-				lib/cpus/aarch64/neoverse_zeus.S
+	# AArch64-only cores
+	ifeq (${CTX_INCLUDE_AARCH32_REGS}, 0)
+		FVP_CPU_LIBS	+=	lib/cpus/aarch64/cortex_a76.S		\
+					lib/cpus/aarch64/cortex_a76ae.S		\
+					lib/cpus/aarch64/neoverse_n1.S		\
+					lib/cpus/aarch64/neoverse_e1.S		\
+					lib/cpus/aarch64/cortex_deimos.S	\
+					lib/cpus/aarch64/neoverse_zeus.S
+	# AArch64/AArch32
+	else
+		FVP_CPU_LIBS	+=	lib/cpus/aarch64/cortex_a55.S		\
+					lib/cpus/aarch64/cortex_a75.S
+	endif
 endif
 
 else
diff --git a/plat/mediatek/mt8183/aarch64/platform_common.c b/plat/mediatek/mt8183/aarch64/platform_common.c
index ff0aaeb..31d1339 100644
--- a/plat/mediatek/mt8183/aarch64/platform_common.c
+++ b/plat/mediatek/mt8183/aarch64/platform_common.c
@@ -7,10 +7,16 @@
 #include <arch_helpers.h>
 #include <common/bl_common.h>
 #include <common/debug.h>
+#include <mcsi/mcsi.h>
 #include <platform_def.h>
 #include <lib/utils.h>
 #include <lib/xlat_tables/xlat_tables.h>
 
+static const int cci_map[] = {	/* its length is passed to mcsi_init() below */
+	PLAT_MT_CCI_CLUSTER0_SL_IFACE_IX,
+	PLAT_MT_CCI_CLUSTER1_SL_IFACE_IX
+};
+
 /* Table of regions to map using the MMU.  */
 const mmap_region_t plat_mmap[] = {
 	/* for TF text, RO, RW */
@@ -51,3 +57,28 @@
 {
 	return SYS_COUNTER_FREQ_IN_TICKS;
 }
+
+void plat_mtk_cci_init(void)
+{
+	/* Give the MCSI driver its base address and one master per cci_map entry */
+	mcsi_init(PLAT_MT_CCI_BASE, ARRAY_SIZE(cci_map));
+}
+
+void plat_mtk_cci_enable(void)
+{
+	/* Enable CCI coherency for the cluster of the calling CPU (taken from
+	 * its MPIDR). No locks needed: no other cpu is active at the moment.
+	 */
+	cci_enable_cluster_coherency(read_mpidr());
+}
+
+void plat_mtk_cci_disable(void)
+{
+	cci_disable_cluster_coherency(read_mpidr()); /* calling CPU's cluster */
+}
+
+void plat_mtk_cci_init_sf(void)
+{
+	/* Init both mcsi snoop filters; cci_init_sf() spins until each is done. */
+	cci_init_sf();
+}
diff --git a/plat/mediatek/mt8183/bl31_plat_setup.c b/plat/mediatek/mt8183/bl31_plat_setup.c
index 1e5367f..b451189 100644
--- a/plat/mediatek/mt8183/bl31_plat_setup.c
+++ b/plat/mediatek/mt8183/bl31_plat_setup.c
@@ -12,6 +12,7 @@
 #include <common/debug.h>
 #include <drivers/generic_delay_timer.h>
 #include <mcucfg.h>
+#include <mt_gic_v3.h>
 #include <lib/mmio.h>
 #include <mtk_plat_common.h>
 #include <plat_debug.h>
@@ -69,8 +70,8 @@
 				u_register_t arg2, u_register_t arg3)
 {
 	struct mtk_bl31_params *arg_from_bl2 = (struct mtk_bl31_params *)arg0;
-
 	static console_16550_t console;
+
 	console_16550_register(UART0_BASE, UART_CLOCK, UART_BAUDRATE, &console);
 
 	NOTICE("MT8183 bl31_setup\n");
@@ -91,6 +92,13 @@
 {
 	platform_setup_cpu();
 	generic_delay_timer_init();
+
+	/* Initialize the GIC driver, CPU and distributor interfaces */
+	mt_gic_driver_init();
+	mt_gic_init();
+
+	/* Init mcsi SF */
+	plat_mtk_cci_init_sf();
 }
 
 /*******************************************************************************
@@ -99,6 +107,9 @@
  ******************************************************************************/
 void bl31_plat_arch_setup(void)
 {
+	plat_mtk_cci_init();
+	plat_mtk_cci_enable();
+
 	enable_scu(read_mpidr());
 
 	plat_configure_mmu_el3(BL_CODE_BASE,
diff --git a/plat/mediatek/mt8183/drivers/mcsi/mcsi.c b/plat/mediatek/mt8183/drivers/mcsi/mcsi.c
new file mode 100644
index 0000000..cbe7f0a
--- /dev/null
+++ b/plat/mediatek/mt8183/drivers/mcsi/mcsi.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2019, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch.h>
+#include <arch_helpers.h>
+#include <assert.h>
+#include <common/debug.h>
+#include <lib/mmio.h>
+#include <scu.h>
+#include <mcucfg.h>
+#include <drivers/delay_timer.h>
+#include <mcsi/mcsi.h>
+
+#define MAX_CLUSTERS		5
+
+static unsigned long cci_base_addr;
+static unsigned int cci_cluster_ix_to_iface[MAX_CLUSTERS];
+
+/* Record the MCSI base and map clusters [0, num_cci_masters) to slave ifaces. */
+void mcsi_init(unsigned long cci_base, unsigned int num_cci_masters)
+{
+	unsigned int i;
+
+	assert(cci_base);
+	assert(num_cci_masters < MAX_CLUSTERS);
+
+	cci_base_addr = cci_base;
+	/* Cluster i is assigned slave interface i: offset 0x1000 + i * 0x100 */
+	for (i = 0; i < num_cci_masters; i++)
+		cci_cluster_ix_to_iface[i] = SLAVE_IFACE_OFFSET(i);
+}
+
+/* Flush via the snoop filter: set FLUSH_SF bit 0 and poll until it reads 0. */
+void mcsi_cache_flush(void)
+{
+	int timeout = 10000;	/* 10000 polls, 1us apart: 10ms */
+
+	/* to make flush by SF safe, need to disable BIU DCM */
+	mmio_clrbits_32(CCI_CLK_CTRL, 1 << 8);
+	mmio_write_32(cci_base_addr + FLUSH_SF, 0x1);
+
+	for (; timeout; timeout--, udelay(1)) {
+		if ((mmio_read_32(cci_base_addr + FLUSH_SF) & 0x1) == 0x0)
+			break;
+	}
+	/* NOTE(review): on timeout BIU DCM is left disabled - confirm intended */
+	if (!timeout) {
+		INFO("SF flush timeout\n");
+		return;
+	}
+
+	/* enable BIU DCM as it was */
+	mmio_setbits_32(CCI_CLK_CTRL, 1 << 8);
+}
+
+/*
+ * Translate an MPIDR into the base address of its cluster's slave interface.
+ * Relies on clusters being numbered consecutively from zero (Aff1 is used as
+ * the table index) and on the table entry having been initialised beforehand.
+ */
+static inline unsigned long get_slave_iface_base(unsigned long mpidr)
+{
+	unsigned int aff1 = (mpidr >> MPIDR_AFF1_SHIFT) & MPIDR_AFFLVL_MASK;
+	unsigned int iface_off;
+
+	assert(aff1 < MAX_CLUSTERS);
+	iface_off = cci_cluster_ix_to_iface[aff1];
+	assert(iface_off != 0);
+
+	return cci_base_addr + iface_off;
+}
+
+/*
+ * Turn on snooping and DVM messages for the cluster identified by mpidr,
+ * enabling only the features its slave interface reports as supported.
+ */
+void cci_enable_cluster_coherency(unsigned long mpidr)
+{
+	unsigned long iface;
+	unsigned int caps;
+	unsigned int enable_bits = 0;
+
+	assert(cci_base_addr);
+	iface = get_slave_iface_base(mpidr);
+	caps = mmio_read_32(iface);
+
+	/* Spin until bit SNP_PENDING of SNP_PENDING_REG reads 0 */
+	while (mmio_read_32(cci_base_addr + SNP_PENDING_REG) >> SNP_PENDING)
+		;
+
+	if (caps & SNP_SUPPORT)
+		enable_bits |= SNOOP_EN_BIT;
+	if (caps & DVM_SUPPORT)
+		enable_bits |= DVM_EN_BIT;
+
+	mmio_write_32(iface, caps | enable_bits);
+
+	/* Wait for the dust to settle down */
+	while (mmio_read_32(cci_base_addr + SNP_PENDING_REG) >> SNP_PENDING)
+		;
+}
+
+#if ERRATA_MCSIB_SW
+#pragma weak mcsib_sw_workaround_main
+#endif
+
+void cci_disable_cluster_coherency(unsigned long mpidr)
+{
+	unsigned long slave_base;
+	unsigned int config = 0;
+	/* Clears the snoop and DVM enable bits of the slave interface for mpidr. */
+	assert(cci_base_addr);
+	slave_base = get_slave_iface_base(mpidr);
+	/* Spin until bit SNP_PENDING of SNP_PENDING_REG reads 0 */
+	while (mmio_read_32(cci_base_addr + SNP_PENDING_REG) >> SNP_PENDING)
+		;
+
+	config = mmio_read_32(slave_base);
+	config &= ~(DVM_EN_BIT | SNOOP_EN_BIT);
+
+	/* Disable Snoops and DVM messages */
+	mmio_write_32(slave_base, config);
+	/* Optional hook; declared weak just above when ERRATA_MCSIB_SW is set */
+#if ERRATA_MCSIB_SW
+	mcsib_sw_workaround_main();
+#endif
+
+	/* Wait for the dust to settle down */
+	while (mmio_read_32(cci_base_addr + SNP_PENDING_REG) >> SNP_PENDING)
+		;
+}
+
+/* Set SECURE_ACC_EN in CENTRAL_CTRL_REG for NS_ACC, clear it otherwise. */
+void cci_secure_switch(unsigned int status)
+{
+	unsigned long ctrl_addr = cci_base_addr + CENTRAL_CTRL_REG;
+	unsigned int ctrl = mmio_read_32(ctrl_addr);
+
+	ctrl = (status == NS_ACC) ? (ctrl | SECURE_ACC_EN) :
+				    (ctrl & ~SECURE_ACC_EN);
+
+	mmio_write_32(ctrl_addr, ctrl);
+}
+
+/* Set PMU_SECURE_ACC_EN in CENTRAL_CTRL_REG for NS_ACC, clear it otherwise. */
+void cci_pmu_secure_switch(unsigned int status)
+{
+	unsigned long ctrl_addr = cci_base_addr + CENTRAL_CTRL_REG;
+	unsigned int ctrl = mmio_read_32(ctrl_addr);
+
+	ctrl = (status == NS_ACC) ? (ctrl | PMU_SECURE_ACC_EN) :
+				    (ctrl & ~PMU_SECURE_ACC_EN);
+
+	mmio_write_32(ctrl_addr, ctrl);
+}
+
+void cci_init_sf(void)
+{	/* Initialise snoop filter 1, then snoop filter 2 (unbounded busy-waits) */
+	while (mmio_read_32(cci_base_addr + SNP_PENDING_REG) >> SNP_PENDING)
+		;
+	/* init sf1: write the trigger, wait for it to clear, then for done */
+	mmio_write_32(cci_base_addr + SF_INIT_REG, TRIG_SF1_INIT);
+	while (mmio_read_32(cci_base_addr + SF_INIT_REG) & TRIG_SF1_INIT)
+		;
+	while (!(mmio_read_32(cci_base_addr + SF_INIT_REG) & SF1_INIT_DONE))
+		;
+	/* init sf2: same sequence with the SF2 trigger and done bits */
+	mmio_write_32(cci_base_addr + SF_INIT_REG, TRIG_SF2_INIT);
+	while (mmio_read_32(cci_base_addr + SF_INIT_REG) & TRIG_SF2_INIT)
+		;
+	while (!(mmio_read_32(cci_base_addr + SF_INIT_REG) & SF2_INIT_DONE))
+		;
+}
+
+void cci_interrupt_en(void)
+{	/* Sets the INT_EN bit in the central control register. */
+	mmio_setbits_32(cci_base_addr + CENTRAL_CTRL_REG, INT_EN);
+}
+
+/* Run op on the register at offset; returns the READ value, otherwise 0. */
+unsigned long cci_reg_access(unsigned int op, unsigned long offset,
+			     unsigned long val)
+{
+	unsigned long ret = 0;
+
+	if ((cci_base_addr == 0) || (offset >= MSCI_MEMORY_SZ))
+		panic();	/* driver not initialised, or offset outside the window */
+	switch (op) {
+	case MCSI_REG_ACCESS_READ:
+		ret = mmio_read_32(cci_base_addr + offset);
+		break;
+	case MCSI_REG_ACCESS_WRITE:
+		mmio_write_32(cci_base_addr + offset, val);
+		dsb();
+		break;
+	case MCSI_REG_ACCESS_SET_BITMASK:
+		mmio_setbits_32(cci_base_addr + offset, val);
+		dsb();
+		break;
+	case MCSI_REG_ACCESS_CLEAR_BITMASK:
+		mmio_clrbits_32(cci_base_addr + offset, val);
+		dsb();
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
diff --git a/plat/mediatek/mt8183/drivers/mcsi/mcsi.h b/plat/mediatek/mt8183/drivers/mcsi/mcsi.h
new file mode 100644
index 0000000..c13e22a
--- /dev/null
+++ b/plat/mediatek/mt8183/drivers/mcsi/mcsi.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2019, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef MCSI_H
+#define MCSI_H
+
+#define SLAVE_IFACE7_OFFSET		0x1700
+#define SLAVE_IFACE6_OFFSET		0x1600
+#define SLAVE_IFACE5_OFFSET		0x1500
+#define SLAVE_IFACE4_OFFSET		0x1400
+#define SLAVE_IFACE3_OFFSET		0x1300
+#define SLAVE_IFACE2_OFFSET		0x1200
+#define SLAVE_IFACE1_OFFSET		0x1100
+#define SLAVE_IFACE0_OFFSET		0x1000
+#define SLAVE_IFACE_OFFSET(index)	(SLAVE_IFACE0_OFFSET + \
+							(0x100 * (index)))
+/* Control and ID register offsets */
+#define CENTRAL_CTRL_REG		0x0
+#define ERR_FLAG_REG			0x4
+#define SF_INIT_REG			0x10
+#define SF_CTRL_REG			0x14
+#define DCM_CTRL_REG			0x18
+#define ERR_FLAG2_REG			0x20
+#define SNP_PENDING_REG			0x28
+#define ACP_PENDING_REG			0x2c
+#define FLUSH_SF			0x500
+#define SYS_CCE_CTRL			0x2000
+#define MST1_CTRL			0x2100
+#define MTS2_CTRL			0x2200
+#define XBAR_ARAW_ARB			0x3000
+#define XBAR_R_ARB			0x3004
+
+/* Slave interface register offsets */
+#define SNOOP_CTRL_REG			0x0
+#define QOS_CTRL_REG			0x4
+#define QOS_OVERRIDE_REG		0x8
+#define QOS_TARGET_REG			0xc
+#define BD_CTRL_REG			0x40
+
+/* Snoop Control register bits; bit 31 is 1U since 1 << 31 overflows int */
+#define DVM_SUPPORT			(1U << 31)
+#define SNP_SUPPORT			(1 << 30)
+#define SHAREABLE_OVWRT			(1 << 2)
+#define DVM_EN_BIT			(1 << 1)
+#define SNOOP_EN_BIT			(1 << 0)
+#define SF2_INIT_DONE			(1 << 17)	/* SF_INIT_REG */
+#define SF1_INIT_DONE			(1 << 16)	/* SF_INIT_REG */
+#define TRIG_SF2_INIT			(1 << 1)	/* SF_INIT_REG */
+#define TRIG_SF1_INIT			(1 << 0)	/* SF_INIT_REG */
+
+/* Status register bit definitions */
+#define SNP_PENDING			31
+
+/* Status bit */
+#define NS_ACC				1
+#define S_ACC				0
+
+/* Central control register bit definitions */
+#define PMU_SECURE_ACC_EN		(1 << 4)
+#define INT_EN				(1 << 3)
+#define SECURE_ACC_EN			(1 << 2)
+#define DVM_DIS				(1 << 1)
+#define SNOOP_DIS			(1 << 0)
+
+#define MSCI_MEMORY_SZ			(0x10000)
+
+#define MCSI_REG_ACCESS_READ		(0x0)
+#define MCSI_REG_ACCESS_WRITE		(0x1)
+#define MCSI_REG_ACCESS_SET_BITMASK	(0x2)
+#define MCSI_REG_ACCESS_CLEAR_BITMASK	(0x3)
+
+#define NR_MAX_SLV			(7)
+
+/* ICCS */
+#define CACHE_INSTR_EN			(1 << 2)
+#define IDLE_CACHE			(1 << 3)
+#define USE_SHARED_CACHE		(1 << 4)
+#define CACHE_SHARED_PRE_EN		(1 << 5)
+#define CACHE_SHARED_POST_EN		(1 << 6)
+
+#define ACP_PENDING_MASK		(0x1007f)
+
+#define CCI_CLK_CTRL			(MCUCFG_BASE + 0x660)
+
+#ifndef __ASSEMBLY__
+
+#include <plat/common/common_def.h>
+#include <stdint.h>
+
+/* Function declarations */
+
+/*
+ * The MCSI driver must be initialized with the base address of the MCSI
+ * device in the platform memory map and the number of MCSI masters.
+ * NOTE(review): an earlier version of this comment described per-cluster
+ * slave-interface indices, which this signature does not take - confirm
+ * how num_cci_masters is interpreted in mcsi.c.
+ */
+void mcsi_init(unsigned long cci_base, unsigned int num_cci_masters);
+void mcsi_cache_flush(void);
+
+void cci_enable_cluster_coherency(unsigned long mpidr);
+void cci_disable_cluster_coherency(unsigned long mpidr);
+
+/* status: NS_ACC sets the access-enable bit, any other value clears it */
+void cci_secure_switch(unsigned int status);
+void cci_pmu_secure_switch(unsigned int status);
+void cci_init_sf(void);
+void cci_interrupt_en(void);
+unsigned long cci_reg_access(unsigned int op, unsigned long offset, unsigned long val);
+
+#endif /* __ASSEMBLY__ */
+#endif /* MCSI_H */
diff --git a/plat/mediatek/mt8183/include/mt_gic_v3.h b/plat/mediatek/mt8183/include/mt_gic_v3.h
new file mode 100644
index 0000000..e2706f4
--- /dev/null
+++ b/plat/mediatek/mt8183/include/mt_gic_v3.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef MT_GIC_V3_H
+#define MT_GIC_V3_H
+
+#include <stdint.h>
+
+#include <lib/mmio.h>
+
+/* Mode argument of setup_int_schedule_mode() */
+enum irq_schedule_mode {
+	SW_MODE,
+	HW_MODE
+};
+
+/* GIC_INT_MASK register and its ACTIVE_SEL / ACTIVE_CPU fields */
+#define GIC_INT_MASK (MCUCFG_BASE + 0x5e8)
+#define GIC500_ACTIVE_SEL_SHIFT 3
+#define GIC500_ACTIVE_SEL_MASK (0x7 << GIC500_ACTIVE_SEL_SHIFT)
+#define GIC500_ACTIVE_CPU_SHIFT 16
+#define GIC500_ACTIVE_CPU_MASK (0xff << GIC500_ACTIVE_CPU_SHIFT)
+
+/* Implemented in plat_mt_gic.c */
+void setup_int_schedule_mode(enum irq_schedule_mode mode,
+			     unsigned int active_cpu);
+void clear_sec_pol_ctl_en(void);
+void mt_gic_driver_init(void);
+void mt_gic_init(void);
+void mt_gic_set_pending(uint32_t irq);
+uint32_t mt_gic_get_pending(uint32_t irq);
+void mt_gic_cpuif_enable(void);
+void mt_gic_cpuif_disable(void);
+void mt_gic_pcpu_init(void);
+void mt_gic_irq_save(void);
+void mt_gic_irq_restore(void);
+void mt_gic_sync_dcm_enable(void);
+void mt_gic_sync_dcm_disable(void);
+
+#endif /* MT_GIC_V3_H */
diff --git a/plat/mediatek/mt8183/include/plat_private.h b/plat/mediatek/mt8183/include/plat_private.h
index e57ae45..0853934 100644
--- a/plat/mediatek/mt8183/include/plat_private.h
+++ b/plat/mediatek/mt8183/include/plat_private.h
@@ -17,11 +17,10 @@
 			    uintptr_t coh_start,
 			    uintptr_t coh_limit);
 
-void plat_cci_init(void);
-void plat_cci_enable(void);
-void plat_cci_disable(void);
-void plat_cci_init_sf(void);
-void plat_gic_init(void);
+void plat_mtk_cci_init(void);
+void plat_mtk_cci_enable(void);
+void plat_mtk_cci_disable(void);
+void plat_mtk_cci_init_sf(void);
 
 /* Declarations for plat_topology.c */
 int mt_setup_topology(void);
diff --git a/plat/mediatek/mt8183/plat_mt_gic.c b/plat/mediatek/mt8183/plat_mt_gic.c
new file mode 100644
index 0000000..2144379
--- /dev/null
+++ b/plat/mediatek/mt8183/plat_mt_gic.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2019, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <common/bl_common.h>
+#include <common/debug.h>
+#include <drivers/arm/gicv3.h>
+#include <bl31/interrupt_mgmt.h>
+#include <../drivers/arm/gic/v3/gicv3_private.h>
+#include <mt_gic_v3.h>
+#include <mtk_plat_common.h>
+#include "plat_private.h"
+#include <plat/common/platform.h>
+#include <platform_def.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#define NR_INT_POL_CTL         20
+
+uintptr_t rdistif_base_addrs[PLATFORM_CORE_COUNT];
+
+/*
+ * We save and restore the GICv3 context on system suspend. Allocate the
+ * data in the designated EL3 Secure carve-out memory
+ */
+gicv3_redist_ctx_t rdist_ctx __section("arm_el3_tzc_dram");
+gicv3_dist_ctx_t dist_ctx __section("arm_el3_tzc_dram");
+
+
+static unsigned int mt_mpidr_to_core_pos(u_register_t mpidr)
+{	/* Adapter installed as mt_gicv3_data.mpidr_to_core_pos. */
+	return plat_core_pos_by_mpidr(mpidr);
+}
+
+gicv3_driver_data_t mt_gicv3_data = {
+	.gicd_base = MT_GIC_BASE,
+	.gicr_base = MT_GIC_RDIST_BASE,
+	.rdistif_num = PLATFORM_CORE_COUNT,
+	.rdistif_base_addrs = rdistif_base_addrs,
+	.mpidr_to_core_pos = mt_mpidr_to_core_pos,
+};
+
+/*
+ * Program GIC_INT_MASK: ACTIVE_SEL = 1 for HW_MODE or 0 for SW_MODE, then
+ * ACTIVE_CPU = active_cpu. Each field is its own read-modify-write.
+ */
+void setup_int_schedule_mode(enum irq_schedule_mode mode,
+			     unsigned int active_cpu)
+{
+	assert(mode <= HW_MODE);
+	assert(active_cpu <= 0xFF);
+
+	if (mode == SW_MODE) {
+		mmio_clrbits_32(GIC_INT_MASK, GIC500_ACTIVE_SEL_MASK);
+	} else if (mode == HW_MODE) {
+		mmio_clrsetbits_32(GIC_INT_MASK, GIC500_ACTIVE_SEL_MASK,
+				   0x1 << GIC500_ACTIVE_SEL_SHIFT);
+	}
+
+	mmio_clrsetbits_32(GIC_INT_MASK, GIC500_ACTIVE_CPU_MASK,
+			   active_cpu << GIC500_ACTIVE_CPU_SHIFT);
+}
+
+void clear_sec_pol_ctl_en(void)
+{
+	unsigned int idx;
+
+	/* Zero all NR_INT_POL_CTL (20) polarity ctrl registers */
+	for (idx = 0; idx < NR_INT_POL_CTL; idx++) {
+		mmio_write_32(SEC_POL_CTL_EN0 + (idx * 4), 0);
+	}
+	dsb();
+}
+
+void mt_gic_driver_init(void)
+{	/* Hands the platform's mt_gicv3_data to the GICv3 driver. */
+	gicv3_driver_init(&mt_gicv3_data);
+}
+
+void mt_gic_init(void)
+{	/* GICv3 driver init for this core, then MediaTek-specific setup. */
+	gicv3_distif_init();
+	gicv3_rdistif_init(plat_my_core_pos());
+	gicv3_cpuif_enable(plat_my_core_pos());
+
+	setup_int_schedule_mode(SW_MODE, 0xf);
+	clear_sec_pol_ctl_en();
+}
+
+void mt_gic_set_pending(uint32_t irq)
+{	/* Marks irq pending, passing the calling core's position. */
+	gicv3_set_interrupt_pending(irq, plat_my_core_pos());
+}
+
+uint32_t mt_gic_get_pending(uint32_t irq)
+{	/* Returns 1 if irq's bit is set in GICD_ISPENDR, otherwise 0. */
+	uint32_t bit = 1U << (irq % 32);	/* 1U: 1 << 31 overflows int */
+
+	return (mmio_read_32(gicv3_driver_data->gicd_base +
+			     GICD_ISPENDR + irq / 32 * 4) & bit) ? 1 : 0;
+}
+
+void mt_gic_cpuif_enable(void)
+{	/* Enables the GIC CPU interface of the calling core. */
+	gicv3_cpuif_enable(plat_my_core_pos());
+}
+
+void mt_gic_cpuif_disable(void)
+{	/* Disables the GIC CPU interface of the calling core. */
+	gicv3_cpuif_disable(plat_my_core_pos());
+}
+
+void mt_gic_pcpu_init(void)
+{	/* Per-CPU part of GIC init: the calling core's redistributor. */
+	gicv3_rdistif_init(plat_my_core_pos());
+}
+
+void mt_gic_irq_save(void)
+{	/* Saves the calling core's redistributor, then the distributor. */
+	gicv3_rdistif_save(plat_my_core_pos(), &rdist_ctx);
+	gicv3_distif_save(&dist_ctx);
+}
+
+void mt_gic_irq_restore(void)
+{	/* Reverse order of save: distributor first, then redistributor. */
+	gicv3_distif_init_restore(&dist_ctx);
+	gicv3_rdistif_init_restore(plat_my_core_pos(), &rdist_ctx);
+}
+
+/*
+ * Replace the GIC_SYNC_DCM_MASK field of GIC_SYNC_DCM with GIC_SYNC_DCM_ON.
+ */
+void mt_gic_sync_dcm_enable(void)
+{
+	mmio_clrsetbits_32(GIC_SYNC_DCM, GIC_SYNC_DCM_MASK, GIC_SYNC_DCM_ON);
+}
+
+/*
+ * Replace the GIC_SYNC_DCM_MASK field of GIC_SYNC_DCM with GIC_SYNC_DCM_OFF.
+ */
+void mt_gic_sync_dcm_disable(void)
+{
+	mmio_clrsetbits_32(GIC_SYNC_DCM, GIC_SYNC_DCM_MASK, GIC_SYNC_DCM_OFF);
+}
diff --git a/plat/mediatek/mt8183/platform.mk b/plat/mediatek/mt8183/platform.mk
index 2ceb459..8c8e2fe 100644
--- a/plat/mediatek/mt8183/platform.mk
+++ b/plat/mediatek/mt8183/platform.mk
@@ -8,18 +8,20 @@
 MTK_PLAT_SOC  := ${MTK_PLAT}/${PLAT}
 
 PLAT_INCLUDES := -I${MTK_PLAT}/common/                            \
+                 -I${MTK_PLAT_SOC}/drivers/                       \
                  -I${MTK_PLAT_SOC}/include/
 
 PLAT_BL_COMMON_SOURCES := lib/xlat_tables/aarch64/xlat_tables.c       \
                           lib/xlat_tables/xlat_tables_common.c        \
-                          plat/common/plat_gicv2.c                    \
                           plat/common/plat_psci_common.c              \
                           plat/common/aarch64/crash_console_helpers.S
 
 BL31_SOURCES    += drivers/arm/cci/cci.c                                 \
                    drivers/arm/gic/common/gic_common.c                   \
-                   drivers/arm/gic/v2/gicv2_main.c                       \
-                   drivers/arm/gic/v2/gicv2_helpers.c                    \
+                   drivers/arm/gic/v3/arm_gicv3_common.c                 \
+                   drivers/arm/gic/v3/gicv3_helpers.c                    \
+                   drivers/arm/gic/v3/gic500.c                           \
+                   drivers/arm/gic/v3/gicv3_main.c                       \
                    drivers/delay_timer/delay_timer.c                     \
                    drivers/delay_timer/generic_delay_timer.c             \
                    drivers/gpio/gpio.c                                   \
@@ -27,11 +29,14 @@
                    lib/cpus/aarch64/aem_generic.S                        \
                    lib/cpus/aarch64/cortex_a53.S                         \
                    lib/cpus/aarch64/cortex_a73.S                         \
+                   plat/common/plat_gicv3.c                              \
                    ${MTK_PLAT}/common/mtk_plat_common.c                  \
                    ${MTK_PLAT_SOC}/aarch64/plat_helpers.S                \
                    ${MTK_PLAT_SOC}/aarch64/platform_common.c             \
+                   ${MTK_PLAT_SOC}/drivers/mcsi/mcsi.c                   \
                    ${MTK_PLAT_SOC}/plat_pm.c                             \
                    ${MTK_PLAT_SOC}/plat_topology.c                       \
+                   ${MTK_PLAT_SOC}/plat_mt_gic.c                         \
                    ${MTK_PLAT_SOC}/bl31_plat_setup.c                     \
                    ${MTK_PLAT_SOC}/plat_debug.c                          \
                    ${MTK_PLAT_SOC}/scu.c
diff --git a/plat/ti/k3/common/k3_psci.c b/plat/ti/k3/common/k3_psci.c
index c7754e9..de9cefe 100644
--- a/plat/ti/k3/common/k3_psci.c
+++ b/plat/ti/k3/common/k3_psci.c
@@ -17,11 +17,6 @@
 #include <k3_gicv3.h>
 #include <ti_sci.h>
 
-#ifdef TI_AM65X_WORKAROUND
-/* Need to flush psci internal locks before shutdown or their values are lost */
-#include "../../../../lib/psci/psci_private.h"
-#endif
-
 uintptr_t k3_sec_entrypoint;
 
 static void k3_cpu_standby(plat_local_state_t cpu_state)
@@ -114,16 +109,6 @@
 	k3_gic_pcpu_init();
 	k3_gic_cpuif_enable();
 }
-
-#ifdef TI_AM65X_WORKAROUND
-static void  __dead2 k3_pwr_domain_pwr_down_wfi(const psci_power_state_t
-						  *target_state)
-{
-	flush_cpu_data(psci_svc_cpu_data);
-	flush_dcache_range((uintptr_t) psci_locks, sizeof(psci_locks));
-	psci_power_down_wfi();
-}
-#endif
 
 static void __dead2 k3_system_reset(void)
 {
@@ -154,9 +139,6 @@
 	.pwr_domain_on = k3_pwr_domain_on,
 	.pwr_domain_off = k3_pwr_domain_off,
 	.pwr_domain_on_finish = k3_pwr_domain_on_finish,
-#ifdef TI_AM65X_WORKAROUND
-	.pwr_domain_pwr_down_wfi = k3_pwr_domain_pwr_down_wfi,
-#endif
 	.system_reset = k3_system_reset,
 	.validate_power_state = k3_validate_power_state,
 	.validate_ns_entrypoint = k3_validate_ns_entrypoint
diff --git a/plat/ti/k3/common/plat_common.mk b/plat/ti/k3/common/plat_common.mk
index 2e5f584..83e9c62 100644
--- a/plat/ti/k3/common/plat_common.mk
+++ b/plat/ti/k3/common/plat_common.mk
@@ -12,8 +12,8 @@
 PROGRAMMABLE_RESET_ADDRESS:=	1
 
 # System coherency is managed in hardware
-HW_ASSISTED_COHERENCY	:=	1
-USE_COHERENT_MEM	:=	0
+WARMBOOT_ENABLE_DCACHE_EARLY :=	1
+USE_COHERENT_MEM	:=	1
 
 # A53 erratum for SoC. (enable them all)
 ERRATA_A53_826319	:=	1
@@ -28,10 +28,6 @@
 # Split out RO data into a non-executable section
 SEPARATE_CODE_AND_RODATA :=    1
 
-# Leave the caches enabled on core powerdown path
-TI_AM65X_WORKAROUND	:=	1
-$(eval $(call add_define,TI_AM65X_WORKAROUND))
-
 MULTI_CONSOLE_API	:=	1
 TI_16550_MDR_QUIRK	:=	1
 $(eval $(call add_define,TI_16550_MDR_QUIRK))
diff --git a/readme.rst b/readme.rst
index 6846419..84c8020 100644
--- a/readme.rst
+++ b/readme.rst
@@ -222,7 +222,7 @@
 
 This release also contains the following platform support:
 
--  Allwinner sun50i_a64 and sun50i_h6
+-  Allwinner sun50i (A64, H5, and H6) SoCs
 -  Amlogic Meson S905 (GXBB)
 -  Amlogic Meson S905x (GXL)
 -  Arm Juno Software Development Platform