feat(msm8916): clear CACHE_LOCK for MMU-500 r2p0+

Newer Qualcomm platforms similar to MSM8916 use MMU-500 r2p0+ instead
of MMU-500 r0p0. On these versions it is necessary to clear the
SMMU_sACR.CACHE_LOCK bit to allow the normal world to write to
SMMU_CBn_ACTLR. Without this, Linux shows a warning and is unable to
work around the errata in MMU-500:

  arm-smmu 1e00000.iommu: Failed to disable prefetcher
    [errata #841119  and #826419], check ACR.CACHE_LOCK

Handle this dynamically at runtime by enabling all the necessary SMMU
clocks and checking the IDR7 register for MMU-500 r2p0+. This must be
applied to both SMMUs on the platform: APPS and GPU.

While at it, clean up the clock handling: leave the SMMU clocks on
because the normal world will need them again while booting. But make
sure the vote register of the RPM co-processor does not keep these
clocks always-on. For some reason, some platforms seem to have a
non-zero reset value for GCC_RPM_SMMU_CLOCK_BRANCH_ENA_VOTE.

Change-Id: I34cf7d3f2db977b0930eb6e64a870ecaf02a7573
Signed-off-by: Stephan Gerhold <stephan@gerhold.net>
diff --git a/plat/qti/msm8916/include/msm8916_mmap.h b/plat/qti/msm8916/include/msm8916_mmap.h
index 35e3b86..dc420fc 100644
--- a/plat/qti/msm8916/include/msm8916_mmap.h
+++ b/plat/qti/msm8916/include/msm8916_mmap.h
@@ -22,6 +22,7 @@
 
 #define APPS_SMMU_BASE		(PCNOC_BASE + 0x1e00000)
 #define APPS_SMMU_QCOM		(APPS_SMMU_BASE + 0xf0000)
+#define GPU_SMMU_BASE		(PCNOC_BASE + 0x1f00000)
 
 #define BLSP1_BASE		(PCNOC_BASE + 0x7880000)
 #define BLSP1_UART_BASE(n)	(BLSP1_BASE + 0x2f000 + (((n) - 1) * 0x1000))
diff --git a/plat/qti/msm8916/msm8916_config.c b/plat/qti/msm8916/msm8916_config.c
index 22248db..8ae07a0 100644
--- a/plat/qti/msm8916/msm8916_config.c
+++ b/plat/qti/msm8916/msm8916_config.c
@@ -77,24 +77,66 @@
  * by default to avoid special setup on the non-secure side.
  */
 #define CLK_OFF					BIT_32(31)
+#define GCC_APSS_TCU_CBCR			(GCC_BASE + 0x12018)
+#define GCC_GFX_TCU_CBCR			(GCC_BASE + 0x12020)
 #define GCC_SMMU_CFG_CBCR			(GCC_BASE + 0x12038)
+#define GCC_RPM_SMMU_CLOCK_BRANCH_ENA_VOTE	(GCC_BASE + 0x3600c)
 #define GCC_APCS_SMMU_CLOCK_BRANCH_ENA_VOTE	(GCC_BASE + 0x4500c)
+#define APSS_TCU_CLK_ENA			BIT_32(1)
+#define GFX_TCU_CLK_ENA				BIT_32(2)
+#define GFX_TBU_CLK_ENA				BIT_32(3)
 #define SMMU_CFG_CLK_ENA			BIT_32(12)
 #define APPS_SMMU_INTR_SEL_NS			(APPS_SMMU_QCOM + 0x2000)
 #define APPS_SMMU_INTR_SEL_NS_EN_ALL		U(0xffffffff)
 
+#define SMMU_SACR				0x010
+#define SMMU_SACR_CACHE_LOCK			BIT_32(26)
+#define SMMU_IDR7				0x03c
+#define SMMU_IDR7_MINOR(val)			(((val) >> 0) & 0xf)
+#define SMMU_IDR7_MAJOR(val)			(((val) >> 4) & 0xf)
+
+static void msm8916_smmu_cache_unlock(uintptr_t smmu_base, uintptr_t clk_cbcr)
+{
+	uint32_t version;
+
+	/* Wait for clock */
+	while (mmio_read_32(clk_cbcr) & CLK_OFF) {
+	}
+
+	version = mmio_read_32(smmu_base + SMMU_IDR7);
+	VERBOSE("SMMU(0x%lx) r%dp%d\n", smmu_base,
+		SMMU_IDR7_MAJOR(version), SMMU_IDR7_MINOR(version));
+
+	/* For SMMU r2p0+ clear CACHE_LOCK to allow writes to CBn_ACTLR */
+	if (SMMU_IDR7_MAJOR(version) >= 2) {
+		mmio_clrbits_32(smmu_base + SMMU_SACR, SMMU_SACR_CACHE_LOCK);
+	}
+}
+
 static void msm8916_configure_smmu(void)
 {
-	/* Enable SMMU configuration clock to enable register access */
-	mmio_setbits_32(GCC_APCS_SMMU_CLOCK_BRANCH_ENA_VOTE, SMMU_CFG_CLK_ENA);
+	/* Enable SMMU clocks to enable register access */
+	mmio_write_32(GCC_APCS_SMMU_CLOCK_BRANCH_ENA_VOTE, SMMU_CFG_CLK_ENA |
+		      APSS_TCU_CLK_ENA | GFX_TCU_CLK_ENA | GFX_TBU_CLK_ENA);
+
+	/* Wait for configuration clock */
 	while (mmio_read_32(GCC_SMMU_CFG_CBCR) & CLK_OFF) {
 	}
 
 	/* Route all context bank interrupts to non-secure interrupt */
 	mmio_write_32(APPS_SMMU_INTR_SEL_NS, APPS_SMMU_INTR_SEL_NS_EN_ALL);
 
-	/* Disable configuration clock again */
-	mmio_clrbits_32(GCC_APCS_SMMU_CLOCK_BRANCH_ENA_VOTE, SMMU_CFG_CLK_ENA);
+	/* Clear sACR.CACHE_LOCK bit if needed for MMU-500 r2p0+ */
+	msm8916_smmu_cache_unlock(APPS_SMMU_BASE, GCC_APSS_TCU_CBCR);
+	msm8916_smmu_cache_unlock(GPU_SMMU_BASE, GCC_GFX_TCU_CBCR);
+
+	/*
+	 * Keep APCS vote for SMMU clocks for rest of booting process, but make
+	 * sure other vote registers (such as RPM) do not keep permanent votes.
+	 */
+	VERBOSE("Clearing GCC_RPM_SMMU_CLOCK_BRANCH_ENA_VOTE (was: 0x%x)\n",
+		mmio_read_32(GCC_RPM_SMMU_CLOCK_BRANCH_ENA_VOTE));
+	mmio_write_32(GCC_RPM_SMMU_CLOCK_BRANCH_ENA_VOTE, 0);
 }
 
 void msm8916_configure(void)