feat(gpt): add support for large GPT mappings

This patch adds support for large GPT mappings using
Contiguous descriptors. The maximum size of supported
contiguous block in MB is defined in RME_GPT_MAX_BLOCK
build parameter and takes values 0, 2, 32 and 512 and
by default set to 2 in make_helpers/defaults.mk.
Setting RME_GPT_MAX_BLOCK value to 0 disables use of
Contiguous descriptors.
Function gpt_tlbi_by_pa_ll() and its declaration
are removed from lib/aarch64/misc_helpers.S and
include/arch/aarch64/arch_helpers.h, because the
GPT library now uses tlbirpalos_xxx() functions.

Change-Id: Ia9a59bde1741c5666b4ca1de9324e6dfd6f734eb
Signed-off-by: AlexeiFedorov <Alexei.Fedorov@arm.com>
diff --git a/lib/gpt_rme/gpt_rme_private.h b/lib/gpt_rme/gpt_rme_private.h
index b2a5dae..4d2ab59 100644
--- a/lib/gpt_rme/gpt_rme_private.h
+++ b/lib/gpt_rme/gpt_rme_private.h
@@ -9,6 +9,7 @@
 
 #include <arch.h>
 #include <lib/gpt_rme/gpt_rme.h>
+#include <lib/spinlock.h>
 #include <lib/utils_def.h>
 
 /******************************************************************************/
@@ -19,7 +20,7 @@
 #define GPT_L0_TYPE_MASK		UL(0xF)
 #define GPT_L0_TYPE_SHIFT		U(0)
 
-/* For now, we don't support contiguous descriptors, only table and block */
+/* GPT level 0 table and block descriptors */
 #define GPT_L0_TYPE_TBL_DESC		UL(3)
 #define GPT_L0_TYPE_BLK_DESC		UL(1)
 
@@ -29,29 +30,63 @@
 #define GPT_L0_BLK_DESC_GPI_MASK	UL(0xF)
 #define GPT_L0_BLK_DESC_GPI_SHIFT	U(4)
 
-/* GPT level 1 descriptor bit definitions */
+/* GPT level 1 Contiguous descriptor */
+#define GPT_L1_TYPE_CONT_DESC_MASK	UL(0xF)
+#define GPT_L1_TYPE_CONT_DESC		UL(1)
+
+/* GPT level 1 Contiguous descriptor definitions */
+#define GPT_L1_CONTIG_2MB		UL(1)
+#define GPT_L1_CONTIG_32MB		UL(2)
+#define GPT_L1_CONTIG_512MB		UL(3)
+
+#define GPT_L1_CONT_DESC_GPI_SHIFT	U(4)
+#define GPT_L1_CONT_DESC_GPI_MASK	UL(0xF)
+#define GPT_L1_CONT_DESC_CONTIG_SHIFT	U(8)
+#define GPT_L1_CONT_DESC_CONTIG_MASK	UL(3)
+
+/* GPT level 1 Granules descriptor bit definitions */
 #define GPT_L1_GRAN_DESC_GPI_MASK	UL(0xF)
 
+/* L1 Contiguous descriptors templates */
+#define GPT_L1_CONT_DESC_2MB	\
+			(GPT_L1_TYPE_CONT_DESC |	\
+			(GPT_L1_CONTIG_2MB << GPT_L1_CONT_DESC_CONTIG_SHIFT))
+#define GPT_L1_CONT_DESC_32MB	\
+			(GPT_L1_TYPE_CONT_DESC |	\
+			(GPT_L1_CONTIG_32MB << GPT_L1_CONT_DESC_CONTIG_SHIFT))
+#define GPT_L1_CONT_DESC_512MB	\
+			(GPT_L1_TYPE_CONT_DESC |	\
+			(GPT_L1_CONTIG_512MB << GPT_L1_CONT_DESC_CONTIG_SHIFT))
+
+/* Create L1 Contiguous descriptor from GPI and template */
+#define GPT_L1_GPI_CONT_DESC(_gpi, _desc)	\
+			((_desc) | ((uint64_t)(_gpi) << GPT_L1_CONT_DESC_GPI_SHIFT))
+
+/* Create L1 Contiguous descriptor from Granules descriptor and size */
+#define GPT_L1_CONT_DESC(_desc, _size) \
+				(GPT_L1_CONT_DESC_##_size	| \
+				(((_desc) & GPT_L1_GRAN_DESC_GPI_MASK) << \
+				GPT_L1_CONT_DESC_GPI_SHIFT))
+
+/* Create L1 Contiguous descriptor from GPI and size */
+#define GPT_L1_CONT_DESC_SIZE(_gpi, _size) \
+				(GPT_L1_CONT_DESC_##_size	| \
+				(((uint64_t)(_gpi) << GPT_L1_CONT_DESC_GPI_SHIFT))
+
+#define GPT_L1_GPI_BYTE(_gpi)		(uint64_t)((_gpi) | ((_gpi) << 4))
+#define GPT_L1_GPI_HALF(_gpi)		(GPT_L1_GPI_BYTE(_gpi) | (GPT_L1_GPI_BYTE(_gpi) << 8))
+#define GPT_L1_GPI_WORD(_gpi)		(GPT_L1_GPI_HALF(_gpi) | (GPT_L1_GPI_HALF(_gpi) << 16))
+
 /*
- * This macro fills out every GPI entry in a granules descriptor to the same
- * value.
+ * This macro generates a Granules descriptor
+ * with the same value for every GPI entry.
  */
-#define GPT_BUILD_L1_DESC(_gpi)		(((uint64_t)(_gpi) << 4*0) | \
-					 ((uint64_t)(_gpi) << 4*1) | \
-					 ((uint64_t)(_gpi) << 4*2) | \
-					 ((uint64_t)(_gpi) << 4*3) | \
-					 ((uint64_t)(_gpi) << 4*4) | \
-					 ((uint64_t)(_gpi) << 4*5) | \
-					 ((uint64_t)(_gpi) << 4*6) | \
-					 ((uint64_t)(_gpi) << 4*7) | \
-					 ((uint64_t)(_gpi) << 4*8) | \
-					 ((uint64_t)(_gpi) << 4*9) | \
-					 ((uint64_t)(_gpi) << 4*10) | \
-					 ((uint64_t)(_gpi) << 4*11) | \
-					 ((uint64_t)(_gpi) << 4*12) | \
-					 ((uint64_t)(_gpi) << 4*13) | \
-					 ((uint64_t)(_gpi) << 4*14) | \
-					 ((uint64_t)(_gpi) << 4*15))
+#define GPT_BUILD_L1_DESC(_gpi)		(GPT_L1_GPI_WORD(_gpi) | (GPT_L1_GPI_WORD(_gpi) << 32))
+
+#define GPT_L1_SECURE_DESC	GPT_BUILD_L1_DESC(GPT_GPI_SECURE)
+#define GPT_L1_NS_DESC		GPT_BUILD_L1_DESC(GPT_GPI_NS)
+#define GPT_L1_REALM_DESC	GPT_BUILD_L1_DESC(GPT_GPI_REALM)
+#define GPT_L1_ANY_DESC		GPT_BUILD_L1_DESC(GPT_GPI_ANY)
 
 /******************************************************************************/
 /* GPT platform configuration                                                 */
@@ -106,17 +141,44 @@
 	PGS_64KB_P =	16U
 } gpt_p_val_e;
 
+#define LOCK_SIZE	sizeof(((bitlock_t *)NULL)->lock)
+#define LOCK_TYPE	typeof(((bitlock_t *)NULL)->lock)
+#define LOCK_BITS	(LOCK_SIZE * 8U)
+
 /*
- * Internal structure to retrieve the values from get_gpi_info();
+ * Internal structure to retrieve the values from get_gpi_params();
  */
-typedef struct gpi_info {
+typedef struct {
 	uint64_t gpt_l1_desc;
 	uint64_t *gpt_l1_addr;
 	unsigned int idx;
 	unsigned int gpi_shift;
 	unsigned int gpi;
+	bitlock_t *lock;
+	LOCK_TYPE mask;
 } gpi_info_t;
 
+/*
+ * Look up structure for contiguous blocks and descriptors
+ */
+typedef struct {
+	size_t size;
+	unsigned int desc;
+} gpt_fill_lookup_t;
+
+typedef void (*gpt_shatter_func)(uintptr_t base, const gpi_info_t *gpi_info,
+					uint64_t l1_desc);
+typedef void (*gpt_tlbi_func)(uintptr_t base);
+
+/*
+ * Look-up structure for
+ * invalidating TLBs of GPT entries by Physical address, last level.
+ */
+typedef struct {
+	gpt_tlbi_func function;
+	size_t mask;
+} gpt_tlbi_lookup_t;
+
 /* Max valid value for PGS */
 #define GPT_PGS_MAX			(2U)
 
@@ -136,8 +198,8 @@
  * special case we'll get a negative width value which does not make sense and
  * would cause problems.
  */
-#define GPT_L0_IDX_WIDTH(_t)		(((_t) > GPT_S_VAL) ? \
-					((_t) - GPT_S_VAL) : (0U))
+#define GPT_L0_IDX_WIDTH(_t)		(((unsigned int)(_t) > GPT_S_VAL) ? \
+					((unsigned int)(_t) - GPT_S_VAL) : (0U))
 
 /* Bit shift for the L0 index field in a PA */
 #define GPT_L0_IDX_SHIFT		(GPT_S_VAL)
@@ -173,10 +235,11 @@
  * the L0 index field above since all valid combinations of PGS (p) and L0GPTSZ
  * (s) will result in a positive width value.
  */
-#define GPT_L1_IDX_WIDTH(_p)		((GPT_S_VAL - 1U) - ((_p) + 3U))
+#define GPT_L1_IDX_WIDTH(_p)		((GPT_S_VAL - 1U) - \
+					((unsigned int)(_p) + 3U))
 
 /* Bit shift for the L1 index field */
-#define GPT_L1_IDX_SHIFT(_p)		((_p) + 4U)
+#define GPT_L1_IDX_SHIFT(_p)		((unsigned int)(_p) + 4U)
 
 /*
  * Mask for the L1 index field, must be shifted.
@@ -196,7 +259,10 @@
 #define GPT_L1_GPI_IDX_MASK		(0xF)
 
 /* Total number of entries in each L1 table */
-#define GPT_L1_ENTRY_COUNT(_p)		((GPT_L1_IDX_MASK(_p)) + 1U)
+#define GPT_L1_ENTRY_COUNT(_p)		((GPT_L1_IDX_MASK(_p)) + 1UL)
+
+/* Number of L1 entries in 2MB block */
+#define GPT_L1_ENTRY_COUNT_2MB(_p)	(SZ_2M >> GPT_L1_IDX_SHIFT(_p))
 
 /* Total size in bytes of each L1 table */
 #define GPT_L1_TABLE_SIZE(_p)		((GPT_L1_ENTRY_COUNT(_p)) << 3U)
@@ -206,10 +272,13 @@
 /******************************************************************************/
 
 /* Protected space actual size in bytes */
-#define GPT_PPS_ACTUAL_SIZE(_t)	(1UL << (_t))
+#define GPT_PPS_ACTUAL_SIZE(_t)	(1UL << (unsigned int)(_t))
 
 /* Granule actual size in bytes */
-#define GPT_PGS_ACTUAL_SIZE(_p)	(1UL << (_p))
+#define GPT_PGS_ACTUAL_SIZE(_p)	(1UL << (unsigned int)(_p))
+
+/* Number of granules in 2MB block */
+#define GPT_PGS_COUNT_2MB(_p)	(1UL << (21U - (unsigned int)(_p)))
 
 /* L0 GPT region size in bytes */
 #define GPT_L0GPTSZ_ACTUAL_SIZE	(1UL << GPT_S_VAL)
@@ -221,7 +290,8 @@
  * This definition is used to determine if a physical address lies on an L0
  * region boundary.
  */
-#define GPT_IS_L0_ALIGNED(_pa)	(((_pa) & (GPT_L0_REGION_SIZE - U(1))) == U(0))
+#define GPT_IS_L0_ALIGNED(_pa)	\
+	(((_pa) & (GPT_L0_REGION_SIZE - UL(1))) == UL(0))
 
 /* Get the type field from an L0 descriptor */
 #define GPT_L0_TYPE(_desc)	(((_desc) >> GPT_L0_TYPE_SHIFT) & \
@@ -246,16 +316,43 @@
 				(GPT_L0_TBL_DESC_L1ADDR_MASK << \
 				GPT_L0_TBL_DESC_L1ADDR_SHIFT))))
 
+/* Get the GPI from L1 Contiguous descriptor */
+#define GPT_L1_CONT_GPI(_desc)		\
+	(((_desc) >> GPT_L1_CONT_DESC_GPI_SHIFT) & GPT_L1_CONT_DESC_GPI_MASK)
+
+/* Get the GPI from L1 Granules descriptor */
+#define GPT_L1_GRAN_GPI(_desc)	((_desc) & GPT_L1_GRAN_DESC_GPI_MASK)
+
+/* Get the Contig from L1 Contiguous descriptor */
+#define GPT_L1_CONT_CONTIG(_desc)	\
+	(((_desc) >> GPT_L1_CONT_DESC_CONTIG_SHIFT) & \
+					GPT_L1_CONT_DESC_CONTIG_MASK)
+
 /* Get the index into the L1 table from a physical address */
-#define GPT_L1_IDX(_p, _pa)	(((_pa) >> GPT_L1_IDX_SHIFT(_p)) & \
-				GPT_L1_IDX_MASK(_p))
+#define GPT_L1_IDX(_p, _pa)		\
+	(((_pa) >> GPT_L1_IDX_SHIFT(_p)) & GPT_L1_IDX_MASK(_p))
 
 /* Get the index of the GPI within an L1 table entry from a physical address */
-#define GPT_L1_GPI_IDX(_p, _pa)	(((_pa) >> GPT_L1_GPI_IDX_SHIFT(_p)) & \
-				GPT_L1_GPI_IDX_MASK)
+#define GPT_L1_GPI_IDX(_p, _pa)		\
+	(((_pa) >> GPT_L1_GPI_IDX_SHIFT(_p)) & GPT_L1_GPI_IDX_MASK)
 
 /* Determine if an address is granule-aligned */
-#define GPT_IS_L1_ALIGNED(_p, _pa) (((_pa) & (GPT_PGS_ACTUAL_SIZE(_p) - U(1))) \
-				   == U(0))
+#define GPT_IS_L1_ALIGNED(_p, _pa)	\
+	(((_pa) & (GPT_PGS_ACTUAL_SIZE(_p) - UL(1))) == UL(0))
+
+/* Get aligned addresses */
+#define ALIGN_2MB(_addr)	((_addr) & ~(SZ_2M - 1UL))
+#define ALIGN_32MB(_addr)	((_addr) & ~(SZ_32M - 1UL))
+#define ALIGN_512MB(_addr)	((_addr) & ~(SZ_512M - 1UL))
+
+/* Determine if region is contiguous */
+#define GPT_REGION_IS_CONT(_len, _addr, _size)	\
+	(((_len) >= (_size)) && (((_addr) & ((_size) - UL(1))) == UL(0)))
+
+/* Get 32MB block number in 512MB block: 0-15 */
+#define GET_32MB_NUM(_addr)	((_addr >> 25) & 0xF)
+
+/* Get 2MB block number in 32MB block: 0-15 */
+#define GET_2MB_NUM(_addr)	((_addr >> 21) & 0xF)
 
 #endif /* GPT_RME_PRIVATE_H */