feat(gpt): configure memory size protected by bitlock

This patch adds support in GPT library for configuration
of the memory block size protected by one bit of 'bitlock'
structure. Build option 'RME_GPT_BITLOCK_BLOCK' defines the
number of 512MB blocks covered by each bit. This numeric
parameter must be either 0 or a power of 2 in the range from
1 to 512. Setting this value to 0 chooses a single spinlock
for all GPT L1 table entries. The default value is set to 1
which corresponds to 512MB per bit.

Change-Id: I710d178072894a3ef40daebea701f74d19e8a3d7
Signed-off-by: AlexeiFedorov <Alexei.Fedorov@arm.com>
diff --git a/Makefile b/Makefile
index b6093e7..f7229f5 100644
--- a/Makefile
+++ b/Makefile
@@ -1331,6 +1331,7 @@
 	PSCI_EXTENDED_STATE_ID \
 	PSCI_OS_INIT_MODE \
 	RESET_TO_BL31 \
+	RME_GPT_BITLOCK_BLOCK \
 	RME_GPT_MAX_BLOCK \
 	SEPARATE_CODE_AND_RODATA \
 	SEPARATE_BL2_NOLOAD_REGION \
diff --git a/docs/getting_started/build-options.rst b/docs/getting_started/build-options.rst
index 0d4261d..82772d6 100644
--- a/docs/getting_started/build-options.rst
+++ b/docs/getting_started/build-options.rst
@@ -812,7 +812,17 @@
    instead of the BL1 entrypoint. It can take the value 0 (CPU reset to BL1
    entrypoint) or 1 (CPU reset to SP_MIN entrypoint). The default value is 0.
 
--  ``RME_GPT_MAX_BLOCK``: Numeric value in MB to define maximum size of
+-  ``RME_GPT_BITLOCK_BLOCK``: This defines the block size (in number of 512MB
+   blocks) covered by a single bit of the bitlock structure during RME GPT
+   operations. The lower the block size, the better opportunity for
+   parallelising GPT operations but at the cost of more bits being needed
+   for the bitlock structure. This numeric parameter can take the values
+   from 0 to 512 and must be a power of 2. The value of 0 is special
+   and it chooses a single spinlock for all GPT L1 table entries. Default
+   value is 1 which corresponds to block size of 512MB per bit of bitlock
+   structure.
+
+-  ``RME_GPT_MAX_BLOCK``: Numeric value in MB to define the maximum size of
    supported contiguous blocks in GPT Library. This parameter can take the
    values 0, 2, 32 and 512. Setting this value to 0 disables use of Contigious
    descriptors. Default value is 2.
diff --git a/lib/gpt_rme/gpt_rme.c b/lib/gpt_rme/gpt_rme.c
index 7dec548..4d80373 100644
--- a/lib/gpt_rme/gpt_rme.c
+++ b/lib/gpt_rme/gpt_rme.c
@@ -127,9 +127,35 @@
 static uintptr_t gpt_l1_tbl;
 
 /* These variable is used during runtime */
+#if (RME_GPT_BITLOCK_BLOCK == 0)
+/*
+ * The GPTs are protected by a global spinlock to ensure
+ * that multiple CPUs do not attempt to change the descriptors at once.
+ */
+static spinlock_t gpt_lock;
+#else
 
-/* Bitlock base address for each 512 MB block of PPS */
+/* Bitlocks base address */
 static bitlock_t *gpt_bitlock_base;
+#endif
+
+/* Lock/unlock macros for GPT entries */
+#if (RME_GPT_BITLOCK_BLOCK == 0)
+/*
+ * Access to GPT is controlled by a global lock to ensure
+ * that no more than one CPU is allowed to make changes at any
+ * given time.
+ */
+#define GPT_LOCK	spin_lock(&gpt_lock)
+#define GPT_UNLOCK	spin_unlock(&gpt_lock)
+#else
+/*
+ * Access to a block of memory is controlled by a bitlock.
+ * Size of block = RME_GPT_BITLOCK_BLOCK * 512MB.
+ */
+#define GPT_LOCK	bit_lock(gpi_info.lock, gpi_info.mask)
+#define GPT_UNLOCK	bit_unlock(gpi_info.lock, gpi_info.mask)
+#endif
 
 static void tlbi_page_dsbosh(uintptr_t base)
 {
@@ -477,7 +503,7 @@
 static int validate_l0_params(gpccr_pps_e pps, uintptr_t l0_mem_base,
 				size_t l0_mem_size)
 {
-	size_t l0_alignment, locks_size;
+	size_t l0_alignment, locks_size = 0;
 
 	/*
 	 * Make sure PPS is valid and then store it since macros need this value
@@ -503,28 +529,28 @@
 		return -EFAULT;
 	}
 
-	/* Check size */
-	if (l0_mem_size < GPT_L0_TABLE_SIZE(gpt_config.t)) {
-		ERROR("%sL0%s\n", (const char *)"GPT: Inadequate ",
-			(const char *)" memory\n");
-		ERROR("      Expected 0x%lx bytes, got 0x%lx bytes\n",
-			GPT_L0_TABLE_SIZE(gpt_config.t), l0_mem_size);
-		return -ENOMEM;
-	}
-
+#if (RME_GPT_BITLOCK_BLOCK != 0)
 	/*
 	 * Size of bitlocks in bytes for the protected address space
-	 * with 512MB per bitlock.
+	 * with RME_GPT_BITLOCK_BLOCK * 512MB per bitlock.
 	 */
-	locks_size = GPT_PPS_ACTUAL_SIZE(gpt_config.t) / (SZ_512M * 8U);
+	locks_size = GPT_PPS_ACTUAL_SIZE(gpt_config.t) /
+			(RME_GPT_BITLOCK_BLOCK * SZ_512M * 8U);
 
-	/* Check space for bitlocks */
-	if (locks_size > (l0_mem_size - GPT_L0_TABLE_SIZE(gpt_config.t))) {
-		ERROR("%sbitlock%s", (const char *)"GPT: Inadequate ",
-			(const char *)" memory\n");
+	/*
+	 * If protected space size is less than the size covered
+	 * by 'bitlock' structure, check for a single bitlock.
+	 */
+	if (locks_size < LOCK_SIZE) {
+		locks_size = LOCK_SIZE;
+	}
+#endif
+	/* Check size for L0 tables and bitlocks */
+	if (l0_mem_size < (GPT_L0_TABLE_SIZE(gpt_config.t) + locks_size)) {
+		ERROR("GPT: Inadequate L0 memory\n");
 		ERROR("      Expected 0x%lx bytes, got 0x%lx bytes\n",
-			locks_size,
-			l0_mem_size - GPT_L0_TABLE_SIZE(gpt_config.t));
+			GPT_L0_TABLE_SIZE(gpt_config.t) + locks_size,
+			l0_mem_size);
 		return -ENOMEM;
 	}
 
@@ -1089,8 +1115,8 @@
 		       size_t l0_mem_size)
 {
 	uint64_t gpt_desc;
-	size_t locks_size;
-	bitlock_t *bit_locks;
+	size_t locks_size = 0;
+	__unused bitlock_t *bit_locks;
 	int ret;
 
 	/* Ensure that MMU and Data caches are enabled */
@@ -1110,16 +1136,27 @@
 		((uint64_t *)l0_mem_base)[i] = gpt_desc;
 	}
 
+#if (RME_GPT_BITLOCK_BLOCK != 0)
 	/* Initialise bitlocks at the end of L0 table */
 	bit_locks = (bitlock_t *)(l0_mem_base +
 					GPT_L0_TABLE_SIZE(gpt_config.t));
 
 	/* Size of bitlocks in bytes */
-	locks_size = GPT_PPS_ACTUAL_SIZE(gpt_config.t) / (SZ_512M * 8U);
+	locks_size = GPT_PPS_ACTUAL_SIZE(gpt_config.t) /
+					(RME_GPT_BITLOCK_BLOCK * SZ_512M * 8U);
+
+	/*
+	 * If protected space size is less than the size covered
+	 * by 'bitlock' structure, initialise a single bitlock.
+	 */
+	if (locks_size < LOCK_SIZE) {
+		locks_size = LOCK_SIZE;
+	}
 
 	for (size_t i = 0UL; i < (locks_size/LOCK_SIZE); i++) {
 		bit_locks[i].lock = 0U;
 	}
+#endif
 
 	/* Flush updated L0 tables and bitlocks to memory */
 	flush_dcache_range((uintptr_t)l0_mem_base,
@@ -1290,17 +1327,19 @@
 	/* Mask for the L1 index field */
 	gpt_l1_index_mask = GPT_L1_IDX_MASK(gpt_config.p);
 
+#if (RME_GPT_BITLOCK_BLOCK != 0)
 	/* Bitlocks at the end of L0 table */
 	gpt_bitlock_base = (bitlock_t *)(gpt_config.plat_gpt_l0_base +
 					GPT_L0_TABLE_SIZE(gpt_config.t));
-
+#endif
 	VERBOSE("GPT: Runtime Configuration\n");
 	VERBOSE("  PPS/T:     0x%x/%u\n", gpt_config.pps, gpt_config.t);
 	VERBOSE("  PGS/P:     0x%x/%u\n", gpt_config.pgs, gpt_config.p);
 	VERBOSE("  L0GPTSZ/S: 0x%x/%u\n", GPT_L0GPTSZ, GPT_S_VAL);
 	VERBOSE("  L0 base:   0x%"PRIxPTR"\n", gpt_config.plat_gpt_l0_base);
+#if (RME_GPT_BITLOCK_BLOCK != 0)
 	VERBOSE("  Bitlocks:  0x%"PRIxPTR"\n", (uintptr_t)gpt_bitlock_base);
-
+#endif
 	return 0;
 }
 
@@ -1326,7 +1365,7 @@
 static int get_gpi_params(uint64_t base, gpi_info_t *gpi_info)
 {
 	uint64_t gpt_l0_desc, *gpt_l0_base;
-	unsigned int idx_512;
+	__unused unsigned int block_idx;
 
 	gpt_l0_base = (uint64_t *)gpt_config.plat_gpt_l0_base;
 	gpt_l0_desc = gpt_l0_base[GPT_L0_IDX(base)];
@@ -1341,19 +1380,20 @@
 	gpi_info->idx = (unsigned int)GPT_L1_INDEX(base);
 	gpi_info->gpi_shift = GPT_L1_GPI_IDX(gpt_config.p, base) << 2;
 
-	/* 512MB block index */
-	idx_512 = (unsigned int)(base / SZ_512M);
+#if (RME_GPT_BITLOCK_BLOCK != 0)
+	/* Block index */
+	block_idx = (unsigned int)(base / (RME_GPT_BITLOCK_BLOCK * SZ_512M));
 
 	/* Bitlock address and mask */
-	gpi_info->lock = &gpt_bitlock_base[idx_512 / LOCK_BITS];
-	gpi_info->mask = 1U << (idx_512 & (LOCK_BITS - 1U));
-
+	gpi_info->lock = &gpt_bitlock_base[block_idx / LOCK_BITS];
+	gpi_info->mask = 1U << (block_idx & (LOCK_BITS - 1U));
+#endif
 	return 0;
 }
 
 /*
  * Helper to retrieve the gpt_l1_desc and GPI information from gpi_info.
- * This function is called with bitlock acquired.
+ * This function is called with bitlock or spinlock acquired.
  */
 static void read_gpi(gpi_info_t *gpi_info)
 {
@@ -1716,12 +1756,10 @@
 	}
 
 	/*
-	 * Access to each 512MB block in L1 tables is controlled by a bitlock
-	 * to ensure that no more than one CPU is allowed to make changes at
-	 * any given time.
+	 * Access to GPT is controlled by a lock to ensure that no more
+	 * than one CPU is allowed to make changes at any given time.
 	 */
-	bit_lock(gpi_info.lock, gpi_info.mask);
-
+	GPT_LOCK;
 	read_gpi(&gpi_info);
 
 	/* Check that the current address is in NS state */
@@ -1729,7 +1767,7 @@
 		VERBOSE("GPT: Only Granule in NS state can be delegated.\n");
 		VERBOSE("      Caller: %u, Current GPI: %u\n", src_sec_state,
 			gpi_info.gpi);
-		bit_unlock(gpi_info.lock, gpi_info.mask);
+		GPT_UNLOCK;
 		return -EPERM;
 	}
 
@@ -1766,8 +1804,8 @@
 	}
 #endif
 
-	/* Unlock access to 512MB block */
-	bit_unlock(gpi_info.lock, gpi_info.mask);
+	/* Release the GPT lock */
+	GPT_UNLOCK;
 
 	/*
 	 * The isb() will be done as part of context
@@ -1838,12 +1876,10 @@
 	}
 
 	/*
-	 * Access to each 512MB block in L1 tables is controlled by a bitlock
-	 * to ensure that no more than one CPU is allowed to make changes at
-	 * any given time.
+	 * Access to GPT is controlled by a lock to ensure that no more
+	 * than one CPU is allowed to make changes at any given time.
 	 */
-	bit_lock(gpi_info.lock, gpi_info.mask);
-
+	GPT_LOCK;
 	read_gpi(&gpi_info);
 
 	/* Check that the current address is in the delegated state */
@@ -1859,7 +1895,7 @@
 		VERBOSE("GPT: Only Granule in REALM or SECURE state can be undelegated\n");
 		VERBOSE("      Caller: %u Current GPI: %u\n", src_sec_state,
 			gpi_info.gpi);
-		bit_unlock(gpi_info.lock, gpi_info.mask);
+		GPT_UNLOCK;
 		return -EPERM;
 	}
 
@@ -1906,8 +1942,8 @@
 		fuse_block(base, &gpi_info, GPT_L1_NS_DESC);
 	}
 #endif
-	/* Unlock access to 512MB block */
-	bit_unlock(gpi_info.lock, gpi_info.mask);
+	/* Release the GPT lock */
+	GPT_UNLOCK;
 
 	/*
 	 * The isb() will be done as part of context
diff --git a/lib/gpt_rme/gpt_rme.mk b/lib/gpt_rme/gpt_rme.mk
index 52b38d2..7d6b61f 100644
--- a/lib/gpt_rme/gpt_rme.mk
+++ b/lib/gpt_rme/gpt_rme.mk
@@ -4,6 +4,15 @@
 # SPDX-License-Identifier: BSD-3-Clause
 #
 
+# Process RME_GPT_BITLOCK_BLOCK value
+ifeq ($(filter 0 1 2 4 8 16 32 64 128 256 512, ${RME_GPT_BITLOCK_BLOCK}),)
+    $(error "Invalid value for RME_GPT_BITLOCK_BLOCK: ${RME_GPT_BITLOCK_BLOCK}")
+endif
+
+ifeq (${RME_GPT_BITLOCK_BLOCK},0)
+    $(warning "GPT library uses global spinlock")
+endif
+
 # Process RME_GPT_MAX_BLOCK value
 ifeq ($(filter 0 2 32 512, ${RME_GPT_MAX_BLOCK}),)
     $(error "Invalid value for RME_GPT_MAX_BLOCK: ${RME_GPT_MAX_BLOCK}")
diff --git a/lib/gpt_rme/gpt_rme_private.h b/lib/gpt_rme/gpt_rme_private.h
index 4d2ab59..31dad20 100644
--- a/lib/gpt_rme/gpt_rme_private.h
+++ b/lib/gpt_rme/gpt_rme_private.h
@@ -154,8 +154,10 @@
 	unsigned int idx;
 	unsigned int gpi_shift;
 	unsigned int gpi;
+#if (RME_GPT_BITLOCK_BLOCK != 0)
 	bitlock_t *lock;
 	LOCK_TYPE mask;
+#endif
 } gpi_info_t;
 
 /*
diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk
index e742d6c..a5c78ae 100644
--- a/make_helpers/defaults.mk
+++ b/make_helpers/defaults.mk
@@ -139,6 +139,9 @@
 # For Chain of Trust
 GENERATE_COT			:= 0
 
+# Default number of 512MB blocks covered by each bit of a bitlock
+RME_GPT_BITLOCK_BLOCK		:= 1
+
 # Default maximum size of GPT contiguous block
 RME_GPT_MAX_BLOCK		:= 2