Merge pull request #393 from mtk09422/misc-updates
mt8173: Update SPM and fix watchdog setting
diff --git a/bl2/aarch64/bl2_entrypoint.S b/bl2/aarch64/bl2_entrypoint.S
index 987d30e..1d26229 100644
--- a/bl2/aarch64/bl2_entrypoint.S
+++ b/bl2/aarch64/bl2_entrypoint.S
@@ -82,6 +82,20 @@
b.ne _panic
/* ---------------------------------------------
+ * Invalidate the RW memory used by the BL2
+ * image. This includes the data and NOBITS
+ * sections. This is done to safeguard against
+ * possible corruption of this memory by dirty
+ * cache lines in a system cache as a result of
+ * use by an earlier boot loader stage.
+ * ---------------------------------------------
+ */
+ adr x0, __RW_START__
+ adr x1, __RW_END__
+ sub x1, x1, x0
+ bl inv_dcache_range
+
+ /* ---------------------------------------------
* Zero out NOBITS sections. There are 2 of them:
* - the .bss section;
* - the coherent memory section.
diff --git a/bl2/bl2.ld.S b/bl2/bl2.ld.S
index 33588e6..a660bda 100644
--- a/bl2/bl2.ld.S
+++ b/bl2/bl2.ld.S
@@ -68,6 +68,12 @@
__RO_END__ = .;
} >RAM
+ /*
+ * Define a linker symbol to mark start of the RW memory area for this
+ * image.
+ */
+ __RW_START__ = . ;
+
.data . : {
__DATA_START__ = .;
*(.data*)
@@ -121,6 +127,11 @@
} >RAM
#endif
+ /*
+ * Define a linker symbol to mark end of the RW memory area for this
+ * image.
+ */
+ __RW_END__ = .;
__BL2_END__ = .;
__BSS_SIZE__ = SIZEOF(.bss);
diff --git a/bl31/aarch64/bl31_entrypoint.S b/bl31/aarch64/bl31_entrypoint.S
index 5ba0f9c..636b1d2 100644
--- a/bl31/aarch64/bl31_entrypoint.S
+++ b/bl31/aarch64/bl31_entrypoint.S
@@ -113,5 +113,22 @@
*/
bl bl31_main
+ /* -------------------------------------------------------------
+ * Clean the .data & .bss sections to main memory. This ensures
+ * that any global data which was initialised by the primary CPU
+ * is visible to secondary CPUs before they enable their data
+ * caches and participate in coherency.
+ * -------------------------------------------------------------
+ */
+ adr x0, __DATA_START__
+ adr x1, __DATA_END__
+ sub x1, x1, x0
+ bl clean_dcache_range
+
+ adr x0, __BSS_START__
+ adr x1, __BSS_END__
+ sub x1, x1, x0
+ bl clean_dcache_range
+
b el3_exit
endfunc bl31_entrypoint
diff --git a/bl31/bl31.ld.S b/bl31/bl31.ld.S
index 3327f31..e572f9b 100644
--- a/bl31/bl31.ld.S
+++ b/bl31/bl31.ld.S
@@ -81,6 +81,12 @@
ASSERT(__CPU_OPS_END__ > __CPU_OPS_START__,
"cpu_ops not defined for this platform.")
+ /*
+ * Define a linker symbol to mark start of the RW memory area for this
+ * image.
+ */
+ __RW_START__ = . ;
+
.data . : {
__DATA_START__ = .;
*(.data*)
@@ -101,10 +107,31 @@
* The .bss section gets initialised to 0 at runtime.
* Its base address must be 16-byte aligned.
*/
- .bss : ALIGN(16) {
+ .bss (NOLOAD) : ALIGN(16) {
__BSS_START__ = .;
*(.bss*)
*(COMMON)
+#if !USE_COHERENT_MEM
+ /*
+ * Bakery locks are stored in normal .bss memory
+ *
+ * Each lock's data is spread across multiple cache lines, one per CPU,
+ * but multiple locks can share the same cache line.
+ * The compiler will allocate enough memory for one CPU's bakery locks,
+ * the remaining cache lines are allocated by the linker script
+ */
+ . = ALIGN(CACHE_WRITEBACK_GRANULE);
+ __BAKERY_LOCK_START__ = .;
+ *(bakery_lock)
+ . = ALIGN(CACHE_WRITEBACK_GRANULE);
+ __PERCPU_BAKERY_LOCK_SIZE__ = ABSOLUTE(. - __BAKERY_LOCK_START__);
+ . = . + (__PERCPU_BAKERY_LOCK_SIZE__ * (PLATFORM_CORE_COUNT - 1));
+ __BAKERY_LOCK_END__ = .;
+#ifdef PLAT_PERCPU_BAKERY_LOCK_SIZE
+ ASSERT(__PERCPU_BAKERY_LOCK_SIZE__ == PLAT_PERCPU_BAKERY_LOCK_SIZE,
+ "PLAT_PERCPU_BAKERY_LOCK_SIZE does not match bakery lock requirements");
+#endif
+#endif
__BSS_END__ = .;
} >RAM
@@ -126,6 +153,12 @@
*/
coherent_ram (NOLOAD) : ALIGN(4096) {
__COHERENT_RAM_START__ = .;
+ /*
+ * Bakery locks are stored in coherent memory
+ *
+ * Each lock's data is contiguous and fully allocated by the compiler
+ */
+ *(bakery_lock)
*(tzfw_coherent_mem)
__COHERENT_RAM_END_UNALIGNED__ = .;
/*
@@ -138,6 +171,11 @@
} >RAM
#endif
+ /*
+ * Define a linker symbol to mark end of the RW memory area for this
+ * image.
+ */
+ __RW_END__ = .;
__BL31_END__ = .;
__BSS_SIZE__ = SIZEOF(.bss);
diff --git a/bl31/bl31_main.c b/bl31/bl31_main.c
index a1a3710..a244a5c 100644
--- a/bl31/bl31_main.c
+++ b/bl31/bl31_main.c
@@ -87,9 +87,6 @@
INFO("BL3-1: Initializing runtime services\n");
runtime_svc_init();
- /* Clean caches before re-entering normal world */
- dcsw_op_all(DCCSW);
-
/*
* All the cold boot actions on the primary cpu are done. We now need to
* decide which is the next image (BL32 or BL33) and how to execute it.
diff --git a/bl32/tsp/aarch64/tsp_entrypoint.S b/bl32/tsp/aarch64/tsp_entrypoint.S
index 4e8da74..9732ff2 100644
--- a/bl32/tsp/aarch64/tsp_entrypoint.S
+++ b/bl32/tsp/aarch64/tsp_entrypoint.S
@@ -99,6 +99,20 @@
isb
/* ---------------------------------------------
+ * Invalidate the RW memory used by the BL32
+ * image. This includes the data and NOBITS
+ * sections. This is done to safeguard against
+ * possible corruption of this memory by dirty
+ * cache lines in a system cache as a result of
+ * use by an earlier boot loader stage.
+ * ---------------------------------------------
+ */
+ adr x0, __RW_START__
+ adr x1, __RW_END__
+ sub x1, x1, x0
+ bl inv_dcache_range
+
+ /* ---------------------------------------------
* Zero out NOBITS sections. There are 2 of them:
* - the .bss section;
* - the coherent memory section.
diff --git a/bl32/tsp/tsp.ld.S b/bl32/tsp/tsp.ld.S
index d411ad0..41c4b4a 100644
--- a/bl32/tsp/tsp.ld.S
+++ b/bl32/tsp/tsp.ld.S
@@ -62,6 +62,12 @@
__RO_END__ = .;
} >RAM
+ /*
+ * Define a linker symbol to mark start of the RW memory area for this
+ * image.
+ */
+ __RW_START__ = . ;
+
.data . : {
__DATA_START__ = .;
*(.data*)
@@ -119,6 +125,11 @@
} >RAM
#endif
+ /*
+ * Define a linker symbol to mark the end of the RW memory area for this
+ * image.
+ */
+ __RW_END__ = .;
__BL32_END__ = .;
__BSS_SIZE__ = SIZEOF(.bss);
diff --git a/docs/firmware-design.md b/docs/firmware-design.md
index 18f634f..41fb7c0 100644
--- a/docs/firmware-design.md
+++ b/docs/firmware-design.md
@@ -1523,38 +1523,52 @@
The below sections analyze the data structures allocated in the coherent memory
region and the changes required to allocate them in normal memory.
-### PSCI Affinity map nodes
+### Coherent memory usage in PSCI implementation
-The `psci_aff_map` data structure stores the hierarchial node information for
-each affinity level in the system including the PSCI states associated with them.
-By default, this data structure is allocated in the coherent memory region in
-the Trusted Firmware because it can be accessed by multiple CPUs, either with
-their caches enabled or disabled.
+The `psci_non_cpu_pd_nodes` data structure stores the platform's power domain
+tree information for state management of power domains. By default, this data
+structure is allocated in the coherent memory region in the Trusted Firmware
+because it can be accessed by multiple CPUs, either with caches enabled or
+disabled.
- typedef struct aff_map_node {
- unsigned long mpidr;
- unsigned char ref_count;
- unsigned char state;
- unsigned char level;
- #if USE_COHERENT_MEM
- bakery_lock_t lock;
- #else
- unsigned char aff_map_index;
- #endif
- } aff_map_node_t;
+typedef struct non_cpu_pwr_domain_node {
+ /*
+ * Index of the first CPU power domain node level 0 which has this node
+ * as its parent.
+ */
+ unsigned int cpu_start_idx;
+
+ /*
+ * Number of CPU power domains which are siblings of the domain indexed
+ * by 'cpu_start_idx' i.e. all the domains in the range 'cpu_start_idx
+ * -> cpu_start_idx + ncpus' have this node as their parent.
+ */
+ unsigned int ncpus;
+
+ /*
+ * Index of the parent power domain node.
+ * TODO: Figure out whether using a pointer is more efficient.
+ */
+ unsigned int parent_node;
+
+ plat_local_state_t local_state;
+
+ unsigned char level;
+
+ /* For indexing the psci_lock array*/
+ unsigned char lock_index;
+} non_cpu_pd_node_t;
In order to move this data structure to normal memory, the use of each of its
-fields must be analyzed. Fields like `mpidr` and `level` are only written once
-during cold boot. Hence removing them from coherent memory involves only doing
-a clean and invalidate of the cache lines after these fields are written.
+fields must be analyzed. Fields like `cpu_start_idx`, `ncpus`, `parent_node`,
+`level` and `lock_index` are only written once during cold boot. Hence removing
+them from coherent memory involves only doing a clean and invalidate of the
+cache lines after these fields are written.
-The fields `state` and `ref_count` can be concurrently accessed by multiple
-CPUs in different cache states. A Lamport's Bakery lock is used to ensure mutual
-exlusion to these fields. As a result, it is possible to move these fields out
-of coherent memory by performing software cache maintenance on them. The field
-`lock` is the bakery lock data structure when `USE_COHERENT_MEM` is enabled.
-The `aff_map_index` is used to identify the bakery lock when `USE_COHERENT_MEM`
-is disabled.
+The field `local_state` can be concurrently accessed by multiple CPUs in
+different cache states. A Lamport's Bakery lock `psci_locks` is used to ensure
+mutual exclusion to this field and a clean and invalidate is needed after it
+is written.
### Bakery lock data
@@ -1563,9 +1577,13 @@
defined as follows:
typedef struct bakery_lock {
- int owner;
- volatile char entering[BAKERY_LOCK_MAX_CPUS];
- volatile unsigned number[BAKERY_LOCK_MAX_CPUS];
+ /*
+ * The lock_data is a bit-field of 2 members:
+ * Bit[0] : choosing. This field is set when the CPU is
+ * choosing its bakery number.
+ * Bits[1 - 15] : number. This is the bakery number allocated.
+ */
+ volatile uint16_t lock_data[BAKERY_LOCK_MAX_CPUS];
} bakery_lock_t;
It is a characteristic of Lamport's Bakery algorithm that the volatile per-CPU
@@ -1589,17 +1607,14 @@
To use bakery locks when `USE_COHERENT_MEM` is disabled, the lock data structure
has been redesigned. The changes utilise the characteristic of Lamport's Bakery
-algorithm mentioned earlier. The per-CPU fields of the new lock structure are
-aligned such that they are allocated on separate cache lines. The per-CPU data
-framework in Trusted Firmware is used to achieve this. This enables software to
+algorithm mentioned earlier. The bakery_lock structure only allocates the memory
+for a single CPU. The macro `DEFINE_BAKERY_LOCK` allocates all the bakery locks
+needed for a CPU into a section `bakery_lock`. The linker allocates the memory
+for other cores by using the total size allocated for the bakery_lock section
+and multiplying it with (PLATFORM_CORE_COUNT - 1). This enables software to
perform software cache maintenance on the lock data structure without running
into coherency issues associated with mismatched attributes.
-The per-CPU data framework enables consolidation of data structures on the
-fewest cache lines possible. This saves memory as compared to the scenario where
-each data structure is separately aligned to the cache line boundary to achieve
-the same effect.
-
The bakery lock data structure `bakery_info_t` is defined for use when
`USE_COHERENT_MEM` is disabled as follows:
@@ -1615,12 +1630,10 @@
The `bakery_info_t` represents a single per-CPU field of one lock and
the combination of corresponding `bakery_info_t` structures for all CPUs in the
-system represents the complete bakery lock. It is embedded in the per-CPU
-data framework `cpu_data` as shown below:
+system represents the complete bakery lock. The view in memory for a system
+with n bakery locks is:
- CPU0 cpu_data
- ------------------
- | .... |
+ bakery_lock section start
|----------------|
| `bakery_info_t`| <-- Lock_0 per-CPU field
| Lock_0 | for CPU0
@@ -1633,12 +1646,11 @@
| `bakery_info_t`| <-- Lock_N per-CPU field
| Lock_N | for CPU0
------------------
-
-
- CPU1 cpu_data
+ | XXXXX |
+ | Padding to |
+ | next Cache WB | <--- Calculate PERCPU_BAKERY_LOCK_SIZE, allocate
+ | Granule | continuous memory for remaining CPUs.
------------------
- | .... |
- |----------------|
| `bakery_info_t`| <-- Lock_0 per-CPU field
| Lock_0 | for CPU1
|----------------|
@@ -1650,14 +1662,20 @@
| `bakery_info_t`| <-- Lock_N per-CPU field
| Lock_N | for CPU1
------------------
+ | XXXXX |
+ | Padding to |
+ | next Cache WB |
+ | Granule |
+ ------------------
-Consider a system of 2 CPUs with 'N' bakery locks as shown above. For an
+Consider a system of 2 CPUs with 'N' bakery locks as shown above. For an
operation on Lock_N, the corresponding `bakery_info_t` in both CPU0 and CPU1
-`cpu_data` need to be fetched and appropriate cache operations need to be
-performed for each access.
+`bakery_lock` section need to be fetched and appropriate cache operations need
+to be performed for each access.
-For multiple bakery locks, an array of `bakery_info_t` is declared in `cpu_data`
-and each lock is given an `id` to identify it in the array.
+On ARM Platforms, bakery locks are used in psci (`psci_locks`) and power controller
+driver (`arm_lock`).
+
### Non Functional Impact of removing coherent memory
@@ -1680,10 +1698,9 @@
As mentioned earlier, almost a page of memory can be saved by disabling
`USE_COHERENT_MEM`. Each platform needs to consider these trade-offs to decide
whether coherent memory should be used. If a platform disables
-`USE_COHERENT_MEM` and needs to use bakery locks in the porting layer, it should
-reserve memory in `cpu_data` by defining the macro `PLAT_PCPU_DATA_SIZE` (see
-the [Porting Guide]). Refer to the reference platform code for examples.
-
+`USE_COHERENT_MEM` and needs to use bakery locks in the porting layer, it can
+optionally define macro `PLAT_PERCPU_BAKERY_LOCK_SIZE` (see the [Porting
+Guide]). Refer to the reference platform code for examples.
12. Code Structure
-------------------
diff --git a/docs/porting-guide.md b/docs/porting-guide.md
index 6846ddf..50d36ea 100644
--- a/docs/porting-guide.md
+++ b/docs/porting-guide.md
@@ -76,21 +76,24 @@
stage. In ARM standard platforms, each BL stage configures the MMU in
the platform-specific architecture setup function, `blX_plat_arch_setup()`.
-If the build option `USE_COHERENT_MEM` is enabled, each platform must allocate a
+If the build option `USE_COHERENT_MEM` is enabled, each platform can allocate a
block of identity mapped secure memory with Device-nGnRE attributes aligned to
-page boundary (4K) for each BL stage. This memory is identified by the section
-name `tzfw_coherent_mem` so that its possible for the firmware to place
-variables in it using the following C code directive:
+page boundary (4K) for each BL stage. All sections which allocate coherent
+memory are grouped under `coherent_ram`. For example, bakery locks are placed in
+a section identified by name `bakery_lock` inside `coherent_ram` so that it's
+possible for the firmware to place variables in it using the following C code
+directive:
- __attribute__ ((section("tzfw_coherent_mem")))
+ __attribute__ ((section("bakery_lock")))
Or alternatively the following assembler code directive:
- .section tzfw_coherent_mem
+ .section bakery_lock
-The `tzfw_coherent_mem` section is used to allocate any data structures that are
-accessed both when a CPU is executing with its MMU and caches enabled, and when
-it's running with its MMU and caches disabled. Examples are given below.
+The `coherent_ram` section is a sum of all sections like `bakery_lock` which are
+used to allocate any data structures that are accessed both when a CPU is
+executing with its MMU and caches enabled, and when it's running with its MMU
+and caches disabled. Examples are given below.
The following variables, functions and constants must be defined by the platform
for the firmware to work correctly.
@@ -1150,6 +1153,24 @@
modes table.
+### #define : PLAT_PERCPU_BAKERY_LOCK_SIZE [optional]
+
+ When `USE_COHERENT_MEM = 0`, this constant defines the total memory (in
+ bytes) aligned to the cache line boundary that should be allocated per-cpu to
+ accommodate all the bakery locks.
+
+ If this constant is not defined when `USE_COHERENT_MEM = 0`, the linker
+ calculates the size of the `bakery_lock` input section, aligns it to the
+ nearest `CACHE_WRITEBACK_GRANULE`, multiplies it with `PLATFORM_CORE_COUNT`
+ and stores the result in a linker symbol. This constant prevents a platform
+ from relying on the linker and provides a more efficient mechanism for
+ accessing per-cpu bakery lock information.
+
+ If this constant is defined and its value is not equal to the value
+ calculated by the linker then a link time assertion is raised. A compile time
+ assertion is raised if the value of the constant is not aligned to the cache
+ line boundary.
+
3.3 Power State Coordination Interface (in BL3-1)
------------------------------------------------
diff --git a/drivers/arm/ccn/ccn.c b/drivers/arm/ccn/ccn.c
new file mode 100644
index 0000000..aef891b
--- /dev/null
+++ b/drivers/arm/ccn/ccn.c
@@ -0,0 +1,530 @@
+/*
+ * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arch.h>
+#include <assert.h>
+#include <bakery_lock.h>
+#include <ccn.h>
+#include <debug.h>
+#include <errno.h>
+#include <mmio.h>
+#include "ccn_private.h"
+
+static const ccn_desc_t *ccn_plat_desc;
+#if IMAGE_BL31
+DEFINE_BAKERY_LOCK(ccn_lock);
+#endif
+
+/*******************************************************************************
+ * This function takes the base address of the CCN's programmer's view (PV), a
+ * region ID of one of the 256 regions (0-255) and a register offset within the
+ * region. It converts the first two parameters into a base address and uses it
+ * to read the register at the offset.
+ ******************************************************************************/
+static inline unsigned long long ccn_reg_read(uintptr_t periphbase,
+ unsigned int region_id,
+ unsigned int register_offset)
+{
+ uintptr_t region_base;
+
+ assert(periphbase);
+ assert(region_id < REGION_ID_LIMIT);
+ /* Work out where this region starts, then read at the given offset */
+ region_base = periphbase + region_id_to_base(region_id);
+ return mmio_read_64(region_base + register_offset);
+}
+
+/*******************************************************************************
+ * This function takes the base address of the CCN's programmer's view (PV), a
+ * region ID of one of the 256 regions (0-255), a register offset within the
+ * region and a value. It converts the first two parameters into a base address
+ * and uses it to write the value in the register at the offset.
+ ******************************************************************************/
+static inline void ccn_reg_write(uintptr_t periphbase,
+ unsigned int region_id,
+ unsigned int register_offset,
+ unsigned long long value)
+{
+ uintptr_t region_base;
+
+ assert(periphbase);
+ assert(region_id < REGION_ID_LIMIT);
+ /* Work out where this region starts, then write at the given offset */
+ region_base = periphbase + region_id_to_base(region_id);
+ mmio_write_64(region_base + register_offset, value);
+}
+
+#if DEBUG
+/* Number of master interfaces per Request node, indexed by node ID */
+typedef struct rn_info {
+ unsigned char node_desc[MAX_RN_NODES];
+ } rn_info_t;
+
+/*******************************************************************************
+ * This function takes the base address of the CCN's programmer's view (PV) and
+ * the node ID of a Request Node (RN-D or RN-I). It returns the maximum number
+ * of master interfaces resident on that node. This number is equal to the least
+ * significant two bits of the node type ID + 1.
+ ******************************************************************************/
+static unsigned int ccn_get_rni_mcount(uintptr_t periphbase,
+ unsigned int rn_id)
+{
+ unsigned int rn_type_id;
+
+ /* Use the node ID to find the type of the RN-I/D node */
+ rn_type_id = get_node_type(ccn_reg_read(periphbase,
+ rn_id + RNI_REGION_ID_START,
+ REGION_ID_OFFSET));
+
+ /* Return the number of master interfaces based on the node type */
+ return rn_type_id_to_master_cnt(rn_type_id);
+}
+
+/*******************************************************************************
+ * This function reads the CCN registers to find the following information about
+ * the ACE/ACELite/ACELite+DVM/CHI interfaces resident on the various types of
+ * Request Nodes (RN-Fs, RN-Is and RN-Ds) in the system:
+ *
+ * 1. The total number of such interfaces that this CCN IP supports. This is the
+ * cumulative number of interfaces across all Request node types. It is
+ * passed back as the return value of this function.
+ *
+ * 2. The maximum number of interfaces of a type resident on a Request node of
+ * one of the three types. This information is populated in the 'info'
+ * array provided by the caller as described next.
+ *
+ * The array has 64 entries. Each entry corresponds to a Request node. The
+ * Miscellaneous node's programmer's view has RN-F, RN-I and RN-D ID
+ * registers. For each RN-I and RN-D ID indicated as being present in these
+ * registers, its identification register (offset 0xFF00) is read. This
+ * register specifies the maximum number of master interfaces the node
+ * supports. For RN-Fs it is assumed that there can be only a single fully
+ * coherent master resident on each node. The counts for each type of node
+ * are used to populate the array entry at the index corresponding to the node
+ * ID i.e. rn_info[node ID] = <number of master interfaces>
+ ******************************************************************************/
+static unsigned int ccn_get_rn_master_info(uintptr_t periphbase,
+ rn_info_t *info)
+{
+ unsigned int num_masters = 0;
+ rn_types_t rn_type;
+
+ assert (info);
+
+ for (rn_type = RN_TYPE_RNF; rn_type < NUM_RN_TYPES; rn_type++) {
+ unsigned int mn_reg_off, node_id;
+ unsigned long long rn_bitmap;
+
+ /*
+ * The RN-F, RN-I and RN-D node ID registers in the MN region
+ * sit 16 bytes apart, hence the (rn_type << 4) stride.
+ */
+ mn_reg_off = MN_RNF_NODEID_OFFSET + (rn_type << 4);
+ rn_bitmap = ccn_reg_read(periphbase, MN_REGION_ID, mn_reg_off);
+
+ FOR_EACH_PRESENT_NODE_ID(node_id, rn_bitmap) {
+ unsigned int node_mcount;
+
+ /*
+ * A RN-F does not have a node type since it does not
+ * export a programmer's interface. It can only have a
+ * single fully coherent master residing on it. If the
+ * offset of the MN(Miscellaneous Node) register points
+ * to a RN-I/D node then the master count is set to the
+ * maximum number of master interfaces that can possibly
+ * reside on the node.
+ */
+ node_mcount = (mn_reg_off == MN_RNF_NODEID_OFFSET ? 1 :
+ ccn_get_rni_mcount(periphbase, node_id));
+
+ /*
+ * Add this node's count to the running total of master
+ * interfaces possible in the system.
+ */
+ num_masters += node_mcount;
+
+ /*
+ * Update the entry in 'info' for this node ID with
+ * the maximum number of masters that can sit on
+ * it. This information will be used to validate the
+ * node information passed by the platform later.
+ */
+ info->node_desc[node_id] = node_mcount;
+ }
+ }
+
+ return num_masters;
+}
+
+/*******************************************************************************
+ * This function validates parameters passed by the platform (in a debug build).
+ * It collects information about the maximum number of master interfaces that:
+ * a) the CCN IP can accommodate and
+ * b) can exist on each Request node.
+ * It compares this with the information provided by the platform to determine
+ * the validity of the latter.
+ ******************************************************************************/
+static void ccn_validate_plat_params(const ccn_desc_t *plat_desc)
+{
+ unsigned int master_id, num_rn_masters;
+ rn_info_t info = { {0} };
+ /* The platform must hand over a fully populated descriptor */
+ assert(plat_desc);
+ assert(plat_desc->periphbase);
+ assert(plat_desc->master_to_rn_id_map);
+ assert(plat_desc->num_masters);
+ assert(plat_desc->num_masters < CCN_MAX_RN_MASTERS);
+
+ /*
+ * Find the number and properties of fully coherent, IO coherent and IO
+ * coherent + DVM master interfaces
+ */
+ num_rn_masters = ccn_get_rn_master_info(plat_desc->periphbase, &info);
+ assert(plat_desc->num_masters < num_rn_masters);
+
+ /*
+ * Iterate through the Request nodes specified by the platform.
+ * Decrement the count of the masters in the 'info' array for each
+ * Request node encountered. A count that is already 0 means the
+ * platform's view of this aspect of CCN configuration is incorrect.
+ */
+ for (master_id = 0; master_id < plat_desc->num_masters; master_id++) {
+ unsigned int node_id;
+
+ node_id = plat_desc->master_to_rn_id_map[master_id];
+ assert(node_id < MAX_RN_NODES);
+ assert(info.node_desc[node_id]);
+ info.node_desc[node_id]--;
+ }
+}
+#endif /* DEBUG */
+
+/*******************************************************************************
+ * This function validates parameters passed by the platform (in a debug build)
+ * and initialises its internal data structures. A lock is required to prevent
+ * simultaneous CCN operations at runtime (only BL31) to add and remove Request
+ * nodes from coherency.
+ ******************************************************************************/
+void ccn_init(const ccn_desc_t *plat_desc)
+{
+#if DEBUG
+ ccn_validate_plat_params(plat_desc);
+#endif
+ /* Keep the descriptor; the other driver functions read it from here */
+ ccn_plat_desc = plat_desc;
+}
+
+/*******************************************************************************
+ * This function converts a bit map of master interface IDs to a bit map of the
+ * Request node IDs that they reside on. 'master_map' must be non-zero.
+ ******************************************************************************/
+static unsigned long long ccn_master_to_rn_id_map(unsigned long long master_map)
+{
+ unsigned long long rn_id_map = 0;
+ unsigned int node_id, iface_id;
+
+ assert(master_map);
+ assert(ccn_plat_desc);
+
+ FOR_EACH_PRESENT_MASTER_INTERFACE(iface_id, master_map) {
+
+ /* Convert the master ID into the node ID */
+ node_id = ccn_plat_desc->master_to_rn_id_map[iface_id];
+
+ /* Set this node's bit; 1ULL matches the 64-bit width of the map */
+ rn_id_map |= (1ULL << node_id);
+ }
+
+ return rn_id_map;
+}
+
+/*******************************************************************************
+ * This function executes the necessary operations to add or remove Request node
+ * IDs specified in the 'rn_id_map' bitmap from the snoop/DVM domains specified
+ * in the 'hn_id_map'. The 'region_id' specifies the ID of the first HN-F/HN-I
+ * on which the operation should be performed. 'op_reg_offset' specifies the
+ * type of operation (add/remove). 'stat_reg_offset' specifies the register
+ * which should be polled to determine if the operation has completed or not.
+ ******************************************************************************/
+static void ccn_snoop_dvm_do_op(unsigned long long rn_id_map,
+ unsigned long long hn_id_map,
+ unsigned int region_id,
+ unsigned int op_reg_offset,
+ unsigned int stat_reg_offset)
+{
+ unsigned int start_region_id;
+
+ assert(ccn_plat_desc);
+ assert(ccn_plat_desc->periphbase);
+ /* In BL31 the whole write-then-poll sequence runs under ccn_lock */
+#if IMAGE_BL31
+ bakery_lock_get(&ccn_lock);
+#endif
+ start_region_id = region_id;
+ FOR_EACH_PRESENT_REGION_ID(start_region_id, hn_id_map) {
+ ccn_reg_write(ccn_plat_desc->periphbase,
+ start_region_id,
+ op_reg_offset,
+ rn_id_map);
+ }
+
+ start_region_id = region_id;
+ /* Rewound to the first region: now wait for completion on each one */
+ FOR_EACH_PRESENT_REGION_ID(start_region_id, hn_id_map) {
+ WAIT_FOR_DOMAIN_CTRL_OP_COMPLETION(start_region_id,
+ stat_reg_offset,
+ op_reg_offset,
+ rn_id_map);
+ }
+
+#if IMAGE_BL31
+ bakery_lock_release(&ccn_lock);
+#endif
+}
+
+/*******************************************************************************
+ * This function reads the bitmap of Home nodes on the basis of the
+ * 'mn_hn_id_reg_offset' parameter from the Miscellaneous node's (MN)
+ * programmer's view. The MN has a register which carries the bitmap of present
+ * Home nodes of each type i.e. HN-Fs, HN-Is & HN-Ds. It calls
+ * 'ccn_snoop_dvm_do_op()' with this information to perform the actual
+ * operation.
+ ******************************************************************************/
+static void ccn_snoop_dvm_domain_common(unsigned long long rn_id_map,
+ unsigned int hn_op_reg_offset,
+ unsigned int hn_stat_reg_offset,
+ unsigned int mn_hn_id_reg_offset,
+ unsigned int hn_region_id)
+{
+ unsigned long long mn_hn_id_map;
+
+ assert(ccn_plat_desc);
+ assert(ccn_plat_desc->periphbase);
+ /* Read the bitmap of present Home nodes from the MN region */
+ mn_hn_id_map = ccn_reg_read(ccn_plat_desc->periphbase,
+ MN_REGION_ID,
+ mn_hn_id_reg_offset);
+ ccn_snoop_dvm_do_op(rn_id_map,
+ mn_hn_id_map,
+ hn_region_id,
+ hn_op_reg_offset,
+ hn_stat_reg_offset);
+}
+
+/*******************************************************************************
+ * The following functions provide the boot and runtime API to the platform for
+ * adding and removing master interfaces from the snoop/DVM domains. A bitmap of
+ * master interfaces IDs is passed as a parameter. It is converted into a bitmap
+ * of Request node IDs using the mapping provided by the platform while
+ * initialising the driver.
+ * For example, consider a dual cluster system where the clusters have values 0
+ * & 1 in the affinity level 1 field of their respective MPIDRs. While
+ * initialising this driver, the platform provides the mapping between each
+ * cluster and the corresponding Request node. To add or remove a cluster from
+ * the snoop and dvm domain, the bit position corresponding to the cluster ID
+ * should be set in the 'master_iface_map' i.e. to remove both clusters the
+ * bitmap would equal 0x3 (bits 0 and 1 set).
+ ******************************************************************************/
+void ccn_enter_snoop_dvm_domain(unsigned long long master_iface_map)
+{
+ unsigned long long rn_id_map;
+ /* Translate the master interface bitmap into a Request node bitmap */
+ rn_id_map = ccn_master_to_rn_id_map(master_iface_map);
+ ccn_snoop_dvm_domain_common(rn_id_map,
+ HNF_SDC_SET_OFFSET,
+ HNF_SDC_STAT_OFFSET,
+ MN_HNF_NODEID_OFFSET,
+ HNF_REGION_ID_START);
+ /* Same 'set' request again, this time using the MN_DDC_* registers */
+ ccn_snoop_dvm_domain_common(rn_id_map,
+ MN_DDC_SET_OFF,
+ MN_DDC_STAT_OFFSET,
+ MN_HNI_NODEID_OFFSET,
+ MN_REGION_ID);
+}
+
+void ccn_exit_snoop_dvm_domain(unsigned long long master_iface_map)
+{
+ unsigned long long rn_id_map;
+ /* Translate the master interface bitmap into a Request node bitmap */
+ rn_id_map = ccn_master_to_rn_id_map(master_iface_map);
+ ccn_snoop_dvm_domain_common(rn_id_map,
+ HNF_SDC_CLR_OFFSET,
+ HNF_SDC_STAT_OFFSET,
+ MN_HNF_NODEID_OFFSET,
+ HNF_REGION_ID_START);
+ /* Same 'clear' request again, this time using the MN_DDC_* registers */
+ ccn_snoop_dvm_domain_common(rn_id_map,
+ MN_DDC_CLR_OFFSET,
+ MN_DDC_STAT_OFFSET,
+ MN_HNI_NODEID_OFFSET,
+ MN_REGION_ID);
+}
+
+void ccn_enter_dvm_domain(unsigned long long master_iface_map)
+{
+ unsigned long long rn_id_map;
+ /* Translate the master interface bitmap into a Request node bitmap */
+ rn_id_map = ccn_master_to_rn_id_map(master_iface_map);
+ ccn_snoop_dvm_domain_common(rn_id_map,
+ MN_DDC_SET_OFF,
+ MN_DDC_STAT_OFFSET,
+ MN_HNI_NODEID_OFFSET,
+ MN_REGION_ID);
+}
+
+void ccn_exit_dvm_domain(unsigned long long master_iface_map)
+{
+	/* Map the master interface bitmap onto a Request node ID bitmap */
+	const unsigned long long rn_nodes =
+		ccn_master_to_rn_id_map(master_iface_map);
+
+	/* CLEAR request using the MN region domain control registers only */
+	ccn_snoop_dvm_domain_common(rn_nodes, MN_DDC_CLR_OFFSET,
+				    MN_DDC_STAT_OFFSET, MN_HNI_NODEID_OFFSET,
+				    MN_REGION_ID);
+}
+
+/*******************************************************************************
+ * This function returns the run mode of all the L3 cache partitions in the
+ * system. The state is expected to be one of NO_L3, SF_ONLY, L3_HAM or
+ * L3_FAM. Instead of comparing the states reported by all HN-Fs, the state of
+ * the first present HN-F node is reported. Since the driver does not export an
+ * interface to program them separately, there is no reason to perform this
+ * check. An HN-F could report that the L3 cache is transitioning from one mode
+ * to another e.g. HNF_PM_NOL3_2_SFONLY. In this case, the function waits for
+ * the transition to complete and reports the final state.
+ ******************************************************************************/
+unsigned int ccn_get_l3_run_mode(void)
+{
+	unsigned long long pstate;
+
+	assert(ccn_plat_desc);
+	assert(ccn_plat_desc->periphbase);
+
+	/*
+	 * Poll the P-state status register of the HN-F at HNF_REGION_ID_START
+	 * until bits[1:0] are clear i.e. no run mode transition is reported.
+	 */
+	for (;;) {
+		pstate = ccn_reg_read(ccn_plat_desc->periphbase,
+				      HNF_REGION_ID_START,
+				      HNF_PSTATE_STAT_OFFSET);
+		if (!(pstate & 0x3))
+			break;
+	}
+
+	return PSTATE_TO_RUN_MODE(pstate);
+}
+
+/*******************************************************************************
+ * This function sets the run mode of all the L3 cache partitions in the
+ * system to one of NO_L3, SF_ONLY, L3_HAM or L3_FAM depending upon the state
+ * specified by the 'mode' argument.
+ ******************************************************************************/
+void ccn_set_l3_run_mode(unsigned int mode)
+{
+	unsigned long long hnf_nodes, pstate;
+	unsigned int region_id;
+
+	assert(ccn_plat_desc);
+	assert(ccn_plat_desc->periphbase);
+	assert(mode <= CCN_L3_RUN_MODE_FAM);
+
+	/* Bitmap of the HN-F node IDs, read from the MN region */
+	hnf_nodes = ccn_reg_read(ccn_plat_desc->periphbase,
+				 MN_REGION_ID,
+				 MN_HNF_NODEID_OFFSET);
+
+	/* First pass: request the new run mode from every HN-F region */
+	region_id = HNF_REGION_ID_START;
+	FOR_EACH_PRESENT_REGION_ID(region_id, hnf_nodes) {
+		ccn_reg_write(ccn_plat_desc->periphbase,
+			      region_id,
+			      HNF_PSTATE_REQ_OFFSET,
+			      mode);
+	}
+
+	/*
+	 * Second pass: poll the P-state status register of each HN-F region
+	 * until the run mode it reports matches the requested one. The walk
+	 * restarts from the first HN-F region.
+	 */
+	region_id = HNF_REGION_ID_START;
+	FOR_EACH_PRESENT_REGION_ID(region_id, hnf_nodes) {
+		do {
+			pstate = ccn_reg_read(ccn_plat_desc->periphbase,
+					      region_id,
+					      HNF_PSTATE_STAT_OFFSET);
+		} while (PSTATE_TO_RUN_MODE(pstate) != mode);
+	}
+}
+
+/*******************************************************************************
+ * This function configures system address map and provides option to enable the
+ * 3SN striping mode of Slave node operation. The Slave node IDs and the Top
+ * Address bit1 and bit0 are provided as parameters to this function. This
+ * configuration is needed only if network contains a single SN-F or 3 SN-F and
+ * must be completed before the first request by the system to normal memory.
+ ******************************************************************************/
+void ccn_program_sys_addrmap(unsigned int sn0_id,
+			     unsigned int sn1_id,
+			     unsigned int sn2_id,
+			     unsigned int top_addr_bit0,
+			     unsigned int top_addr_bit1,
+			     unsigned char three_sn_en)
+{
+	unsigned long long hnf_nodes, sam_ctrl;
+	unsigned int region_id = HNF_REGION_ID_START;
+
+	assert(ccn_plat_desc);
+	assert(ccn_plat_desc->periphbase);
+
+	/*
+	 * The same value is written to every HN-F region, so it is composed
+	 * from the caller's parameters only once.
+	 */
+	sam_ctrl = MAKE_HNF_SAM_CTRL_VALUE(sn0_id, sn1_id, sn2_id,
+					   top_addr_bit0, top_addr_bit1,
+					   three_sn_en);
+
+	/* Bitmap of the HN-F node IDs, read from the MN region */
+	hnf_nodes = ccn_reg_read(ccn_plat_desc->periphbase,
+				 MN_REGION_ID,
+				 MN_HNF_NODEID_OFFSET);
+
+	/* Write the SAM control register of every HN-F region */
+	FOR_EACH_PRESENT_REGION_ID(region_id, hnf_nodes) {
+		ccn_reg_write(ccn_plat_desc->periphbase,
+			      region_id,
+			      HNF_SAM_CTRL_OFFSET,
+			      sam_ctrl);
+	}
+}
diff --git a/drivers/arm/ccn/ccn_private.h b/drivers/arm/ccn/ccn_private.h
new file mode 100644
index 0000000..e92e870
--- /dev/null
+++ b/drivers/arm/ccn/ccn_private.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CCN_PRIVATE_H__
+#define __CCN_PRIVATE_H__
+
+/*
+ * A CCN implementation can have a maximum of 64 Request nodes with node IDs
+ * from 0-63. These IDs are split across the three types of Request nodes
+ * i.e. RN-F, RN-D and RN-I.
+ */
+#define MAX_RN_NODES 64
+
+/* Enum used to loop through the 3 types of Request nodes */
+typedef enum rn_types {
+ RN_TYPE_RNF = 0,
+ RN_TYPE_RNI,
+ RN_TYPE_RND,
+ NUM_RN_TYPES
+} rn_types_t;
+
+/* Macro to convert a region id to its base address */
+#define region_id_to_base(id) ((id) << 16)
+
+/*
+ * Macro to calculate the number of master interfaces resident on a RN-I/RN-D.
+ * Value of first two bits of the RN-I/D node type + 1 == Maximum number of
+ * ACE-Lite or ACE-Lite+DVM interfaces supported on this node. E.g.
+ *
+ * 0x14 : RN-I with 1 ACE-Lite interface
+ * 0x15 : RN-I with 2 ACE-Lite interfaces
+ * 0x16 : RN-I with 3 ACE-Lite interfaces
+ */
+#define rn_type_id_to_master_cnt(id) (((id) & 0x3) + 1)
+
+/*
+ * Constants used to identify a region in the programmer's view. These are
+ * common for all regions.
+ */
+#define REGION_ID_LIMIT 256
+#define REGION_ID_OFFSET 0xFF00
+
+#define REGION_NODE_ID_SHIFT 8
+#define REGION_NODE_ID_MASK 0x7f
+#define get_node_id(id_reg) (((id_reg) >> REGION_NODE_ID_SHIFT) \
+ & REGION_NODE_ID_MASK)
+
+#define REGION_NODE_TYPE_SHIFT 0
+#define REGION_NODE_TYPE_MASK 0x1f
+#define get_node_type(id_reg) (((id_reg) >> REGION_NODE_TYPE_SHIFT) \
+ & REGION_NODE_TYPE_MASK)
+
+/* Common offsets of registers to enter or exit a snoop/dvm domain */
+#define DOMAIN_CTRL_STAT_OFFSET 0x0200
+#define DOMAIN_CTRL_SET_OFFSET 0x0210
+#define DOMAIN_CTRL_CLR_OFFSET 0x0220
+
+/*
+ * This macro is used to determine if an operation to add or remove a Request
+ * node from the snoop/dvm domain has completed. 'rn_id_map' is a bit map of
+ * nodes. It was used to program the SET or CLEAR control register. The type of
+ * register is specified by 'op_reg_offset'. 'status_reg' is the bit map of
+ * nodes currently present in the snoop/dvm domain. 'rn_id_map' and 'status_reg'
+ * are logically ANDed and the result is stored back in the 'status_reg'. There
+ * are two outcomes of this operation:
+ *
+ * 1. If the DOMAIN_CTRL_SET_OFFSET register was programmed, then the set bits in
+ * 'rn_id_map' should appear in 'status_reg' when the operation completes. So
+ * after the AND operation, at some point of time 'status_reg' should equal
+ * 'rn_id_map'.
+ *
+ * 2. If the DOMAIN_CTRL_CLR_OFFSET register was programmed, then the set bits in
+ * 'rn_id_map' should disappear in 'status_reg' when the operation
+ * completes. So after the AND operation, at some point of time 'status_reg'
+ * should equal 0.
+ */
+#define WAIT_FOR_DOMAIN_CTRL_OP_COMPLETION(region_id, stat_reg_offset, \
+ op_reg_offset, rn_id_map) \
+ { \
+ uint64_t status_reg; \
+ do { \
+ status_reg = ccn_reg_read((ccn_plat_desc->periphbase), \
+ (region_id), \
+ (stat_reg_offset)); \
+ status_reg &= (rn_id_map); \
+ } while ((op_reg_offset) == DOMAIN_CTRL_SET_OFFSET ? \
+ (rn_id_map) != status_reg : status_reg); \
+ }
+
+/*
+ * Region ID of the Miscellaneous Node is always 0 as it is located at the base
+ * of the programmer's view.
+ */
+#define MN_REGION_ID 0
+
+#define MN_REGION_ID_START 0
+#define DEBUG_REGION_ID_START 1
+#define HNI_REGION_ID_START 8
+#define SBSX_REGION_ID_START 16
+#define HNF_REGION_ID_START 32
+#define XP_REGION_ID_START 64
+#define RNI_REGION_ID_START 128
+
+/* Selected register offsets from the base of a HNF region */
+#define HNF_CFG_CTRL_OFFSET 0x0000
+#define HNF_SAM_CTRL_OFFSET 0x0008
+#define HNF_PSTATE_REQ_OFFSET 0x0010
+#define HNF_PSTATE_STAT_OFFSET 0x0018
+#define HNF_SDC_STAT_OFFSET DOMAIN_CTRL_STAT_OFFSET
+#define HNF_SDC_SET_OFFSET DOMAIN_CTRL_SET_OFFSET
+#define HNF_SDC_CLR_OFFSET DOMAIN_CTRL_CLR_OFFSET
+#define HNF_AUX_CTRL_OFFSET 0x0500
+
+/* Selected register offsets from the base of a MN region */
+#define MN_SAR_OFFSET 0x0000
+#define MN_RNF_NODEID_OFFSET 0x0180
+#define MN_RNI_NODEID_OFFSET 0x0190
+#define MN_RND_NODEID_OFFSET 0x01A0
+#define MN_HNF_NODEID_OFFSET 0x01B0
+#define MN_HNI_NODEID_OFFSET 0x01C0
+#define MN_SN_NODEID_OFFSET 0x01D0
+#define MN_DDC_STAT_OFFSET DOMAIN_CTRL_STAT_OFFSET
+#define MN_DDC_SET_OFF DOMAIN_CTRL_SET_OFFSET
+#define MN_DDC_CLR_OFFSET DOMAIN_CTRL_CLR_OFFSET
+#define MN_ID_OFFSET REGION_ID_OFFSET
+
+/* HNF System Address Map register bit masks and shifts */
+#define HNF_SAM_CTRL_SN_ID_MASK 0x7f
+#define HNF_SAM_CTRL_SN0_ID_SHIFT 0
+#define HNF_SAM_CTRL_SN1_ID_SHIFT 8
+#define HNF_SAM_CTRL_SN2_ID_SHIFT 16
+
+#define HNF_SAM_CTRL_TAB0_MASK		0x3fULL	/* 64-bit: shifted by 48 */
+#define HNF_SAM_CTRL_TAB0_SHIFT		48
+#define HNF_SAM_CTRL_TAB1_MASK		0x3fULL	/* 64-bit: shifted by 56 */
+#define HNF_SAM_CTRL_TAB1_SHIFT		56
+
+#define HNF_SAM_CTRL_3SN_ENB_SHIFT	32
+#define HNF_SAM_CTRL_3SN_ENB_MASK	0x01ULL	/* 64-bit: shifted by 32 */
+
+/*
+ * Macro to create a value suitable for programming into a HNF SAM Control
+ * register for enabling 3SN striping.
+ */
+#define MAKE_HNF_SAM_CTRL_VALUE(sn0, sn1, sn2, tab0, tab1, three_sn_en) \
+ ((((sn0) & HNF_SAM_CTRL_SN_ID_MASK) << HNF_SAM_CTRL_SN0_ID_SHIFT) | \
+ (((sn1) & HNF_SAM_CTRL_SN_ID_MASK) << HNF_SAM_CTRL_SN1_ID_SHIFT) | \
+ (((sn2) & HNF_SAM_CTRL_SN_ID_MASK) << HNF_SAM_CTRL_SN2_ID_SHIFT) | \
+ (((tab0) & HNF_SAM_CTRL_TAB0_MASK) << HNF_SAM_CTRL_TAB0_SHIFT) | \
+ (((tab1) & HNF_SAM_CTRL_TAB1_MASK) << HNF_SAM_CTRL_TAB1_SHIFT) | \
+ (((three_sn_en) & HNF_SAM_CTRL_3SN_ENB_MASK) << HNF_SAM_CTRL_3SN_ENB_SHIFT))
+
+/* Mask to read the power state value from an HN-F P-state register */
+#define HNF_PSTATE_MASK 0xf
+
+/* Macro to extract the run mode from a p-state value */
+#define PSTATE_TO_RUN_MODE(pstate) (((pstate) & HNF_PSTATE_MASK) >> 2)
+
+/*
+ * Helper macro that iterates through a given bit map, clearing each bit as it
+ * is visited. In each iteration, it returns the position of the lowest set bit
+ * (the 1ULL mask keeps positions >= 32 valid whatever the width of long). It
+ * can be used by utility macros to iterate through all nodes or masters.
+ */
+#define FOR_EACH_BIT(bit_pos, bit_map)			\
+	for (bit_pos = __builtin_ctzll(bit_map);	\
+	     bit_map;					\
+	     bit_map &= ~(1ULL << (bit_pos)),		\
+	     bit_pos = __builtin_ctzll(bit_map))
+
+/*
+ * Utility macro that iterates through a bit map of node IDs. In each
+ * iteration, it returns the ID of the next present node in the bit map. Node
+ * ID of a present node == Position of set bit == Number of zeroes trailing the
+ * bit.
+ */
+#define FOR_EACH_PRESENT_NODE_ID(node_id, bit_map) \
+ FOR_EACH_BIT(node_id, bit_map)
+
+/* Helper function that returns the number of set bits in 'bitmap' */
+static inline unsigned int count_set_bits(uint64_t bitmap)
+{
+	unsigned int set_bits = 0;
+
+	while (bitmap != 0) {
+		bitmap &= bitmap - 1;	/* Drop the lowest set bit */
+		set_bits++;
+	}
+
+	return set_bits;
+}
+
+/*
+ * Utility macro that iterates through the regions of the nodes present in a
+ * bit map of node IDs. 'region_id' must hold the first region ID on entry and
+ * is incremented by the loop itself. The loop visits the region IDs from that
+ * starting value up to (but excluding) starting value + number of set bits in
+ * the bitmap i.e. the number of present nodes.
+ */
+#define FOR_EACH_PRESENT_REGION_ID(region_id, bit_map) \
+ for (unsigned long long region_id_limit = count_set_bits(bit_map) \
+ + region_id; \
+ region_id < region_id_limit; \
+ region_id++)
+
+/*
+ * Same macro as FOR_EACH_PRESENT_NODE_ID, but renamed to indicate it traverses
+ * through a bit map of master interfaces.
+ */
+#define FOR_EACH_PRESENT_MASTER_INTERFACE(iface_id, bit_map) \
+ FOR_EACH_BIT(iface_id, bit_map)
+#endif /* __CCN_PRIVATE_H__ */
diff --git a/include/bl31/services/psci.h b/include/bl31/services/psci.h
index 30a53ca..6298a40 100644
--- a/include/bl31/services/psci.h
+++ b/include/bl31/services/psci.h
@@ -251,9 +251,6 @@
/* The local power state of this CPU */
plat_local_state_t local_state;
-#if !USE_COHERENT_MEM
- bakery_info_t pcpu_bakery_info[PSCI_NUM_NON_CPU_PWR_DOMAINS];
-#endif
} psci_cpu_data_t;
/*******************************************************************************
diff --git a/include/common/el3_common_macros.S b/include/common/el3_common_macros.S
index 7946e72..87e172e 100644
--- a/include/common/el3_common_macros.S
+++ b/include/common/el3_common_macros.S
@@ -214,6 +214,21 @@
* ---------------------------------------------------------------------
*/
.if \_init_c_runtime
+#if IMAGE_BL31
+ /* -------------------------------------------------------------
+ * Invalidate the RW memory used by the BL31 image. This
+ * includes the data and NOBITS sections. This is done to
+ * safeguard against possible corruption of this memory by
+ * dirty cache lines in a system cache as a result of use by
+ * an earlier boot loader stage.
+ * -------------------------------------------------------------
+ */
+ adr x0, __RW_START__
+ adr x1, __RW_END__
+ sub x1, x1, x0
+ bl inv_dcache_range
+#endif /* IMAGE_BL31 */
+
ldr x0, =__BSS_START__
ldr x1, =__BSS_SIZE__
bl zeromem16
diff --git a/include/drivers/arm/ccn.h b/include/drivers/arm/ccn.h
new file mode 100644
index 0000000..2361596
--- /dev/null
+++ b/include/drivers/arm/ccn.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CCN_H__
+#define __CCN_H__
+
+/*
+ * This macro defines the maximum number of master interfaces that reside on
+ * Request nodes which the CCN driver can accommodate. The driver APIs to add
+ * and remove Request nodes from snoop/dvm domains take a bit map of master
+ * interfaces as inputs. The largest C data type that can be used is a 64-bit
+ * unsigned integer. Hence the value of 64. The platform will have to ensure
+ * that the master interfaces are numbered from 0-63.
+ */
+#define CCN_MAX_RN_MASTERS 64
+
+/*
+ * The following constants define the various run modes that the platform can
+ * request the CCN driver to place the L3 cache in. These map to the
+ * programmable P-State values in a HN-F P-state register.
+ */
+#define CCN_L3_RUN_MODE_NOL3 0x0 /* HNF_PM_NOL3 */
+#define CCN_L3_RUN_MODE_SFONLY 0x1 /* HNF_PM_SFONLY */
+#define CCN_L3_RUN_MODE_HAM 0x2 /* HNF_PM_HALF */
+#define CCN_L3_RUN_MODE_FAM 0x3 /* HNF_PM_FULL */
+
+/*
+ * The following macro takes the value returned from a read of a HN-F P-state
+ * status register and returns the retention state value i.e. bits[5:4].
+ */
+#define CCN_GET_RETENTION_STATE(pstate)	(((pstate) >> 4) & 0x3)
+
+/*
+ * The following macro takes the value returned from a read of a HN-F P-state
+ * status register and returns the run state value i.e. bits[3:0].
+ */
+#define CCN_GET_RUN_STATE(pstate)	((pstate) & 0xf)
+
+#ifndef __ASSEMBLY__
+#include <stdint.h>
+
+/*
+ * This structure describes some of the implementation defined attributes of the
+ * CCN IP. It is used by the platform port to specify these attributes in order
+ * to initialise the CCN driver. The attributes are described below.
+ *
+ * 1. The 'num_masters' field specifies the total number of master interfaces
+ * resident on Request nodes.
+ *
+ * 2. The 'master_to_rn_id_map' field is a pointer to an array in which each
+ * index corresponds to a master interface and its value corresponds to the
+ * Request node on which the master interface resides.
+ * This field is not simply defined as an array of size CCN_MAX_RN_MASTERS.
+ * In reality, a platform will have far fewer master interfaces than
+ * CCN_MAX_RN_MASTERS. With an array of this size, it would also have to
+ * set the unused entries to a suitable value. Zeroing the array would not
+ * be enough since 0 is also a valid node id. Hence, such an array is not
+ * used.
+ *
+ * 3. The 'periphbase' field is the base address of the programmer's view of the
+ * CCN IP.
+ */
+typedef struct ccn_desc {
+ unsigned int num_masters;
+ const unsigned char *master_to_rn_id_map;
+ uintptr_t periphbase;
+} ccn_desc_t;
+
+
+void ccn_init(const ccn_desc_t *plat_ccn_desc);
+void ccn_enter_snoop_dvm_domain(unsigned long long master_iface_map);
+void ccn_exit_snoop_dvm_domain(unsigned long long master_iface_map);
+void ccn_enter_dvm_domain(unsigned long long master_iface_map);
+void ccn_exit_dvm_domain(unsigned long long master_iface_map);
+void ccn_set_l3_run_mode(unsigned int mode);
+void ccn_program_sys_addrmap(unsigned int sn0_id,
+ unsigned int sn1_id,
+ unsigned int sn2_id,
+ unsigned int top_addr_bit0,
+ unsigned int top_addr_bit1,
+ unsigned char three_sn_en);
+unsigned int ccn_get_l3_run_mode(void);
+
+#endif /* __ASSEMBLY__ */
+#endif /* __CCN_H__ */
diff --git a/include/drivers/arm/nic_400.h b/include/drivers/arm/nic_400.h
new file mode 100644
index 0000000..1031662
--- /dev/null
+++ b/include/drivers/arm/nic_400.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of ARM nor the names of its contributors may be used
+ * to endorse or promote products derived from this software without specific
+ * prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __NIC_400_H__
+#define __NIC_400_H__
+
+/*
+ * Address of slave 'n' security setting in the NIC-400 address region
+ * control
+ */
+#define NIC400_ADDR_CTRL_SECURITY_REG(n) (0x8 + (n) * 4)
+
+#endif /* __NIC_400_H__ */
diff --git a/include/lib/aarch64/arch_helpers.h b/include/lib/aarch64/arch_helpers.h
index b7ab3da..d01ea31 100644
--- a/include/lib/aarch64/arch_helpers.h
+++ b/include/lib/aarch64/arch_helpers.h
@@ -145,6 +145,7 @@
DEFINE_SYSOP_TYPE_PARAM_FUNC(at, s12e0w)
void flush_dcache_range(uint64_t, uint64_t);
+void clean_dcache_range(uint64_t, uint64_t);
void inv_dcache_range(uint64_t, uint64_t);
void dcsw_op_louis(uint32_t);
void dcsw_op_all(uint32_t);
diff --git a/include/lib/bakery_lock.h b/include/lib/bakery_lock.h
index 2e1afa2..86adb9c 100644
--- a/include/lib/bakery_lock.h
+++ b/include/lib/bakery_lock.h
@@ -56,6 +56,11 @@
* External bakery lock interface.
****************************************************************************/
#if USE_COHERENT_MEM
+/*
+ * Bakery locks are stored in coherent memory
+ *
+ * Each lock's data is contiguous and fully allocated by the compiler
+ */
typedef struct bakery_lock {
/*
@@ -67,12 +72,15 @@
volatile uint16_t lock_data[BAKERY_LOCK_MAX_CPUS];
} bakery_lock_t;
-void bakery_lock_init(bakery_lock_t *bakery);
-void bakery_lock_get(bakery_lock_t *bakery);
-void bakery_lock_release(bakery_lock_t *bakery);
-int bakery_lock_try(bakery_lock_t *bakery);
-
#else
+/*
+ * Bakery locks are stored in normal .bss memory
+ *
+ * Each lock's data is spread across multiple cache lines, one per CPU,
+ * but multiple locks can share the same cache line.
+ * The compiler will allocate enough memory for one CPU's bakery locks,
+ * the remaining cache lines are allocated by the linker script
+ */
typedef struct bakery_info {
/*
@@ -84,9 +92,19 @@
volatile uint16_t lock_data;
} bakery_info_t;
-void bakery_lock_get(unsigned int id, unsigned int offset);
-void bakery_lock_release(unsigned int id, unsigned int offset);
+typedef bakery_info_t bakery_lock_t;
#endif /* __USE_COHERENT_MEM__ */
+
+static inline void bakery_lock_init(bakery_lock_t *bakery) {} /* Nothing to do */
+void bakery_lock_get(bakery_lock_t *bakery);
+void bakery_lock_release(bakery_lock_t *bakery);
+
+#define DEFINE_BAKERY_LOCK(_name) bakery_lock_t _name \
+ __attribute__ ((section("bakery_lock")))
+
+#define DECLARE_BAKERY_LOCK(_name) extern bakery_lock_t _name
+
+
#endif /* __ASSEMBLY__ */
#endif /* __BAKERY_LOCK_H__ */
diff --git a/include/plat/arm/common/arm_def.h b/include/plat/arm/common/arm_def.h
index 377bfaa..c236970 100644
--- a/include/plat/arm/common/arm_def.h
+++ b/include/plat/arm/common/arm_def.h
@@ -173,10 +173,6 @@
#define ARM_CONSOLE_BAUDRATE 115200
-/* TZC related constants */
-#define ARM_TZC_BASE 0x2a4a0000
-
-
/******************************************************************************
* Required platform porting definitions common to all ARM standard platforms
*****************************************************************************/
@@ -210,14 +206,6 @@
*/
#define CACHE_WRITEBACK_GRANULE (1 << ARM_CACHE_WRITEBACK_SHIFT)
-#if !USE_COHERENT_MEM
-/*
- * Size of the per-cpu data in bytes that should be reserved in the generic
- * per-cpu data structure for the ARM platform port.
- */
-#define PLAT_PCPU_DATA_SIZE 2
-#endif
-
/*******************************************************************************
* BL1 specific defines.
@@ -305,4 +293,10 @@
#define TSP_IRQ_SEC_PHY_TIMER ARM_IRQ_SEC_PHY_TIMER
+/*
+ * One cache line needed for bakery locks on ARM platforms
+ */
+#define PLAT_PERCPU_BAKERY_LOCK_SIZE (1 * CACHE_WRITEBACK_GRANULE)
+
+
#endif /* __ARM_DEF_H__ */
diff --git a/include/plat/arm/common/plat_arm.h b/include/plat/arm/common/plat_arm.h
index 823212c..ad41f4f 100644
--- a/include/plat/arm/common/plat_arm.h
+++ b/include/plat/arm/common/plat_arm.h
@@ -71,14 +71,11 @@
);
#if IMAGE_BL31
-#if USE_COHERENT_MEM
-
/*
* Use this macro to instantiate lock before it is used in below
* arm_lock_xxx() macros
*/
-#define ARM_INSTANTIATE_LOCK bakery_lock_t arm_lock \
- __attribute__ ((section("tzfw_coherent_mem")));
+#define ARM_INSTANTIATE_LOCK DEFINE_BAKERY_LOCK(arm_lock);
/*
* These are wrapper macros to the Coherent Memory Bakery Lock API.
@@ -89,58 +86,9 @@
#else
-/*******************************************************************************
- * Constants to specify how many bakery locks this platform implements. These
- * are used if the platform chooses not to use coherent memory for bakery lock
- * data structures.
- ******************************************************************************/
-#define ARM_MAX_BAKERIES 1
-#define ARM_PWRC_BAKERY_ID 0
-
-/* Empty definition */
-#define ARM_INSTANTIATE_LOCK
-
-/*******************************************************************************
- * Definition of structure which holds platform specific per-cpu data. Currently
- * it holds only the bakery lock information for each cpu.
- ******************************************************************************/
-typedef struct arm_cpu_data {
- bakery_info_t pcpu_bakery_info[ARM_MAX_BAKERIES];
-} arm_cpu_data_t;
-
-/* Macro to define the offset of bakery_info_t in arm_cpu_data_t */
-#define ARM_CPU_DATA_LOCK_OFFSET __builtin_offsetof\
- (arm_cpu_data_t, pcpu_bakery_info)
-
-
-/*******************************************************************************
- * Helper macros for bakery lock api when using the above arm_cpu_data_t for
- * bakery lock data structures. It assumes that the bakery_info is at the
- * beginning of the platform specific per-cpu data.
- ******************************************************************************/
-#define arm_lock_init() /* No init required */
-#define arm_lock_get() bakery_lock_get(ARM_PWRC_BAKERY_ID, \
- CPU_DATA_PLAT_PCPU_OFFSET + \
- ARM_CPU_DATA_LOCK_OFFSET)
-#define arm_lock_release() bakery_lock_release(ARM_PWRC_BAKERY_ID, \
- CPU_DATA_PLAT_PCPU_OFFSET + \
- ARM_CPU_DATA_LOCK_OFFSET)
-
/*
- * Ensure that the size of the platform specific per-cpu data structure and
- * the size of the memory allocated in generic per-cpu data for the platform
- * are the same.
+ * Empty macros for all other BL stages other than BL3-1
*/
-CASSERT(PLAT_PCPU_DATA_SIZE == sizeof(arm_cpu_data_t),
- arm_pcpu_data_size_mismatch);
-
-#endif /* USE_COHERENT_MEM */
-
-#else
-
-/*
-* Dummy macros for all other BL stages other than BL3-1
-*/
#define ARM_INSTANTIATE_LOCK
#define arm_lock_init()
#define arm_lock_get()
diff --git a/include/plat/arm/css/common/css_def.h b/include/plat/arm/css/common/css_def.h
index 157a22f..38ff9dd 100644
--- a/include/plat/arm/css/common/css_def.h
+++ b/include/plat/arm/css/common/css_def.h
@@ -111,6 +111,9 @@
/* TZC related constants */
#define PLAT_ARM_TZC_FILTERS REG_ATTR_FILTER_BIT_ALL
+#define PLAT_ARM_TZC_BASE 0x2a4a0000
+/* System timer related constants */
+#define PLAT_ARM_NSTIMER_FRAME_ID 1
#endif /* __CSS_DEF_H__ */
diff --git a/lib/aarch64/cache_helpers.S b/lib/aarch64/cache_helpers.S
index 0dbab1b..476b906 100644
--- a/lib/aarch64/cache_helpers.S
+++ b/lib/aarch64/cache_helpers.S
@@ -32,6 +32,7 @@
#include <asm_macros.S>
.globl flush_dcache_range
+ .globl clean_dcache_range
.globl inv_dcache_range
.globl dcsw_op_louis
.globl dcsw_op_all
@@ -39,25 +40,39 @@
.globl dcsw_op_level2
.globl dcsw_op_level3
- /* ------------------------------------------
- * Clean+Invalidate from base address till
- * size. 'x0' = addr, 'x1' = size
- * ------------------------------------------
- */
-func flush_dcache_range
+/*
+ * This macro can be used for implementing various data cache operations `op`
+ */
+.macro do_dcache_maintenance_by_mva op
dcache_line_size x2, x3
add x1, x0, x1
sub x3, x2, #1
bic x0, x0, x3
-flush_loop:
- dc civac, x0
+loop_\op:
+ dc \op, x0
add x0, x0, x2
cmp x0, x1
- b.lo flush_loop
+ b.lo loop_\op
dsb sy
ret
+.endm
+ /* ------------------------------------------
+ * Clean+Invalidate from base address till
+ * size. 'x0' = addr, 'x1' = size
+ * ------------------------------------------
+ */
+func flush_dcache_range
+ do_dcache_maintenance_by_mva civac
endfunc flush_dcache_range
+ /* ------------------------------------------
+ * Clean from base address till size.
+ * 'x0' = addr, 'x1' = size
+ * ------------------------------------------
+ */
+func clean_dcache_range
+ do_dcache_maintenance_by_mva cvac
+endfunc clean_dcache_range
/* ------------------------------------------
* Invalidate from base address till
@@ -65,17 +80,7 @@
* ------------------------------------------
*/
func inv_dcache_range
- dcache_line_size x2, x3
- add x1, x0, x1
- sub x3, x2, #1
- bic x0, x0, x3
-inv_loop:
- dc ivac, x0
- add x0, x0, x2
- cmp x0, x1
- b.lo inv_loop
- dsb sy
- ret
+ do_dcache_maintenance_by_mva ivac
endfunc inv_dcache_range
diff --git a/lib/aarch64/misc_helpers.S b/lib/aarch64/misc_helpers.S
index 5f80b59..e7c246e 100644
--- a/lib/aarch64/misc_helpers.S
+++ b/lib/aarch64/misc_helpers.S
@@ -141,9 +141,6 @@
/* ---------------------------------------------------------------------------
* Disable the MMU at EL3
- * This is implemented in assembler to ensure that the data cache is cleaned
- * and invalidated after the MMU is disabled without any intervening cacheable
- * data accesses
* ---------------------------------------------------------------------------
*/
@@ -154,8 +151,8 @@
bic x0, x0, x1
msr sctlr_el3, x0
isb // ensure MMU is off
- mov x0, #DCCISW // DCache clean and invalidate
- b dcsw_op_all
+ dsb sy
+ ret
endfunc disable_mmu_el3
diff --git a/lib/locks/bakery/bakery_lock_coherent.c b/lib/locks/bakery/bakery_lock_coherent.c
index 1c60dba..f221222 100644
--- a/lib/locks/bakery/bakery_lock_coherent.c
+++ b/lib/locks/bakery/bakery_lock_coherent.c
@@ -63,16 +63,6 @@
assert(entry < BAKERY_LOCK_MAX_CPUS); \
} while (0)
-/* Initialize Bakery Lock to reset all ticket values */
-void bakery_lock_init(bakery_lock_t *bakery)
-{
- assert(bakery);
-
- /* All ticket values need to be 0 */
- memset(bakery, 0, sizeof(*bakery));
-}
-
-
/* Obtain a ticket for a given CPU */
static unsigned int bakery_get_ticket(bakery_lock_t *bakery, unsigned int me)
{
diff --git a/lib/locks/bakery/bakery_lock_normal.c b/lib/locks/bakery/bakery_lock_normal.c
index 3ca76e0..45b870b 100644
--- a/lib/locks/bakery/bakery_lock_normal.c
+++ b/lib/locks/bakery/bakery_lock_normal.c
@@ -56,12 +56,29 @@
* accesses regardless of status of address translation.
*/
-/* This macro assumes that the bakery_info array is located at the offset specified */
-#define get_my_bakery_info(offset, id) \
- (((bakery_info_t *) (((uint8_t *)_cpu_data()) + offset)) + id)
+#ifdef PLAT_PERCPU_BAKERY_LOCK_SIZE
+/*
+ * Verify that the platform defined value for the per-cpu space for bakery locks is
+ * a multiple of the cache line size, to prevent multiple CPUs writing to the same
+ * bakery lock cache line
+ *
+ * Using this value, if provided, rather than the linker generated value results in
+ * more efficient code
+ */
+CASSERT((PLAT_PERCPU_BAKERY_LOCK_SIZE & (CACHE_WRITEBACK_GRANULE - 1)) == 0, \
+ PLAT_PERCPU_BAKERY_LOCK_SIZE_not_cacheline_multiple);
+#define PERCPU_BAKERY_LOCK_SIZE (PLAT_PERCPU_BAKERY_LOCK_SIZE)
+#else
+/*
+ * Use the linker defined symbol which has evaluated the size requirement.
+ * This is not as efficient as using a platform defined constant
+ */
+extern void *__PERCPU_BAKERY_LOCK_SIZE__;
+#define PERCPU_BAKERY_LOCK_SIZE ((uintptr_t)&__PERCPU_BAKERY_LOCK_SIZE__)
+#endif
-#define get_bakery_info_by_index(offset, id, ix) \
- (((bakery_info_t *) (((uint8_t *)_cpu_data_by_index(ix)) + offset)) + id)
+#define get_bakery_info(cpu_ix, lock) \
+ ((bakery_info_t *)((uintptr_t)(lock) + (cpu_ix) * PERCPU_BAKERY_LOCK_SIZE))
#define write_cache_op(addr, cached) \
do { \
@@ -73,7 +90,7 @@
#define read_cache_op(addr, cached) if (cached) \
dccivac((uint64_t)addr)
-static unsigned int bakery_get_ticket(int id, unsigned int offset,
+static unsigned int bakery_get_ticket(bakery_lock_t *lock,
unsigned int me, int is_cached)
{
unsigned int my_ticket, their_ticket;
@@ -84,7 +101,7 @@
* Obtain a reference to the bakery information for this cpu and ensure
* it is not NULL.
*/
- my_bakery_info = get_my_bakery_info(offset, id);
+ my_bakery_info = get_bakery_info(me, lock);
assert(my_bakery_info);
/*
@@ -115,7 +132,7 @@
* Get a reference to the other contender's bakery info and
* ensure that a stale copy is not read.
*/
- their_bakery_info = get_bakery_info_by_index(offset, id, they);
+ their_bakery_info = get_bakery_info(they, lock);
assert(their_bakery_info);
read_cache_op(their_bakery_info, is_cached);
@@ -141,7 +158,7 @@
return my_ticket;
}
-void bakery_lock_get(unsigned int id, unsigned int offset)
+void bakery_lock_get(bakery_lock_t *lock)
{
unsigned int they, me, is_cached;
unsigned int my_ticket, my_prio, their_ticket;
@@ -153,7 +170,7 @@
is_cached = read_sctlr_el3() & SCTLR_C_BIT;
/* Get a ticket */
- my_ticket = bakery_get_ticket(id, offset, me, is_cached);
+ my_ticket = bakery_get_ticket(lock, me, is_cached);
/*
* Now that we got our ticket, compute our priority value, then compare
@@ -168,7 +185,7 @@
* Get a reference to the other contender's bakery info and
* ensure that a stale copy is not read.
*/
- their_bakery_info = get_bakery_info_by_index(offset, id, they);
+ their_bakery_info = get_bakery_info(they, lock);
assert(their_bakery_info);
/* Wait for the contender to get their ticket */
@@ -199,12 +216,12 @@
/* Lock acquired */
}
-void bakery_lock_release(unsigned int id, unsigned int offset)
+void bakery_lock_release(bakery_lock_t *lock)
{
bakery_info_t *my_bakery_info;
unsigned int is_cached = read_sctlr_el3() & SCTLR_C_BIT;
- my_bakery_info = get_my_bakery_info(offset, id);
+ my_bakery_info = get_bakery_info(plat_my_core_pos(), lock);
assert(bakery_ticket_number(my_bakery_info->lock_data));
my_bakery_info->lock_data = 0;
diff --git a/plat/arm/board/fvp/include/platform_def.h b/plat/arm/board/fvp/include/platform_def.h
index c2a7d6a..155216a 100644
--- a/plat/arm/board/fvp/include/platform_def.h
+++ b/plat/arm/board/fvp/include/platform_def.h
@@ -85,6 +85,9 @@
#define PLAT_ARM_CCI_CLUSTER0_SL_IFACE_IX 3
#define PLAT_ARM_CCI_CLUSTER1_SL_IFACE_IX 4
+/* System timer related constants */
+#define PLAT_ARM_NSTIMER_FRAME_ID 1
+
/* TrustZone controller related constants
*
* Currently only filters 0 and 2 are connected on Base FVP.
@@ -100,6 +103,7 @@
* Give access to the CPUs and Virtio. Some devices
* would normally use the default ID so allow that too.
*/
+#define PLAT_ARM_TZC_BASE 0x2a4a0000
#define PLAT_ARM_TZC_FILTERS REG_ATTR_FILTER_BIT(0)
#define PLAT_ARM_TZC_NS_DEV_ACCESS ( \
diff --git a/plat/arm/board/juno/juno_security.c b/plat/arm/board/juno/juno_security.c
index 1de38c3..f9386ca 100644
--- a/plat/arm/board/juno/juno_security.c
+++ b/plat/arm/board/juno/juno_security.c
@@ -29,6 +29,7 @@
*/
#include <mmio.h>
+#include <nic_400.h>
#include <plat_arm.h>
#include <soc_css.h>
#include "juno_def.h"
@@ -48,12 +49,25 @@
}
/*******************************************************************************
+ * Program CSS-NIC400 to allow non-secure access to some CSS regions.
+ ******************************************************************************/
+static void css_init_nic400(void)
+{
+ /* Note: This is the NIC-400 device on the CSS */
+ mmio_write_32(PLAT_SOC_CSS_NIC400_BASE +
+ NIC400_ADDR_CTRL_SECURITY_REG(CSS_NIC400_SLAVE_BOOTSECURE),
+ ~0);
+}
+
+/*******************************************************************************
* Initialize the secure environment.
******************************************************************************/
void plat_arm_security_setup(void)
{
/* Initialize the TrustZone Controller */
arm_tzc_setup();
+ /* Do ARM CSS internal NIC setup */
+ css_init_nic400();
/* Do ARM CSS SoC security setup */
soc_css_security_setup();
/* Initialize the SMMU SSD tables*/
diff --git a/plat/arm/common/arm_bl31_setup.c b/plat/arm/common/arm_bl31_setup.c
index 3fda2ef..899463e 100644
--- a/plat/arm/common/arm_bl31_setup.c
+++ b/plat/arm/common/arm_bl31_setup.c
@@ -40,6 +40,7 @@
#include <mmio.h>
#include <plat_arm.h>
#include <platform.h>
+#include <platform_def.h>
/*
@@ -219,9 +220,9 @@
reg_val = (1 << CNTACR_RPCT_SHIFT) | (1 << CNTACR_RVCT_SHIFT);
reg_val |= (1 << CNTACR_RFRQ_SHIFT) | (1 << CNTACR_RVOFF_SHIFT);
reg_val |= (1 << CNTACR_RWVT_SHIFT) | (1 << CNTACR_RWPT_SHIFT);
- mmio_write_32(ARM_SYS_TIMCTL_BASE + CNTACR_BASE(1), reg_val);
+ mmio_write_32(ARM_SYS_TIMCTL_BASE + CNTACR_BASE(PLAT_ARM_NSTIMER_FRAME_ID), reg_val);
- reg_val = (1 << CNTNSAR_NS_SHIFT(1));
+ reg_val = (1 << CNTNSAR_NS_SHIFT(PLAT_ARM_NSTIMER_FRAME_ID));
mmio_write_32(ARM_SYS_TIMCTL_BASE + CNTNSAR, reg_val);
/* Initialize power controller before setting up topology */
diff --git a/plat/arm/common/arm_common.mk b/plat/arm/common/arm_common.mk
index 1234619..eb5ae11 100644
--- a/plat/arm/common/arm_common.mk
+++ b/plat/arm/common/arm_common.mk
@@ -74,6 +74,7 @@
plat/common/aarch64/plat_common.c
BL1_SOURCES += drivers/arm/cci/cci.c \
+ drivers/arm/ccn/ccn.c \
drivers/io/io_fip.c \
drivers/io/io_memmap.c \
drivers/io/io_storage.c \
@@ -91,6 +92,7 @@
plat/common/aarch64/platform_up_stack.S
BL31_SOURCES += drivers/arm/cci/cci.c \
+ drivers/arm/ccn/ccn.c \
drivers/arm/gic/arm_gic.c \
drivers/arm/gic/gic_v2.c \
drivers/arm/gic/gic_v3.c \
diff --git a/plat/arm/common/arm_security.c b/plat/arm/common/arm_security.c
index 8bee4fe..990d8d4 100644
--- a/plat/arm/common/arm_security.c
+++ b/plat/arm/common/arm_security.c
@@ -47,7 +47,7 @@
{
INFO("Configuring TrustZone Controller\n");
- tzc_init(ARM_TZC_BASE);
+ tzc_init(PLAT_ARM_TZC_BASE);
/* Disable filters. */
tzc_disable_filters();
diff --git a/plat/arm/soc/common/soc_css_security.c b/plat/arm/soc/common/soc_css_security.c
index 36f59ea..37fd37c 100644
--- a/plat/arm/soc/common/soc_css_security.c
+++ b/plat/arm/soc/common/soc_css_security.c
@@ -30,17 +30,10 @@
#include <board_css_def.h>
#include <mmio.h>
+#include <nic_400.h>
#include <platform_def.h>
#include <soc_css_def.h>
-/*
- * Address of slave 'n' security setting in the NIC-400 address region
- * control
- * TODO: Ideally this macro should be moved in a "nic-400.h" header file but
- * it would be the only thing in there so it's not worth it at the moment.
- */
-#define NIC400_ADDR_CTRL_SECURITY_REG(n) (0x8 + (n) * 4)
-
void soc_css_init_nic400(void)
{
/*
@@ -70,13 +63,6 @@
NIC400_ADDR_CTRL_SECURITY_REG(SOC_CSS_NIC400_BOOTSEC_BRIDGE),
~SOC_CSS_NIC400_BOOTSEC_BRIDGE_UART1);
- /*
- * Allow non-secure access to some CSS regions.
- * Note: This is the NIC-400 device on the CSS
- */
- mmio_write_32(PLAT_SOC_CSS_NIC400_BASE +
- NIC400_ADDR_CTRL_SECURITY_REG(CSS_NIC400_SLAVE_BOOTSECURE),
- ~0);
}
diff --git a/plat/mediatek/mt8173/drivers/spm/spm.c b/plat/mediatek/mt8173/drivers/spm/spm.c
index f67daea..7c6d72b 100644
--- a/plat/mediatek/mt8173/drivers/spm/spm.c
+++ b/plat/mediatek/mt8173/drivers/spm/spm.c
@@ -53,7 +53,8 @@
static int spm_dormant_sta = CPU_DORMANT_RESET;
#endif
-static bakery_lock_t spm_lock __attribute__ ((section("tzfw_coherent_mem")));
+DEFINE_BAKERY_LOCK(spm_lock);
+
static int spm_hotplug_ready __attribute__ ((section("tzfw_coherent_mem")));
static int spm_mcdi_ready __attribute__ ((section("tzfw_coherent_mem")));
static int spm_suspend_ready __attribute__ ((section("tzfw_coherent_mem")));
diff --git a/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c b/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c
index 0d8e370..40d1bab 100644
--- a/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c
+++ b/plat/nvidia/tegra/common/drivers/memctrl/memctrl.c
@@ -107,6 +107,23 @@
tegra_mc_write_32(MC_SECURITY_CFG1_0, size_in_bytes >> 20);
}
+static void tegra_clear_videomem(uintptr_t non_overlap_area_start,
+ unsigned long long non_overlap_area_size)
+{
+ /*
+ * Perform cache maintenance to ensure that the non-overlapping area is
+ * zeroed out. The first invalidation of this range ensures that
+ * possible evictions of dirty cache lines do not interfere with the
+ * 'zeromem16' operation. Other CPUs could speculatively prefetch the
+ * main memory contents of this area between the first invalidation and
+ * the 'zeromem16' operation. The second invalidation ensures that any
+ * such cache lines are removed as well.
+ */
+ inv_dcache_range(non_overlap_area_start, non_overlap_area_size);
+ zeromem16((void *)non_overlap_area_start, non_overlap_area_size);
+ inv_dcache_range(non_overlap_area_start, non_overlap_area_size);
+}
+
/*
* Program the Video Memory carveout region
*
@@ -118,7 +135,7 @@
uintptr_t vmem_end_old = video_mem_base + (video_mem_size << 20);
uintptr_t vmem_end_new = phys_base + size_in_bytes;
uint32_t regval;
- uint64_t size;
+ unsigned long long non_overlap_area_size;
/*
* The GPU is the user of the Video Memory region. In order to
@@ -155,15 +172,15 @@
disable_mmu_el3();
if (phys_base > vmem_end_old || video_mem_base > vmem_end_new) {
- zeromem16((void *)video_mem_base, video_mem_size << 20);
+ tegra_clear_videomem(video_mem_base, video_mem_size << 20);
} else {
if (video_mem_base < phys_base) {
- size = phys_base - video_mem_base;
- zeromem16((void *)video_mem_base, size);
+ non_overlap_area_size = phys_base - video_mem_base;
+ tegra_clear_videomem(video_mem_base, non_overlap_area_size);
}
if (vmem_end_old > vmem_end_new) {
- size = vmem_end_old - vmem_end_new;
- zeromem16((void *)vmem_end_new, size);
+ non_overlap_area_size = vmem_end_old - vmem_end_new;
+ tegra_clear_videomem(vmem_end_new, non_overlap_area_size);
}
}
enable_mmu_el3(0);
diff --git a/services/std_svc/psci/psci_common.c b/services/std_svc/psci/psci_common.c
index e12df04..7332695 100644
--- a/services/std_svc/psci/psci_common.c
+++ b/services/std_svc/psci/psci_common.c
@@ -78,6 +78,8 @@
#endif
;
+DEFINE_BAKERY_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]);
+
cpu_pd_node_t psci_cpu_pd_nodes[PLATFORM_CORE_COUNT];
/*******************************************************************************
diff --git a/services/std_svc/psci/psci_on.c b/services/std_svc/psci/psci_on.c
index cf1a782..c37adc2 100644
--- a/services/std_svc/psci/psci_on.c
+++ b/services/std_svc/psci/psci_on.c
@@ -203,7 +203,4 @@
* call to set this cpu on its way.
*/
cm_prepare_el3_exit(NON_SECURE);
-
- /* Clean caches before re-entering normal world */
- dcsw_op_louis(DCCSW);
}
diff --git a/services/std_svc/psci/psci_private.h b/services/std_svc/psci/psci_private.h
index 9b55d9f..8c028a7 100644
--- a/services/std_svc/psci/psci_private.h
+++ b/services/std_svc/psci/psci_private.h
@@ -42,23 +42,12 @@
* The following helper macros abstract the interface to the Bakery
* Lock API.
*/
-#if USE_COHERENT_MEM
-#define psci_lock_init(non_cpu_pd_node, idx) \
- bakery_lock_init(&(non_cpu_pd_node)[(idx)].lock)
-#define psci_lock_get(non_cpu_pd_node) \
- bakery_lock_get(&((non_cpu_pd_node)->lock))
-#define psci_lock_release(non_cpu_pd_node) \
- bakery_lock_release(&((non_cpu_pd_node)->lock))
-#else
#define psci_lock_init(non_cpu_pd_node, idx) \
((non_cpu_pd_node)[(idx)].lock_index = (idx))
#define psci_lock_get(non_cpu_pd_node) \
- bakery_lock_get((non_cpu_pd_node)->lock_index, \
- CPU_DATA_PSCI_LOCK_OFFSET)
+ bakery_lock_get(&psci_locks[(non_cpu_pd_node)->lock_index])
#define psci_lock_release(non_cpu_pd_node) \
- bakery_lock_release((non_cpu_pd_node)->lock_index, \
- CPU_DATA_PSCI_LOCK_OFFSET)
-#endif
+ bakery_lock_release(&psci_locks[(non_cpu_pd_node)->lock_index])
/*
* The PSCI capability which are provided by the generic code but does not
@@ -140,12 +129,9 @@
plat_local_state_t local_state;
unsigned char level;
-#if USE_COHERENT_MEM
- bakery_lock_t lock;
-#else
- /* For indexing the bakery_info array in per CPU data */
+
+ /* For indexing the psci_locks array */
unsigned char lock_index;
-#endif
} non_cpu_pd_node_t;
typedef struct cpu_pwr_domain_node {
@@ -174,6 +160,9 @@
extern cpu_pd_node_t psci_cpu_pd_nodes[PLATFORM_CORE_COUNT];
extern unsigned int psci_caps;
+/* One bakery lock is required for each non-cpu power domain */
+DECLARE_BAKERY_LOCK(psci_locks[PSCI_NUM_NON_CPU_PWR_DOMAINS]);
+
/*******************************************************************************
* SPD's power management hooks registered with PSCI
******************************************************************************/
diff --git a/services/std_svc/psci/psci_setup.c b/services/std_svc/psci/psci_setup.c
index 94fe630..cd1bb09 100644
--- a/services/std_svc/psci/psci_setup.c
+++ b/services/std_svc/psci/psci_setup.c
@@ -181,12 +181,6 @@
/* Validate the sanity of array exported by the platform */
assert(j == PLATFORM_CORE_COUNT);
-
-#if !USE_COHERENT_MEM
- /* Flush the non CPU power domain data to memory */
- flush_dcache_range((uintptr_t) &psci_non_cpu_pd_nodes,
- sizeof(psci_non_cpu_pd_nodes));
-#endif
}
/*******************************************************************************
@@ -227,18 +221,6 @@
psci_cpu_pd_nodes[plat_my_core_pos()].mpidr =
read_mpidr() & MPIDR_AFFINITY_MASK;
-#if !USE_COHERENT_MEM
- /*
- * The psci_non_cpu_pd_nodes only needs flushing when it's not allocated in
- * coherent memory.
- */
- flush_dcache_range((uintptr_t) &psci_non_cpu_pd_nodes,
- sizeof(psci_non_cpu_pd_nodes));
-#endif
-
- flush_dcache_range((uintptr_t) &psci_cpu_pd_nodes,
- sizeof(psci_cpu_pd_nodes));
-
psci_init_req_local_pwr_states();
/*
diff --git a/services/std_svc/psci/psci_suspend.c b/services/std_svc/psci/psci_suspend.c
index 675ef9e..bd0c5db 100644
--- a/services/std_svc/psci/psci_suspend.c
+++ b/services/std_svc/psci/psci_suspend.c
@@ -261,7 +261,4 @@
* call to set this cpu on its way.
*/
cm_prepare_el3_exit(NON_SECURE);
-
- /* Clean caches before re-entering normal world */
- dcsw_op_louis(DCCSW);
}