cpus: higher performance non-cacheable load forwarding

The CPUACTLR_EL1 register on Cortex-A57 CPUs supports a bit to enable
non-cacheable streaming enhancement. Platforms can set this bit only
if their memory system meets the requirement that cache line fill
requests from the Cortex-A57 processor are atomic.

This patch adds support to enable higher performance non-cacheable load
forwarding for such platforms. Platforms must enable this support by
setting the 'A57_ENABLE_NONCACHEABLE_LOAD_FWD' flag from their
makefiles. This flag is disabled by default.

Change-Id: Ib27e55dd68d11a50962c0bbc5b89072208b4bac5
Signed-off-by: Varun Wadekar <vwadekar@nvidia.com>
diff --git a/lib/cpus/cpu-ops.mk b/lib/cpus/cpu-ops.mk
index e3bfc2f..3c0c9cd 100644
--- a/lib/cpus/cpu-ops.mk
+++ b/lib/cpus/cpu-ops.mk
@@ -1,5 +1,6 @@
 #
 # Copyright (c) 2014-2020, ARM Limited and Contributors. All rights reserved.
+# Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 #
@@ -16,6 +17,10 @@
 # It is enabled by default.
 A57_DISABLE_NON_TEMPORAL_HINT	?=1
 
+# Flag to enable higher performance non-cacheable load forwarding.
+# It is disabled by default.
+A57_ENABLE_NONCACHEABLE_LOAD_FWD	?= 0
+
 WORKAROUND_CVE_2017_5715	?=1
 WORKAROUND_CVE_2018_3639	?=1
 DYNAMIC_WORKAROUND_CVE_2018_3639	?=0
@@ -24,6 +29,10 @@
 # By default internal
 NEOVERSE_N1_EXTERNAL_LLC	?=0
 
+# Process A57_ENABLE_NONCACHEABLE_LOAD_FWD flag
+$(eval $(call assert_boolean,A57_ENABLE_NONCACHEABLE_LOAD_FWD))
+$(eval $(call add_define,A57_ENABLE_NONCACHEABLE_LOAD_FWD))
+
 # Process SKIP_A57_L1_FLUSH_PWR_DWN flag
 $(eval $(call assert_boolean,SKIP_A57_L1_FLUSH_PWR_DWN))
 $(eval $(call add_define,SKIP_A57_L1_FLUSH_PWR_DWN))