Merge "refactor(xilinx): rename gic macros to make common" into integration
diff --git a/Makefile b/Makefile
index ca97fc2..52d6cf5 100644
--- a/Makefile
+++ b/Makefile
@@ -1092,7 +1092,6 @@
         DISABLE_MTPMU \
         DYN_DISABLE_AUTH \
         EL3_EXCEPTION_HANDLING \
-        ENABLE_AMU \
         ENABLE_AMU_AUXILIARY_COUNTERS \
         ENABLE_AMU_FCONF \
         AMU_RESTRICT_COUNTERS \
@@ -1121,6 +1120,7 @@
         PLAT_RSS_NOT_SUPPORTED \
         PROGRAMMABLE_RESET_ADDRESS \
         PSCI_EXTENDED_STATE_ID \
+        PSCI_OS_INIT_MODE \
         RESET_TO_BL31 \
         RESET_TO_BL31_WITH_PARAMS \
         SAVE_KEYS \
@@ -1172,7 +1172,7 @@
         ENABLE_TRBE_FOR_NS \
         ENABLE_BTI \
         ENABLE_PAUTH \
-        ENABLE_FEAT_AMUv1 \
+        ENABLE_FEAT_AMU \
         ENABLE_FEAT_AMUv1p1 \
         ENABLE_FEAT_CSV2_2 \
         ENABLE_FEAT_DIT \
@@ -1229,7 +1229,7 @@
         CTX_INCLUDE_NEVE_REGS \
         DECRYPTION_SUPPORT_${DECRYPTION_SUPPORT} \
         DISABLE_MTPMU \
-        ENABLE_AMU \
+        ENABLE_FEAT_AMU \
         ENABLE_AMU_AUXILIARY_COUNTERS \
         ENABLE_AMU_FCONF \
         AMU_RESTRICT_COUNTERS \
@@ -1263,6 +1263,7 @@
         PLAT_RSS_NOT_SUPPORTED \
         PROGRAMMABLE_RESET_ADDRESS \
         PSCI_EXTENDED_STATE_ID \
+        PSCI_OS_INIT_MODE \
         RAS_EXTENSION \
         RESET_TO_BL31 \
         RESET_TO_BL31_WITH_PARAMS \
@@ -1310,7 +1311,6 @@
         ENABLE_MPMM \
         ENABLE_MPMM_FCONF \
         ENABLE_FEAT_FGT \
-        ENABLE_FEAT_AMUv1 \
         ENABLE_FEAT_ECV \
         SIMICS_BUILD \
         ENABLE_FEAT_AMUv1p1 \
diff --git a/bl31/bl31.mk b/bl31/bl31.mk
index 006843e..bf907ea 100644
--- a/bl31/bl31.mk
+++ b/bl31/bl31.mk
@@ -91,7 +91,7 @@
 BL31_SOURCES		+=	lib/extensions/spe/spe.c
 endif
 
-ifeq (${ENABLE_AMU},1)
+ifneq (${ENABLE_FEAT_AMU},0)
 BL31_SOURCES		+=	${AMU_SOURCES}
 endif
 
diff --git a/bl32/sp_min/sp_min.mk b/bl32/sp_min/sp_min.mk
index e85e273..0e5c142 100644
--- a/bl32/sp_min/sp_min.mk
+++ b/bl32/sp_min/sp_min.mk
@@ -28,7 +28,7 @@
 BL32_SOURCES		+=	lib/pmf/pmf_main.c
 endif
 
-ifeq (${ENABLE_AMU},1)
+ifneq (${ENABLE_FEAT_AMU},0)
 BL32_SOURCES		+=	${AMU_SOURCES}
 endif
 
diff --git a/common/feat_detect.c b/common/feat_detect.c
index 9218c07..12cf126 100644
--- a/common/feat_detect.c
+++ b/common/feat_detect.c
@@ -112,16 +112,6 @@
 #endif
 }
 
-/***********************************************
- * Feature : FEAT_AMUv1p1 (AMU Extensions v1.1)
- **********************************************/
-static void read_feat_amuv1p1(void)
-{
-#if (ENABLE_FEAT_AMUv1p1 == FEAT_STATE_ALWAYS)
-	feat_detect_panic(is_armv8_6_feat_amuv1p1_present(), "AMUv1p1");
-#endif
-}
-
 /**************************************************
  * Feature : FEAT_RME (Realm Management Extension)
  *************************************************/
@@ -187,7 +177,7 @@
 
 	/* v8.4 features */
 	read_feat_dit();
-	check_feature(ENABLE_FEAT_AMUv1, read_feat_amu_id_field(),
+	check_feature(ENABLE_FEAT_AMU, read_feat_amu_id_field(),
 		      "AMUv1", 1, 2);
 	check_feature(ENABLE_MPAM_FOR_LOWER_ELS, read_feat_mpam_version(),
 		      "MPAM", 1, 17);
@@ -205,7 +195,8 @@
 	read_feat_rng_trap();
 
 	/* v8.6 features */
-	read_feat_amuv1p1();
+	check_feature(ENABLE_FEAT_AMUv1p1, read_feat_amu_id_field(),
+		      "AMUv1p1", 2, 2);
 	check_feature(ENABLE_FEAT_FGT, read_feat_fgt_id_field(), "FGT", 1, 1);
 	check_feature(ENABLE_FEAT_ECV, read_feat_ecv_id_field(), "ECV", 1, 2);
 	check_feature(ENABLE_FEAT_TWED, read_feat_twed_id_field(),
diff --git a/docs/components/activity-monitors.rst b/docs/components/activity-monitors.rst
index dd45c43..5c1c2c2 100644
--- a/docs/components/activity-monitors.rst
+++ b/docs/components/activity-monitors.rst
@@ -6,9 +6,9 @@
 Unit (|AMU|), an optional non-invasive component for monitoring core events
 through a set of 64-bit counters.
 
-When the ``ENABLE_AMU=1`` build option is provided, Trusted Firmware-A sets up
-the |AMU| prior to its exit from EL3, and will save and restore architected
-|AMU| counters as necessary upon suspend and resume.
+When the ``ENABLE_FEAT_AMU=1`` build option is provided, Trusted Firmware-A
+sets up the |AMU| prior to its exit from EL3, and will save and restore
+architected |AMU| counters as necessary upon suspend and resume.
 
 .. _Activity Monitor Auxiliary Counters:
 
diff --git a/docs/design_documents/index.rst b/docs/design_documents/index.rst
index 3d82e69..d20fc58 100644
--- a/docs/design_documents/index.rst
+++ b/docs/design_documents/index.rst
@@ -10,6 +10,7 @@
    measured_boot_poc
    drtm_poc
    rss
+   psci_osi_mode
 
 --------------
 
diff --git a/docs/design_documents/psci_osi_mode.rst b/docs/design_documents/psci_osi_mode.rst
new file mode 100644
index 0000000..3296e27
--- /dev/null
+++ b/docs/design_documents/psci_osi_mode.rst
@@ -0,0 +1,716 @@
+PSCI OS-initiated mode
+======================
+
+:Author: Maulik Shah & Wing Li
+:Organization: Qualcomm Innovation Center, Inc. & Google LLC
+:Contact: Maulik Shah <quic_mkshah@quicinc.com> & Wing Li <wingers@google.com>
+:Status: RFC
+
+.. contents:: Table of Contents
+
+Introduction
+------------
+
+Power state coordination
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+A power domain topology is a logical hierarchy of power domains in a system that
+arises from the physical dependencies between power domains.
+
+Local power states describe power states for an individual node, and composite
+power states describe the combined power states for an individual node and its
+parent node(s).
+
+Entry into low-power states for a topology node above the core level requires
+coordinating its children nodes. For example, in a system with a power domain
+that encompasses a shared cache, and a separate power domain for each core that
+uses the shared cache, the core power domains must be powered down before the
+shared cache power domain can be powered down.
+
+PSCI supports two modes of power state coordination: platform-coordinated and
+OS-initiated.
+
+Platform-coordinated
+~~~~~~~~~~~~~~~~~~~~
+
+Platform-coordinated mode is the default mode of power state coordination, and
+is currently the only supported mode in TF-A.
+
+In platform-coordinated mode, the platform is responsible for coordinating power
+states, and chooses the deepest power state for a topology node that can be
+tolerated by its children.
+
+OS-initiated
+~~~~~~~~~~~~
+
+OS-initiated mode is optional.
+
+In OS-initiated mode, the calling OS is responsible for coordinating power
+states, and may request for a topology node to enter a low-power state when
+its last child enters the low-power state.
+
+Motivation
+----------
+
+There are two reasons why OS-initiated mode might be a more suitable option than
+platform-coordinated mode for a platform.
+
+Scalability
+^^^^^^^^^^^
+
+In platform-coordinated mode, each core independently selects their own local
+power states, and doesn't account for composite power states that are shared
+between cores.
+
+In OS-initiated mode, the OS has knowledge of the next wakeup event for each
+core, and can have more precise control over the entry, exit, and wakeup
+latencies when deciding if a composite power state (e.g. for a cluster) is
+appropriate. This is especially important for multi-cluster SMP systems and
+heterogeneous systems like big.LITTLE, where different processor types can have
+different power efficiencies.
+
+Simplicity
+^^^^^^^^^^
+
+In platform-coordinated mode, the OS doesn't have visibility when the last core
+at a power level enters a low-power state. If the OS wants to perform last man
+activity (e.g. powering off a shared resource when it is no longer needed), it
+would have to communicate with an API side channel to know when it can do so.
+This could result in a design smell where the platform is using
+platform-coordinated mode when it should be using OS-initiated mode instead.
+
+In OS-initiated mode, the OS can perform last man activity if it selects a
+composite power state when the last core enters a low-power state. This
+eliminates the need for a side channel, and uses the well documented API between
+the OS and the platform.
+
+Current vendor implementations and workarounds
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* STMicroelectronics
+
+  * For their ARM32 platforms, they're using OS-initiated mode implemented in
+    OP-TEE.
+  * For their future ARM64 platforms, they are interested in using OS-initiated
+    mode in TF-A.
+
+* Qualcomm
+
+  * For their mobile platforms, they're using OS-initiated mode implemented in
+    their own custom secure monitor firmware.
+  * For their Chrome OS platforms, they're using platform-coordinated mode in
+    TF-A with custom driver logic to perform last man activity.
+
+* Google
+
+  * They're using platform-coordinated mode in TF-A with custom driver logic to
+    perform last man activity.
+
+Both Qualcomm and Google would like to be able to use OS-initiated mode in TF-A
+in order to simplify custom driver logic.
+
+Requirements
+------------
+
+PSCI_FEATURES
+^^^^^^^^^^^^^
+
+PSCI_FEATURES is for checking whether or not a PSCI function is implemented and
+what its properties are.
+
+.. c:macro:: PSCI_FEATURES
+
+   :param func_id: 0x8400_000A.
+   :param psci_func_id: the function ID of a PSCI function.
+   :retval NOT_SUPPORTED: if the function is not implemented.
+   :retval feature flags associated with the function: if the function is
+       implemented.
+
+CPU_SUSPEND feature flags
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* Reserved, bits[31:2]
+* Power state parameter format, bit[1]
+
+  * A value of 0 indicates the original format is used.
+  * A value of 1 indicates the extended format is used.
+
+* OS-initiated mode, bit[0]
+
+  * A value of 0 indicates OS-initiated mode is not supported.
+  * A value of 1 indicates OS-initiated mode is supported.
+
+See sections 5.1.14 and 5.15 of the PSCI spec (DEN0022D.b) for more details.
+
+PSCI_SET_SUSPEND_MODE
+^^^^^^^^^^^^^^^^^^^^^
+
+PSCI_SET_SUSPEND_MODE is for switching between the two different modes of power
+state coordination.
+
+.. c:macro:: PSCI_SET_SUSPEND_MODE
+
+   :param func_id: 0x8400_000F.
+   :param mode: 0 indicates platform-coordinated mode, 1 indicates OS-initiated
+       mode.
+   :retval SUCCESS: if the request is successful.
+   :retval NOT_SUPPORTED: if OS-initiated mode is not supported.
+   :retval INVALID_PARAMETERS: if the requested mode is not a valid value (0 or
+       1).
+   :retval DENIED: if the cores are not in the correct state.
+
+Switching from platform-coordinated to OS-initiated is only allowed if the
+following conditions are met:
+
+* All cores are in one of the following states:
+
+  * Running.
+  * Off, through a call to CPU_OFF or not yet booted.
+  * Suspended, through a call to CPU_DEFAULT_SUSPEND.
+
+* None of the cores has called CPU_SUSPEND since the last change of mode or
+  boot.
+
+Switching from OS-initiated to platform-coordinated is only allowed if all cores
+other than the calling core are off, either through a call to CPU_OFF or not yet
+booted.
+
+If these conditions are not met, the PSCI implementation must return DENIED.
+
+See sections 5.1.19 and 5.20 of the PSCI spec (DEN0022D.b) for more details.
+
+CPU_SUSPEND
+^^^^^^^^^^^
+
+CPU_SUSPEND is for moving a topology node into a low-power state.
+
+.. c:macro:: CPU_SUSPEND
+
+   :param func_id: 0xC400_0001.
+   :param power_state: the requested low-power state to enter.
+   :param entry_point_address: the address at which the core must resume
+       execution following wakeup from a powerdown state.
+   :param context_id: this field specifies a pointer to the saved context that
+       must be restored on a core following wakeup from a powerdown state.
+   :retval SUCCESS: if the request is successful.
+   :retval INVALID_PARAMETERS: in OS-initiated mode, this error is returned when
+       a low-power state is requested for a topology node above the core level,
+       and at least one of the node's children is in a local low-power state
+       that is incompatible with the request.
+   :retval INVALID_ADDRESS: if the entry_point_address argument is invalid.
+   :retval DENIED: only in OS-initiated mode; this error is returned when a
+       low-power state is requested for a topology node above the core level,
+       and at least one of the node's children is running, i.e. not in a
+       low-power state.
+
+In platform-coordinated mode, the PSCI implementation coordinates requests from
+all cores to determine the deepest power state to enter.
+
+In OS-initiated mode, the calling OS is making an explicit request for a
+specific power state, as opposed to expressing a vote. The PSCI implementation
+must comply with the request, unless the request is not consistent with the
+implementation's view of the system's state, in which case, the implementation
+must return INVALID_PARAMETERS or DENIED.
+
+See sections 5.1.2 and 5.4 of the PSCI spec (DEN0022D.b) for more details.
+
+Power state formats
+~~~~~~~~~~~~~~~~~~~
+
+Original format
+
+* Power Level, bits[25:24]
+
+  * The requested level in the power domain topology to enter a low-power
+    state.
+
+* State Type, bit[16]
+
+  * A value of 0 indicates a standby or retention state.
+  * A value of 1 indicates a powerdown state.
+
+* State ID, bits[15:0]
+
+  * Field to specify the requested composite power state.
+  * The state ID encodings must uniquely describe every possible composite
+    power state.
+  * In OS-initiated mode, the state ID encoding must allow expressing the
+    power level at which the calling core is the last to enter a powerdown
+    state.
+
+Extended format
+
+* State Type, bit[30]
+* State ID, bits[27:0]
+
+Races in OS-initiated mode
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In OS-initiated mode, there are race windows where the OS's view and
+implementation's view of the system's state differ. It is possible for the OS to
+make requests that are invalid given the implementation's view of the system's
+state. For example, the OS might request a powerdown state for a node from one
+core, while at the same time, the implementation observes that another core in
+that node is powering up.
+
+To address potential race conditions in power state requests:
+
+* The calling OS must specify in each CPU_SUSPEND request the deepest power
+  level for which it sees the calling core as the last running core (last man).
+  This is required even if the OS doesn't want the node at that power level to
+  enter a low-power state.
+* The implementation must validate that the requested power states in the
+  CPU_SUSPEND request are consistent with the system's state, and that the
+  calling core is the last core running at the requested power level, or deny
+  the request otherwise.
+
+See sections 4.2.3.2, 6.2, and 6.3 of the PSCI spec (DEN0022D.b) for more
+details.
+
+Caveats
+-------
+
+CPU_OFF
+^^^^^^^
+
+CPU_OFF is always platform-coordinated, regardless of whether the power state
+coordination mode for suspend is platform-coordinated or OS-initiated. If all
+cores in a topology node call CPU_OFF, the last core will power down the node.
+
+In OS-initiated mode, if a subset of the cores in a topology node has called
+CPU_OFF, the last running core may call CPU_SUSPEND to request a powerdown state
+at or above that node's power level.
+
+See section 5.5.2 of the PSCI spec (DEN0022D.b) for more details.
+
+Implementation
+--------------
+
+Current implementation of platform-coordinated mode
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Platform-coordinated is currently the only supported power state coordination
+mode in TF-A.
+
+The functions of interest in the ``psci_cpu_suspend`` call stack are as follows:
+
+* ``psci_validate_power_state``
+
+  * This function calls a platform specific ``validate_power_state`` handler,
+    which takes the ``power_state`` parameter, and updates the ``state_info``
+    object with the requested states for each power level.
+
+* ``psci_find_target_suspend_lvl``
+
+  * This function takes the ``state_info`` object containing the requested power
+    states for each power level, and returns the deepest power level that was
+    requested to enter a low power state, i.e. the target power level.
+
+* ``psci_do_state_coordination``
+
+  * This function takes the target power level and the ``state_info`` object
+    containing the requested power states for each power level, and updates the
+    ``state_info`` object with the coordinated target power state for each
+    level.
+
+* ``pwr_domain_suspend``
+
+  * This is a platform specific handler that takes the ``state_info`` object
+    containing the target power states for each power level, and transitions
+    each power level to the specified power state.
+
+Proposed implementation of OS-initiated mode
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To add support for OS-initiated mode, the following changes are proposed:
+
+* Add a boolean build option ``PSCI_OS_INIT_MODE`` for a platform to enable
+  optional support for PSCI OS-initiated mode. This build option defaults to 0.
+
+.. note::
+
+   If ``PSCI_OS_INIT_MODE=0``, the following changes will not be compiled into
+   the build.
+
+* Update ``psci_features`` to return 1 in bit[0] to indicate support for
+  OS-initiated mode for CPU_SUSPEND.
+* Define a ``suspend_mode`` enum: ``PLAT_COORD`` and ``OS_INIT``.
+* Define a ``psci_suspend_mode`` global variable with a default value of
+  ``PLAT_COORD``.
+* Implement a new function handler ``psci_set_suspend_mode`` for
+  PSCI_SET_SUSPEND_MODE.
+* Since ``psci_validate_power_state`` calls a platform specific
+  ``validate_power_state`` handler, the platform implementation should populate
+  the ``state_info`` object based on the state ID from the given ``power_state``
+  parameter.
+* ``psci_find_target_suspend_lvl`` remains unchanged.
+* Implement a new function ``psci_validate_state_coordination`` that ensures the
+  request satisfies the following conditions, and denies any requests
+  that don't:
+
+  * The requested power states for each power level are consistent with the
+    system's state
+  * The calling core is the last core running at the requested power level
+
+  This function differs from ``psci_do_state_coordination`` in that:
+
+  * The ``psci_req_local_pwr_states`` map is not modified if the request were to
+    be denied
+  * The ``state_info`` argument is never modified since it contains the power
+    states requested by the calling OS
+
+* Update ``psci_cpu_suspend_start`` to do the following:
+
+  * If ``PSCI_SUSPEND_MODE`` is ``PLAT_COORD``, call
+    ``psci_do_state_coordination``.
+  * If ``PSCI_SUSPEND_MODE`` is ``OS_INIT``, call
+    ``psci_validate_state_coordination``. If validation fails, propagate the
+    error up the call stack.
+
+* Update the return type of the platform specific ``pwr_domain_suspend``
+  handler from ``void`` to ``int``, to allow the platform to optionally perform
+  validations based on hardware states.
+
+.. image:: ../resources/diagrams/psci-osi-mode.png
+
+Testing
+-------
+
+The proposed patches can be found at
+https://review.trustedfirmware.org/q/topic:psci-osi.
+
+Testing on FVP and Google platforms
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The proposed patches add a new CPU Suspend in OSI mode test suite to TF-A Tests.
+This has been enabled and verified on the FVP_Base_RevC-2xAEMvA platform and
+Google platforms, and excluded from all other platforms via the build option
+``PLAT_TESTS_SKIP_LIST``.
+
+Testing on STM32MP15
+^^^^^^^^^^^^^^^^^^^^
+
+The proposed patches have been tested and verified on the STM32MP15 platform,
+which has a single cluster with 2 CPUs, by Gabriel Fernandez
+<gabriel.fernandez@st.com> from STMicroelectronics with this device tree
+configuration:
+
+.. code-block:: devicetree
+
+   cpus {
+           #address-cells = <1>;
+           #size-cells = <0>;
+
+           cpu0: cpu@0 {
+                   device_type = "cpu";
+                   compatible = "arm,cortex-a7";
+                   reg = <0>;
+                   enable-method = "psci";
+                   power-domains = <&CPU_PD0>;
+                   power-domain-names = "psci";
+           };
+           cpu1: cpu@1 {
+                   device_type = "cpu";
+                   compatible = "arm,cortex-a7";
+                   reg = <1>;
+                   enable-method = "psci";
+                   power-domains = <&CPU_PD1>;
+                   power-domain-names = "psci";
+           };
+
+           idle-states {
+                   cpu_retention: cpu-retention {
+                           compatible = "arm,idle-state";
+                           arm,psci-suspend-param = <0x00000001>;
+                           entry-latency-us = <130>;
+                           exit-latency-us = <620>;
+                           min-residency-us = <700>;
+                           local-timer-stop;
+                   };
+           };
+
+           domain-idle-states {
+                   CLUSTER_STOP: core-power-domain {
+                           compatible = "domain-idle-state";
+                           arm,psci-suspend-param = <0x01000001>;
+                           entry-latency-us = <230>;
+                           exit-latency-us = <720>;
+                           min-residency-us = <2000>;
+                           local-timer-stop;
+                   };
+           };
+   };
+
+   psci {
+           compatible = "arm,psci-1.0";
+           method = "smc";
+
+           CPU_PD0: power-domain-cpu0 {
+                   #power-domain-cells = <0>;
+                   power-domains = <&pd_core>;
+                   domain-idle-states = <&cpu_retention>;
+           };
+
+           CPU_PD1: power-domain-cpu1 {
+                   #power-domain-cells = <0>;
+                   power-domains = <&pd_core>;
+                   domain-idle-states = <&cpu_retention>;
+           };
+
+           pd_core: power-domain-cluster {
+                   #power-domain-cells = <0>;
+                   domain-idle-states = <&CLUSTER_STOP>;
+           };
+   };
+
+Testing on Qualcomm SC7280
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The proposed patches have been tested and verified on the SC7280 platform by
+Maulik Shah <quic_mkshah@quicinc.com> from Qualcomm with this device tree
+configuration:
+
+.. code-block:: devicetree
+
+   cpus {
+           #address-cells = <2>;
+           #size-cells = <0>;
+
+           CPU0: cpu@0 {
+                   device_type = "cpu";
+                   compatible = "arm,kryo";
+                   reg = <0x0 0x0>;
+                   enable-method = "psci";
+                   power-domains = <&CPU_PD0>;
+                   power-domain-names = "psci";
+           };
+
+           CPU1: cpu@100 {
+                   device_type = "cpu";
+                   compatible = "arm,kryo";
+                   reg = <0x0 0x100>;
+                   enable-method = "psci";
+                   power-domains = <&CPU_PD1>;
+                   power-domain-names = "psci";
+           };
+
+           CPU2: cpu@200 {
+                   device_type = "cpu";
+                   compatible = "arm,kryo";
+                   reg = <0x0 0x200>;
+                   enable-method = "psci";
+                   power-domains = <&CPU_PD2>;
+                   power-domain-names = "psci";
+           };
+
+           CPU3: cpu@300 {
+                   device_type = "cpu";
+                   compatible = "arm,kryo";
+                   reg = <0x0 0x300>;
+                   enable-method = "psci";
+                   power-domains = <&CPU_PD3>;
+                   power-domain-names = "psci";
+           }
+
+           CPU4: cpu@400 {
+                   device_type = "cpu";
+                   compatible = "arm,kryo";
+                   reg = <0x0 0x400>;
+                   enable-method = "psci";
+                   power-domains = <&CPU_PD4>;
+                   power-domain-names = "psci";
+           };
+
+           CPU5: cpu@500 {
+                   device_type = "cpu";
+                   compatible = "arm,kryo";
+                   reg = <0x0 0x500>;
+                   enable-method = "psci";
+                   power-domains = <&CPU_PD5>;
+                   power-domain-names = "psci";
+           };
+
+           CPU6: cpu@600 {
+                   device_type = "cpu";
+                   compatible = "arm,kryo";
+                   reg = <0x0 0x600>;
+                   enable-method = "psci";
+                   power-domains = <&CPU_PD6>;
+                   power-domain-names = "psci";
+           };
+
+           CPU7: cpu@700 {
+                   device_type = "cpu";
+                   compatible = "arm,kryo";
+                   reg = <0x0 0x700>;
+                   enable-method = "psci";
+                   power-domains = <&CPU_PD7>;
+                   power-domain-names = "psci";
+           };
+
+           idle-states {
+                   entry-method = "psci";
+
+                   LITTLE_CPU_SLEEP_0: cpu-sleep-0-0 {
+                           compatible = "arm,idle-state";
+                           idle-state-name = "little-power-down";
+                           arm,psci-suspend-param = <0x40000003>;
+                           entry-latency-us = <549>;
+                           exit-latency-us = <901>;
+                           min-residency-us = <1774>;
+                           local-timer-stop;
+                   };
+
+                   LITTLE_CPU_SLEEP_1: cpu-sleep-0-1 {
+                           compatible = "arm,idle-state";
+                           idle-state-name = "little-rail-power-down";
+                           arm,psci-suspend-param = <0x40000004>;
+                           entry-latency-us = <702>;
+                           exit-latency-us = <915>;
+                           min-residency-us = <4001>;
+                           local-timer-stop;
+                   };
+
+                   BIG_CPU_SLEEP_0: cpu-sleep-1-0 {
+                           compatible = "arm,idle-state";
+                           idle-state-name = "big-power-down";
+                           arm,psci-suspend-param = <0x40000003>;
+                           entry-latency-us = <523>;
+                           exit-latency-us = <1244>;
+                           min-residency-us = <2207>;
+                           local-timer-stop;
+                   };
+
+                   BIG_CPU_SLEEP_1: cpu-sleep-1-1 {
+                           compatible = "arm,idle-state";
+                           idle-state-name = "big-rail-power-down";
+                           arm,psci-suspend-param = <0x40000004>;
+                           entry-latency-us = <526>;
+                           exit-latency-us = <1854>;
+                           min-residency-us = <5555>;
+                           local-timer-stop;
+                   };
+           };
+
+           domain-idle-states {
+                   CLUSTER_SLEEP_0: cluster-sleep-0 {
+                           compatible = "arm,idle-state";
+                           idle-state-name = "cluster-power-down";
+                           arm,psci-suspend-param = <0x40003444>;
+                           entry-latency-us = <3263>;
+                           exit-latency-us = <6562>;
+                           min-residency-us = <9926>;
+                           local-timer-stop;
+                   };
+           };
+   };
+
+   psci {
+           compatible = "arm,psci-1.0";
+           method = "smc";
+
+           CPU_PD0: cpu0 {
+                   #power-domain-cells = <0>;
+                   power-domains = <&CLUSTER_PD>;
+                   domain-idle-states = <&LITTLE_CPU_SLEEP_0 &LITTLE_CPU_SLEEP_1>;
+           };
+
+           CPU_PD1: cpu1 {
+                   #power-domain-cells = <0>;
+                   power-domains = <&CLUSTER_PD>;
+                   domain-idle-states = <&LITTLE_CPU_SLEEP_0 &LITTLE_CPU_SLEEP_1>;
+           };
+
+           CPU_PD2: cpu2 {
+                   #power-domain-cells = <0>;
+                   power-domains = <&CLUSTER_PD>;
+                   domain-idle-states = <&LITTLE_CPU_SLEEP_0 &LITTLE_CPU_SLEEP_1>;
+           };
+
+           CPU_PD3: cpu3 {
+                   #power-domain-cells = <0>;
+                   power-domains = <&CLUSTER_PD>;
+                   domain-idle-states = <&LITTLE_CPU_SLEEP_0 &LITTLE_CPU_SLEEP_1>;
+           };
+
+           CPU_PD4: cpu4 {
+                   #power-domain-cells = <0>;
+                   power-domains = <&CLUSTER_PD>;
+                   domain-idle-states = <&BIG_CPU_SLEEP_0 &BIG_CPU_SLEEP_1>;
+           };
+
+           CPU_PD5: cpu5 {
+                   #power-domain-cells = <0>;
+                   power-domains = <&CLUSTER_PD>;
+                   domain-idle-states = <&BIG_CPU_SLEEP_0 &BIG_CPU_SLEEP_1>;
+           };
+
+           CPU_PD6: cpu6 {
+                   #power-domain-cells = <0>;
+                   power-domains = <&CLUSTER_PD>;
+                   domain-idle-states = <&BIG_CPU_SLEEP_0 &BIG_CPU_SLEEP_1>;
+           };
+
+           CPU_PD7: cpu7 {
+                   #power-domain-cells = <0>;
+                   power-domains = <&CLUSTER_PD>;
+                   domain-idle-states = <&BIG_CPU_SLEEP_0 &BIG_CPU_SLEEP_1>;
+           };
+
+           CLUSTER_PD: cpu-cluster0 {
+                   #power-domain-cells = <0>;
+                   domain-idle-states = <&CLUSTER_SLEEP_0>;
+           };
+   };
+
+Comparisons on Qualcomm SC7280
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+CPUIdle states
+~~~~~~~~~~~~~~
+
+* 8 CPUs, 1 L3 cache
+* Platform-coordinated mode
+
+  * CPUIdle states
+
+    * State0 - WFI
+    * State1 - Core collapse
+    * State2 - Rail collapse
+    * State3 - L3 cache off and system resources voted off
+
+* OS-initiated mode
+
+  * CPUIdle states
+
+    * State0 - WFI
+    * State1 - Core collapse
+    * State2 - Rail collapse
+
+  * Cluster domain idle state
+
+    * State3 - L3 cache off and system resources voted off
+
+.. image:: ../resources/diagrams/psci-flattened-vs-hierarchical-idle-states.png
+
+Results
+~~~~~~~
+
+* The following stats have been captured with fixed CPU frequencies from the use
+  case of 10 seconds of device idle with the display turned on and Wi-Fi and
+  modem turned off.
+* Count refers to the number of times a CPU or cluster entered power collapse.
+* Residency refers to the time in seconds a CPU or cluster stayed in power
+  collapse.
+* The results are an average of 3 iterations of actual counts and residencies.
+
+.. image:: ../resources/diagrams/psci-pc-mode-vs-osi-mode.png
+
+OS-initiated mode was able to scale better than platform-coordinated mode for
+multiple CPUs. The count and residency results for state3 (i.e. a cluster domain
+idle state) in OS-initiated mode for multiple CPUs were much closer to the
+results for a single CPU than in platform-coordinated mode.
+
+--------------
+
+*Copyright (c) 2023, Arm Limited and Contributors. All rights reserved.*
diff --git a/docs/getting_started/build-options.rst b/docs/getting_started/build-options.rst
index 0540b6d..20d8807 100644
--- a/docs/getting_started/build-options.rst
+++ b/docs/getting_started/build-options.rst
@@ -230,11 +230,6 @@
    payload. Please refer to the "Booting an EL3 payload" section for more
    details.
 
--  ``ENABLE_AMU``: Boolean option to enable Activity Monitor Unit extensions.
-   This is an optional architectural feature available on v8.4 onwards. Some
-   v8.2 implementations also implement an AMU and this option can be used to
-   enable this feature on those systems as well. Default is 0.
-
 -  ``ENABLE_AMU_AUXILIARY_COUNTERS``: Enables support for AMU auxiliary counters
    (also known as group 1 counters). These are implementation-defined counters,
    and as such require additional platform configuration. Default is 0.
@@ -261,13 +256,12 @@
    builds, but this behaviour can be overridden in each platform's Makefile or
    in the build command line.
 
--  ``ENABLE_FEAT_AMUv1``: Numeric value to enable access to the HAFGRTR_EL2
-   (Hypervisor Activity Monitors Fine-Grained Read Trap Register) during EL2
-   to EL3 context save/restore operations. This flag can take the values 0 to 2,
-   to align with the ``FEATURE_DETECTION`` mechanism. It is an optional feature
-   available on v8.4 and onwards and must be set to either 1 or 2 alongside
-   ``ENABLE_FEAT_FGT``, to access the HAFGRTR_EL2 register.
-   Default value is ``0``.
+-  ``ENABLE_FEAT_AMU``: Numeric value to enable Activity Monitor Unit
+   extensions. This flag can take the values 0 to 2, to align with the
+   ``FEATURE_DETECTION`` mechanism. This is an optional architectural feature
+   available on v8.4 onwards. Some v8.2 implementations also implement an AMU
+   and this option can be used to enable this feature on those systems as well.
+   This flag can take the values 0 to 2, the default is 0.
 
 -  ``ENABLE_FEAT_AMUv1p1``: Numeric value to enable the ``FEAT_AMUv1p1``
    extension. ``FEAT_AMUv1p1`` is an optional feature available on Arm v8.6
@@ -740,6 +734,9 @@
    enabled on Arm platforms, the option ``ARM_RECOM_STATE_ID_ENC`` needs to be
    set to 1 as well.
 
+-  ``PSCI_OS_INIT_MODE``: Boolean flag to enable support for optional PSCI
+   OS-initiated mode. This option defaults to 0.
+
 -  ``RAS_EXTENSION``: Numeric value to enable Armv8.2 RAS features. RAS features
    are an optional extension for pre-Armv8.2 CPUs, but are mandatory for Armv8.2
    or later CPUs. This flag can take the values 0 to 2, to align with the
diff --git a/docs/getting_started/porting-guide.rst b/docs/getting_started/porting-guide.rst
index 6735cb1..8d6a2bf 100644
--- a/docs/getting_started/porting-guide.rst
+++ b/docs/getting_started/porting-guide.rst
@@ -538,6 +538,15 @@
 
    Defines the maximum address that the TSP's progbits sections can occupy.
 
+If the platform supports OS-initiated mode, i.e. the build option
+``PSCI_OS_INIT_MODE`` is enabled, and if the platform's maximum power domain
+level for PSCI_CPU_SUSPEND differs from ``PLAT_MAX_PWR_LVL``, the following
+constant must be defined.
+
+-  **#define : PLAT_MAX_CPU_SUSPEND_PWR_LVL**
+
+   Defines the maximum power domain level that PSCI_CPU_SUSPEND should apply to.
+
 If the platform port uses the PL061 GPIO driver, the following constant may
 optionally be defined:
 
@@ -2810,6 +2819,10 @@
 data, for example in DRAM. The Distributor can then be powered down using an
 implementation-defined sequence.
 
+If the build option ``PSCI_OS_INIT_MODE`` is enabled, the generic code expects
+the platform to return PSCI_E_SUCCESS on success, or either PSCI_E_DENIED or
+PSCI_E_INVALID_PARAMS as appropriate for any invalid requests.
+
 plat_psci_ops.pwr_domain_pwr_down_wfi()
 .......................................
 
diff --git a/docs/resources/diagrams/psci-flattened-vs-hierarchical-idle-states.png b/docs/resources/diagrams/psci-flattened-vs-hierarchical-idle-states.png
new file mode 100644
index 0000000..7c41f75
--- /dev/null
+++ b/docs/resources/diagrams/psci-flattened-vs-hierarchical-idle-states.png
Binary files differ
diff --git a/docs/resources/diagrams/psci-osi-mode.png b/docs/resources/diagrams/psci-osi-mode.png
new file mode 100644
index 0000000..d322953
--- /dev/null
+++ b/docs/resources/diagrams/psci-osi-mode.png
Binary files differ
diff --git a/docs/resources/diagrams/psci-pc-mode-vs-osi-mode.png b/docs/resources/diagrams/psci-pc-mode-vs-osi-mode.png
new file mode 100644
index 0000000..7270a3d
--- /dev/null
+++ b/docs/resources/diagrams/psci-pc-mode-vs-osi-mode.png
Binary files differ
diff --git a/include/arch/aarch32/arch_features.h b/include/arch/aarch32/arch_features.h
index 252b407..7c25b99 100644
--- a/include/arch/aarch32/arch_features.h
+++ b/include/arch/aarch32/arch_features.h
@@ -25,6 +25,37 @@
 	return ISOLATE_FIELD(read_id_mmfr4(), ID_MMFR4_CNP) != 0U;
 }
 
+static unsigned int read_feat_amu_id_field(void)
+{
+	return ISOLATE_FIELD(read_id_pfr0(), ID_PFR0_AMU);
+}
+
+static inline bool is_feat_amu_supported(void)
+{
+	if (ENABLE_FEAT_AMU == FEAT_STATE_DISABLED) {
+		return false;
+	}
+
+	if (ENABLE_FEAT_AMU == FEAT_STATE_ALWAYS) {
+		return true;
+	}
+
+	return read_feat_amu_id_field() >= ID_PFR0_AMU_V1;
+}
+
+static inline bool is_feat_amuv1p1_supported(void)
+{
+	if (ENABLE_FEAT_AMUv1p1 == FEAT_STATE_DISABLED) {
+		return false;
+	}
+
+	if (ENABLE_FEAT_AMUv1p1 == FEAT_STATE_ALWAYS) {
+		return true;
+	}
+
+	return read_feat_amu_id_field() >= ID_PFR0_AMU_V1P1;
+}
+
 static inline unsigned int read_feat_trf_id_field(void)
 {
 	return ISOLATE_FIELD(read_id_dfr0(), ID_DFR0_TRACEFILT);
diff --git a/include/arch/aarch64/arch_features.h b/include/arch/aarch64/arch_features.h
index d3c6263..d7116a7 100644
--- a/include/arch/aarch64/arch_features.h
+++ b/include/arch/aarch64/arch_features.h
@@ -244,19 +244,27 @@
 
 static inline bool is_feat_amu_supported(void)
 {
-	if (ENABLE_FEAT_AMUv1 == FEAT_STATE_DISABLED) {
+	if (ENABLE_FEAT_AMU == FEAT_STATE_DISABLED) {
 		return false;
 	}
 
-	if (ENABLE_FEAT_AMUv1 == FEAT_STATE_ALWAYS) {
+	if (ENABLE_FEAT_AMU == FEAT_STATE_ALWAYS) {
 		return true;
 	}
 
 	return read_feat_amu_id_field() >= ID_AA64PFR0_AMU_V1;
 }
 
-static inline bool is_armv8_6_feat_amuv1p1_present(void)
+static inline bool is_feat_amuv1p1_supported(void)
 {
+	if (ENABLE_FEAT_AMUv1p1 == FEAT_STATE_DISABLED) {
+		return false;
+	}
+
+	if (ENABLE_FEAT_AMUv1p1 == FEAT_STATE_ALWAYS) {
+		return true;
+	}
+
 	return read_feat_amu_id_field() >= ID_AA64PFR0_AMU_V1P1;
 }
 
diff --git a/include/lib/extensions/amu.h b/include/lib/extensions/amu.h
index 6452f7e..de476e4 100644
--- a/include/lib/extensions/amu.h
+++ b/include/lib/extensions/amu.h
@@ -14,11 +14,23 @@
 
 #include <platform_def.h>
 
+#if ENABLE_FEAT_AMU
 #if __aarch64__
 void amu_enable(bool el2_unused, cpu_context_t *ctx);
 #else
 void amu_enable(bool el2_unused);
 #endif
+#else
+#if __aarch64__
+static inline void amu_enable(bool el2_unused, cpu_context_t *ctx)
+{
+}
+#else
+static inline void amu_enable(bool el2_unused)
+{
+}
+#endif
+#endif
 
 #if ENABLE_AMU_AUXILIARY_COUNTERS
 /*
diff --git a/include/lib/psci/psci.h b/include/lib/psci/psci.h
index b56e98b..6d27b7b 100644
--- a/include/lib/psci/psci.h
+++ b/include/lib/psci/psci.h
@@ -59,6 +59,7 @@
 #define PSCI_NODE_HW_STATE_AARCH64	U(0xc400000d)
 #define PSCI_SYSTEM_SUSPEND_AARCH32	U(0x8400000E)
 #define PSCI_SYSTEM_SUSPEND_AARCH64	U(0xc400000E)
+#define PSCI_SET_SUSPEND_MODE		U(0x8400000F)
 #define PSCI_STAT_RESIDENCY_AARCH32	U(0x84000010)
 #define PSCI_STAT_RESIDENCY_AARCH64	U(0xc4000010)
 #define PSCI_STAT_COUNT_AARCH32		U(0x84000011)
@@ -73,9 +74,17 @@
  * Number of PSCI calls (above) implemented
  */
 #if ENABLE_PSCI_STAT
-#define PSCI_NUM_CALLS			U(22)
+#if PSCI_OS_INIT_MODE
+#define PSCI_NUM_CALLS			U(30)
 #else
-#define PSCI_NUM_CALLS			U(18)
+#define PSCI_NUM_CALLS			U(29)
+#endif
+#else
+#if PSCI_OS_INIT_MODE
+#define PSCI_NUM_CALLS			U(26)
+#else
+#define PSCI_NUM_CALLS			U(25)
+#endif
 #endif
 
 /* The macros below are used to identify PSCI calls from the SMC function ID */
@@ -134,7 +143,11 @@
 
 /* Features flags for CPU SUSPEND OS Initiated mode support. Bits [0:0] */
 #define FF_MODE_SUPPORT_SHIFT		U(0)
+#if PSCI_OS_INIT_MODE
 #define FF_SUPPORTS_OS_INIT_MODE	U(1)
+#else
+#define FF_SUPPORTS_OS_INIT_MODE	U(0)
+#endif
 
 /*******************************************************************************
  * PSCI version
@@ -268,6 +281,13 @@
 	 * for the CPU.
 	 */
 	plat_local_state_t pwr_domain_state[PLAT_MAX_PWR_LVL + U(1)];
+#if PSCI_OS_INIT_MODE
+	/*
+	 * The highest power level at which the current CPU is the last running
+	 * CPU.
+	 */
+	unsigned int last_at_pwrlvl;
+#endif
 } psci_power_state_t;
 
 /*******************************************************************************
@@ -299,7 +319,11 @@
 	void (*pwr_domain_off)(const psci_power_state_t *target_state);
 	void (*pwr_domain_suspend_pwrdown_early)(
 				const psci_power_state_t *target_state);
+#if PSCI_OS_INIT_MODE
+	int (*pwr_domain_suspend)(const psci_power_state_t *target_state);
+#else
 	void (*pwr_domain_suspend)(const psci_power_state_t *target_state);
+#endif
 	void (*pwr_domain_on_finish)(const psci_power_state_t *target_state);
 	void (*pwr_domain_on_finish_late)(
 				const psci_power_state_t *target_state);
@@ -347,6 +371,9 @@
 int psci_node_hw_state(u_register_t target_cpu,
 		       unsigned int power_level);
 int psci_features(unsigned int psci_fid);
+#if PSCI_OS_INIT_MODE
+int psci_set_suspend_mode(unsigned int mode);
+#endif
 void __dead2 psci_power_down_wfi(void);
 void psci_arch_setup(void);
 
diff --git a/include/lib/psci/psci_lib.h b/include/lib/psci/psci_lib.h
index 3edc50b..4b244ec 100644
--- a/include/lib/psci/psci_lib.h
+++ b/include/lib/psci/psci_lib.h
@@ -92,6 +92,7 @@
 int psci_stop_other_cores(unsigned int wait_ms,
 			  void (*stop_func)(u_register_t mpidr));
 bool psci_is_last_on_cpu_safe(void);
+bool psci_are_all_cpus_on_safe(void);
 void psci_pwrdown_cpu(unsigned int power_level);
 
 #endif /* __ASSEMBLER__ */
diff --git a/include/plat/arm/common/plat_arm.h b/include/plat/arm/common/plat_arm.h
index 494e470..34f913b 100644
--- a/include/plat/arm/common/plat_arm.h
+++ b/include/plat/arm/common/plat_arm.h
@@ -125,6 +125,12 @@
 #define ARM_LOCAL_PSTATE_WIDTH		4
 #define ARM_LOCAL_PSTATE_MASK		((1 << ARM_LOCAL_PSTATE_WIDTH) - 1)
 
+#if PSCI_OS_INIT_MODE
+#define ARM_LAST_AT_PLVL_MASK		(ARM_LOCAL_PSTATE_MASK <<	\
+					 (ARM_LOCAL_PSTATE_WIDTH *	\
+					  (PLAT_MAX_PWR_LVL + 1)))
+#endif /* __PSCI_OS_INIT_MODE__ */
+
 /* Macros to construct the composite power state */
 
 /* Make composite power state parameter till power level 0 */
diff --git a/lib/cpus/aarch64/cortex_a75.S b/lib/cpus/aarch64/cortex_a75.S
index d561be4..e22c828 100644
--- a/lib/cpus/aarch64/cortex_a75.S
+++ b/lib/cpus/aarch64/cortex_a75.S
@@ -121,7 +121,7 @@
 	bl	errata_dsu_936184_wa
 #endif
 
-#if ENABLE_AMU
+#if ENABLE_FEAT_AMU
 	/* Make sure accesses from EL0/EL1 and EL2 are not trapped to EL3 */
 	mrs	x0, actlr_el3
 	orr	x0, x0, #CORTEX_A75_ACTLR_AMEN_BIT
diff --git a/lib/cpus/aarch64/cortex_a78.S b/lib/cpus/aarch64/cortex_a78.S
index 421509d..69d7ab0 100644
--- a/lib/cpus/aarch64/cortex_a78.S
+++ b/lib/cpus/aarch64/cortex_a78.S
@@ -483,7 +483,7 @@
 	bl	errata_a78_2779479_wa
 #endif
 
-#if ENABLE_AMU
+#if ENABLE_FEAT_AMU
 	/* Make sure accesses from EL0/EL1 and EL2 are not trapped to EL3 */
 	mrs	x0, actlr_el3
 	bic	x0, x0, #CORTEX_A78_ACTLR_TAM_BIT
diff --git a/lib/cpus/aarch64/cortex_a78_ae.S b/lib/cpus/aarch64/cortex_a78_ae.S
index 27adc38..d56f835 100644
--- a/lib/cpus/aarch64/cortex_a78_ae.S
+++ b/lib/cpus/aarch64/cortex_a78_ae.S
@@ -214,7 +214,7 @@
 	bl	errata_a78_ae_2395408_wa
 #endif
 
-#if ENABLE_AMU
+#if ENABLE_FEAT_AMU
 	/* Make sure accesses from EL0/EL1 and EL2 are not trapped to EL3 */
 	mrs	x0, actlr_el3
 	bic	x0, x0, #CORTEX_A78_ACTLR_TAM_BIT
diff --git a/lib/cpus/aarch64/neoverse_n1.S b/lib/cpus/aarch64/neoverse_n1.S
index ec62519..827c0b0 100644
--- a/lib/cpus/aarch64/neoverse_n1.S
+++ b/lib/cpus/aarch64/neoverse_n1.S
@@ -585,7 +585,7 @@
 	bl	errata_n1_1946160_wa
 #endif
 
-#if ENABLE_AMU
+#if ENABLE_FEAT_AMU
 	/* Make sure accesses from EL0/EL1 and EL2 are not trapped to EL3 */
 	mrs	x0, actlr_el3
 	orr	x0, x0, #NEOVERSE_N1_ACTLR_AMEN_BIT
diff --git a/lib/cpus/aarch64/neoverse_n2.S b/lib/cpus/aarch64/neoverse_n2.S
index dbf5941..60d322f 100644
--- a/lib/cpus/aarch64/neoverse_n2.S
+++ b/lib/cpus/aarch64/neoverse_n2.S
@@ -545,7 +545,7 @@
 	bl	errata_n2_2388450_wa
 #endif
 
-#if ENABLE_AMU
+#if ENABLE_FEAT_AMU
 	/* Make sure accesses from EL0/EL1 and EL2 are not trapped to EL3 */
 	mrs	x0, cptr_el3
 	orr	x0, x0, #TAM_BIT
diff --git a/lib/cpus/aarch64/rainier.S b/lib/cpus/aarch64/rainier.S
index 584ab97..3b7b8b2 100644
--- a/lib/cpus/aarch64/rainier.S
+++ b/lib/cpus/aarch64/rainier.S
@@ -94,7 +94,7 @@
 	bl	errata_n1_1868343_wa
 #endif
 
-#if ENABLE_AMU
+#if ENABLE_FEAT_AMU
 	/* Make sure accesses from EL0/EL1 and EL2 are not trapped to EL3 */
 	mrs	x0, actlr_el3
 	orr	x0, x0, #RAINIER_ACTLR_AMEN_BIT
diff --git a/lib/el3_runtime/aarch32/context_mgmt.c b/lib/el3_runtime/aarch32/context_mgmt.c
index e7a0e58..62e30fc 100644
--- a/lib/el3_runtime/aarch32/context_mgmt.c
+++ b/lib/el3_runtime/aarch32/context_mgmt.c
@@ -136,9 +136,9 @@
 static void enable_extensions_nonsecure(bool el2_unused)
 {
 #if IMAGE_BL32
-#if ENABLE_AMU
-	amu_enable(el2_unused);
-#endif
+	if (is_feat_amu_supported()) {
+		amu_enable(el2_unused);
+	}
 
 	if (is_feat_sys_reg_trace_supported()) {
 		sys_reg_trace_enable();
diff --git a/lib/el3_runtime/aarch64/context_mgmt.c b/lib/el3_runtime/aarch64/context_mgmt.c
index 04b685f..12f3e6d 100644
--- a/lib/el3_runtime/aarch64/context_mgmt.c
+++ b/lib/el3_runtime/aarch64/context_mgmt.c
@@ -485,9 +485,9 @@
 		spe_enable(el2_unused);
 	}
 
-#if ENABLE_AMU
-	amu_enable(el2_unused, ctx);
-#endif
+	if (is_feat_amu_supported()) {
+		amu_enable(el2_unused, ctx);
+	}
 
 #if ENABLE_SME_FOR_NS
 	/* Enable SME, SVE, and FPU/SIMD for non-secure world. */
diff --git a/lib/extensions/amu/aarch32/amu.c b/lib/extensions/amu/aarch32/amu.c
index 57b1158..03186d6 100644
--- a/lib/extensions/amu/aarch32/amu.c
+++ b/lib/extensions/amu/aarch32/amu.c
@@ -10,6 +10,7 @@
 
 #include "../amu_private.h"
 #include <arch.h>
+#include <arch_features.h>
 #include <arch_helpers.h>
 #include <common/debug.h>
 #include <lib/el3_runtime/pubsub_events.h>
@@ -39,12 +40,6 @@
 	amu_ctx_group1_enable_cannot_represent_all_group1_counters);
 #endif
 
-static inline __unused uint32_t read_id_pfr0_amu(void)
-{
-	return (read_id_pfr0() >> ID_PFR0_AMU_SHIFT) &
-		ID_PFR0_AMU_MASK;
-}
-
 static inline __unused void write_hcptr_tam(uint32_t value)
 {
 	write_hcptr((read_hcptr() & ~TAM_BIT) |
@@ -129,11 +124,6 @@
 	write_amcntenclr1(value);
 }
 
-static __unused bool amu_supported(void)
-{
-	return read_id_pfr0_amu() >= ID_PFR0_AMU_V1;
-}
-
 #if ENABLE_AMU_AUXILIARY_COUNTERS
 static __unused bool amu_group1_supported(void)
 {
@@ -147,23 +137,12 @@
  */
 void amu_enable(bool el2_unused)
 {
-	uint32_t id_pfr0_amu;		/* AMU version */
-
 	uint32_t amcfgr_ncg;		/* Number of counter groups */
 	uint32_t amcgcr_cg0nc;		/* Number of group 0 counters */
 
 	uint32_t amcntenset0_px = 0x0;	/* Group 0 enable mask */
 	uint32_t amcntenset1_px = 0x0;	/* Group 1 enable mask */
 
-	id_pfr0_amu = read_id_pfr0_amu();
-	if (id_pfr0_amu == ID_PFR0_AMU_NOT_SUPPORTED) {
-		/*
-		 * If the AMU is unsupported, nothing needs to be done.
-		 */
-
-		return;
-	}
-
 	if (el2_unused) {
 		/*
 		 * HCPTR.TAM: Set to zero so any accesses to the Activity
@@ -221,8 +200,8 @@
 #endif
 	}
 
-	/* Initialize FEAT_AMUv1p1 features if present. */
-	if (id_pfr0_amu < ID_PFR0_AMU_V1P1) {
+	/* Bail out if FEAT_AMUv1p1 features are not present. */
+	if (!is_feat_amuv1p1_supported()) {
 		return;
 	}
 
@@ -244,7 +223,7 @@
 /* Read the group 0 counter identified by the given `idx`. */
 static uint64_t amu_group0_cnt_read(unsigned int idx)
 {
-	assert(amu_supported());
+	assert(is_feat_amu_supported());
 	assert(idx < read_amcgcr_cg0nc());
 
 	return amu_group0_cnt_read_internal(idx);
@@ -253,7 +232,7 @@
 /* Write the group 0 counter identified by the given `idx` with `val` */
 static void amu_group0_cnt_write(unsigned  int idx, uint64_t val)
 {
-	assert(amu_supported());
+	assert(is_feat_amu_supported());
 	assert(idx < read_amcgcr_cg0nc());
 
 	amu_group0_cnt_write_internal(idx, val);
@@ -264,7 +243,7 @@
 /* Read the group 1 counter identified by the given `idx` */
 static uint64_t amu_group1_cnt_read(unsigned  int idx)
 {
-	assert(amu_supported());
+	assert(is_feat_amu_supported());
 	assert(amu_group1_supported());
 	assert(idx < read_amcgcr_cg1nc());
 
@@ -274,7 +253,7 @@
 /* Write the group 1 counter identified by the given `idx` with `val` */
 static void amu_group1_cnt_write(unsigned  int idx, uint64_t val)
 {
-	assert(amu_supported());
+	assert(is_feat_amu_supported());
 	assert(amu_group1_supported());
 	assert(idx < read_amcgcr_cg1nc());
 
@@ -290,7 +269,6 @@
 	unsigned int core_pos;
 	struct amu_ctx *ctx;
 
-	uint32_t id_pfr0_amu;	/* AMU version */
 	uint32_t amcgcr_cg0nc;	/* Number of group 0 counters */
 
 #if ENABLE_AMU_AUXILIARY_COUNTERS
@@ -298,8 +276,7 @@
 	uint32_t amcgcr_cg1nc;	/* Number of group 1 counters */
 #endif
 
-	id_pfr0_amu = read_id_pfr0_amu();
-	if (id_pfr0_amu == ID_PFR0_AMU_NOT_SUPPORTED) {
+	if (!is_feat_amu_supported()) {
 		return (void *)0;
 	}
 
@@ -353,8 +330,6 @@
 	unsigned int core_pos;
 	struct amu_ctx *ctx;
 
-	uint32_t id_pfr0_amu;	/* AMU version */
-
 	uint32_t amcfgr_ncg;	/* Number of counter groups */
 	uint32_t amcgcr_cg0nc;	/* Number of group 0 counters */
 
@@ -362,8 +337,7 @@
 	uint32_t amcgcr_cg1nc;	/* Number of group 1 counters */
 #endif
 
-	id_pfr0_amu = read_id_pfr0_amu();
-	if (id_pfr0_amu == ID_PFR0_AMU_NOT_SUPPORTED) {
+	if (!is_feat_amu_supported()) {
 		return (void *)0;
 	}
 
diff --git a/lib/extensions/amu/aarch64/amu.c b/lib/extensions/amu/aarch64/amu.c
index 72566fd..c650629 100644
--- a/lib/extensions/amu/aarch64/amu.c
+++ b/lib/extensions/amu/aarch64/amu.c
@@ -57,12 +57,6 @@
 	amu_ctx_group1_enable_cannot_represent_all_group1_counters);
 #endif
 
-static inline __unused uint64_t read_id_aa64pfr0_el1_amu(void)
-{
-	return (read_id_aa64pfr0_el1() >> ID_AA64PFR0_AMU_SHIFT) &
-		ID_AA64PFR0_AMU_MASK;
-}
-
 static inline __unused uint64_t read_hcr_el2_amvoffen(void)
 {
 	return (read_hcr_el2() & HCR_AMVOFFEN_BIT) >>
@@ -183,16 +177,6 @@
 	write_amcntenclr1_el0(value);
 }
 
-static __unused bool amu_supported(void)
-{
-	return read_id_aa64pfr0_el1_amu() >= ID_AA64PFR0_AMU_V1;
-}
-
-static __unused bool amu_v1p1_supported(void)
-{
-	return read_id_aa64pfr0_el1_amu() >= ID_AA64PFR0_AMU_V1P1;
-}
-
 #if ENABLE_AMU_AUXILIARY_COUNTERS
 static __unused bool amu_group1_supported(void)
 {
@@ -206,23 +190,12 @@
  */
 void amu_enable(bool el2_unused, cpu_context_t *ctx)
 {
-	uint64_t id_aa64pfr0_el1_amu;		/* AMU version */
-
 	uint64_t amcfgr_el0_ncg;		/* Number of counter groups */
 	uint64_t amcgcr_el0_cg0nc;		/* Number of group 0 counters */
 
 	uint64_t amcntenset0_el0_px = 0x0;	/* Group 0 enable mask */
 	uint64_t amcntenset1_el0_px = 0x0;	/* Group 1 enable mask */
 
-	id_aa64pfr0_el1_amu = read_id_aa64pfr0_el1_amu();
-	if (id_aa64pfr0_el1_amu == ID_AA64PFR0_AMU_NOT_SUPPORTED) {
-		/*
-		 * If the AMU is unsupported, nothing needs to be done.
-		 */
-
-		return;
-	}
-
 	if (el2_unused) {
 		/*
 		 * CPTR_EL2.TAM: Set to zero so any accesses to the Activity
@@ -288,7 +261,7 @@
 	}
 
 	/* Initialize FEAT_AMUv1p1 features if present. */
-	if (id_aa64pfr0_el1_amu >= ID_AA64PFR0_AMU_V1P1) {
+	if (is_feat_amuv1p1_supported()) {
 		if (el2_unused) {
 			/*
 			 * Make sure virtual offsets are disabled if EL2 not
@@ -327,7 +300,7 @@
 /* Read the group 0 counter identified by the given `idx`. */
 static uint64_t amu_group0_cnt_read(unsigned int idx)
 {
-	assert(amu_supported());
+	assert(is_feat_amu_supported());
 	assert(idx < read_amcgcr_el0_cg0nc());
 
 	return amu_group0_cnt_read_internal(idx);
@@ -336,7 +309,7 @@
 /* Write the group 0 counter identified by the given `idx` with `val` */
 static void amu_group0_cnt_write(unsigned  int idx, uint64_t val)
 {
-	assert(amu_supported());
+	assert(is_feat_amu_supported());
 	assert(idx < read_amcgcr_el0_cg0nc());
 
 	amu_group0_cnt_write_internal(idx, val);
@@ -376,7 +349,7 @@
  */
 static uint64_t amu_group0_voffset_read(unsigned int idx)
 {
-	assert(amu_v1p1_supported());
+	assert(is_feat_amuv1p1_supported());
 	assert(idx < read_amcgcr_el0_cg0nc());
 	assert(idx != 1U);
 
@@ -391,7 +364,7 @@
  */
 static void amu_group0_voffset_write(unsigned int idx, uint64_t val)
 {
-	assert(amu_v1p1_supported());
+	assert(is_feat_amuv1p1_supported());
 	assert(idx < read_amcgcr_el0_cg0nc());
 	assert(idx != 1U);
 
@@ -403,7 +376,7 @@
 /* Read the group 1 counter identified by the given `idx` */
 static uint64_t amu_group1_cnt_read(unsigned int idx)
 {
-	assert(amu_supported());
+	assert(is_feat_amu_supported());
 	assert(amu_group1_supported());
 	assert(idx < read_amcgcr_el0_cg1nc());
 
@@ -413,7 +386,7 @@
 /* Write the group 1 counter identified by the given `idx` with `val` */
 static void amu_group1_cnt_write(unsigned int idx, uint64_t val)
 {
-	assert(amu_supported());
+	assert(is_feat_amu_supported());
 	assert(amu_group1_supported());
 	assert(idx < read_amcgcr_el0_cg1nc());
 
@@ -428,7 +401,7 @@
  */
 static uint64_t amu_group1_voffset_read(unsigned int idx)
 {
-	assert(amu_v1p1_supported());
+	assert(is_feat_amuv1p1_supported());
 	assert(amu_group1_supported());
 	assert(idx < read_amcgcr_el0_cg1nc());
 	assert((read_amcg1idr_el0_voff() & (UINT64_C(1) << idx)) != 0U);
@@ -443,7 +416,7 @@
  */
 static void amu_group1_voffset_write(unsigned int idx, uint64_t val)
 {
-	assert(amu_v1p1_supported());
+	assert(is_feat_amuv1p1_supported());
 	assert(amu_group1_supported());
 	assert(idx < read_amcgcr_el0_cg1nc());
 	assert((read_amcg1idr_el0_voff() & (UINT64_C(1) << idx)) != 0U);
@@ -460,8 +433,7 @@
 	unsigned int core_pos;
 	struct amu_ctx *ctx;
 
-	uint64_t id_aa64pfr0_el1_amu;	/* AMU version */
-	uint64_t hcr_el2_amvoffen;	/* AMU virtual offsets enabled */
+	uint64_t hcr_el2_amvoffen = 0;	/* AMU virtual offsets enabled */
 	uint64_t amcgcr_el0_cg0nc;	/* Number of group 0 counters */
 
 #if ENABLE_AMU_AUXILIARY_COUNTERS
@@ -470,8 +442,7 @@
 	uint64_t amcgcr_el0_cg1nc;	/* Number of group 1 counters */
 #endif
 
-	id_aa64pfr0_el1_amu = read_id_aa64pfr0_el1_amu();
-	if (id_aa64pfr0_el1_amu == ID_AA64PFR0_AMU_NOT_SUPPORTED) {
+	if (!is_feat_amu_supported()) {
 		return (void *)0;
 	}
 
@@ -479,8 +450,9 @@
 	ctx = &amu_ctxs_[core_pos];
 
 	amcgcr_el0_cg0nc = read_amcgcr_el0_cg0nc();
-	hcr_el2_amvoffen = (id_aa64pfr0_el1_amu >= ID_AA64PFR0_AMU_V1P1) ?
-		read_hcr_el2_amvoffen() : 0U;
+	if (is_feat_amuv1p1_supported()) {
+		hcr_el2_amvoffen = read_hcr_el2_amvoffen();
+	}
 
 #if ENABLE_AMU_AUXILIARY_COUNTERS
 	amcfgr_el0_ncg = read_amcfgr_el0_ncg();
@@ -552,9 +524,7 @@
 	unsigned int core_pos;
 	struct amu_ctx *ctx;
 
-	uint64_t id_aa64pfr0_el1_amu;	/* AMU version */
-
-	uint64_t hcr_el2_amvoffen;	/* AMU virtual offsets enabled */
+	uint64_t hcr_el2_amvoffen = 0;	/* AMU virtual offsets enabled */
 
 	uint64_t amcfgr_el0_ncg;	/* Number of counter groups */
 	uint64_t amcgcr_el0_cg0nc;	/* Number of group 0 counters */
@@ -564,8 +534,7 @@
 	uint64_t amcg1idr_el0_voff;	/* Auxiliary counters with virtual offsets */
 #endif
 
-	id_aa64pfr0_el1_amu = read_id_aa64pfr0_el1_amu();
-	if (id_aa64pfr0_el1_amu == ID_AA64PFR0_AMU_NOT_SUPPORTED) {
+	if (!is_feat_amu_supported()) {
 		return (void *)0;
 	}
 
@@ -575,8 +544,9 @@
 	amcfgr_el0_ncg = read_amcfgr_el0_ncg();
 	amcgcr_el0_cg0nc = read_amcgcr_el0_cg0nc();
 
-	hcr_el2_amvoffen = (id_aa64pfr0_el1_amu >= ID_AA64PFR0_AMU_V1P1) ?
-		read_hcr_el2_amvoffen() : 0U;
+	if (is_feat_amuv1p1_supported()) {
+		hcr_el2_amvoffen = read_hcr_el2_amvoffen();
+	}
 
 #if ENABLE_AMU_AUXILIARY_COUNTERS
 	amcgcr_el0_cg1nc = (amcfgr_el0_ncg > 0U) ? read_amcgcr_el0_cg1nc() : 0U;
diff --git a/lib/extensions/amu/amu.mk b/lib/extensions/amu/amu.mk
index 0d203cb..868ab12 100644
--- a/lib/extensions/amu/amu.mk
+++ b/lib/extensions/amu/amu.mk
@@ -10,8 +10,8 @@
 			lib/extensions/amu/${ARCH}/amu_helpers.S
 
 ifneq (${ENABLE_AMU_AUXILIARY_COUNTERS},0)
-        ifeq (${ENABLE_AMU},0)
-                $(error AMU auxiliary counter support (`ENABLE_AMU_AUXILIARY_COUNTERS`) requires AMU support (`ENABLE_AMU`))
+        ifeq (${ENABLE_FEAT_AMU},0)
+                $(error AMU auxiliary counter support (`ENABLE_AMU_AUXILIARY_COUNTERS`) requires AMU support (`ENABLE_FEAT_AMU`))
         endif
 endif
 
diff --git a/lib/psci/psci_common.c b/lib/psci/psci_common.c
index f233be1..ebeb10b 100644
--- a/lib/psci/psci_common.c
+++ b/lib/psci/psci_common.c
@@ -76,6 +76,14 @@
 	(PLAT_MAX_PWR_LVL >= PSCI_CPU_PWR_LVL),
 	assert_platform_max_pwrlvl_check);
 
+#if PSCI_OS_INIT_MODE
+/*******************************************************************************
+ * The power state coordination mode used in CPU_SUSPEND.
+ * Defaults to platform-coordinated mode.
+ ******************************************************************************/
+suspend_mode_t psci_suspend_mode = PLAT_COORD;
+#endif
+
 /*
  * The plat_local_state used by the platform is one of these types: RUN,
  * RETENTION and OFF. The platform can define further sub-states for each type
@@ -153,8 +161,51 @@
 	psci_plat_pm_ops->get_sys_suspend_power_state(state_info);
 }
 
+#if PSCI_OS_INIT_MODE
 /*******************************************************************************
- * This function verifies that the all the other cores in the system have been
+ * This function verifies that all the other cores at the 'end_pwrlvl' have been
+ * idled and the current CPU is the last running CPU at the 'end_pwrlvl'.
+ * Returns 1 (true) if the current CPU is the last ON CPU or 0 (false)
+ * otherwise.
+ ******************************************************************************/
+static bool psci_is_last_cpu_to_idle_at_pwrlvl(unsigned int end_pwrlvl)
+{
+	unsigned int my_idx, lvl, parent_idx;
+	unsigned int cpu_start_idx, ncpus, cpu_idx;
+	plat_local_state_t local_state;
+
+	if (end_pwrlvl == PSCI_CPU_PWR_LVL) {
+		return true;
+	}
+
+	my_idx = plat_my_core_pos();
+
+	for (lvl = PSCI_CPU_PWR_LVL; lvl <= end_pwrlvl; lvl++) {
+		parent_idx = psci_cpu_pd_nodes[my_idx].parent_node;
+	}
+
+	cpu_start_idx = psci_non_cpu_pd_nodes[parent_idx].cpu_start_idx;
+	ncpus = psci_non_cpu_pd_nodes[parent_idx].ncpus;
+
+	for (cpu_idx = cpu_start_idx; cpu_idx < cpu_start_idx + ncpus;
+			cpu_idx++) {
+		local_state = psci_get_cpu_local_state_by_idx(cpu_idx);
+		if (cpu_idx == my_idx) {
+			assert(is_local_state_run(local_state) != 0);
+			continue;
+		}
+
+		if (is_local_state_run(local_state) != 0) {
+			return false;
+		}
+	}
+
+	return true;
+}
+#endif
+
+/*******************************************************************************
+ * This function verifies that all the other cores in the system have been
  * turned OFF and the current CPU is the last running CPU in the system.
  * Returns true, if the current CPU is the last ON CPU or false otherwise.
  ******************************************************************************/
@@ -179,6 +230,23 @@
 }
 
 /*******************************************************************************
+ * This function verifies that all cores in the system have been turned ON.
+ * Returns true, if all CPUs are ON or false otherwise.
+ ******************************************************************************/
+static bool psci_are_all_cpus_on(void)
+{
+	unsigned int cpu_idx;
+
+	for (cpu_idx = 0; cpu_idx < psci_plat_core_count; cpu_idx++) {
+		if (psci_get_aff_info_state_by_idx(cpu_idx) == AFF_STATE_OFF) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*******************************************************************************
  * Routine to return the maximum power level to traverse to after a cpu has
  * been physically powered up. It is expected to be called immediately after
  * reset from assembler code.
@@ -252,6 +320,60 @@
 	} else
 		return NULL;
 }
+
+#if PSCI_OS_INIT_MODE
+/******************************************************************************
+ * Helper function to save a copy of the psci_req_local_pwr_states (prev) for a
+ * CPU (cpu_idx), and update psci_req_local_pwr_states with the new requested
+ * local power states (state_info).
+ *****************************************************************************/
+void psci_update_req_local_pwr_states(unsigned int end_pwrlvl,
+				      unsigned int cpu_idx,
+				      psci_power_state_t *state_info,
+				      plat_local_state_t *prev)
+{
+	unsigned int lvl;
+#ifdef PLAT_MAX_CPU_SUSPEND_PWR_LVL
+	unsigned int max_pwrlvl = PLAT_MAX_CPU_SUSPEND_PWR_LVL;
+#else
+	unsigned int max_pwrlvl = PLAT_MAX_PWR_LVL;
+#endif
+	plat_local_state_t req_state;
+
+	for (lvl = PSCI_CPU_PWR_LVL + 1U; lvl <= max_pwrlvl; lvl++) {
+		/* Save the previous requested local power state */
+		prev[lvl - 1U] = *psci_get_req_local_pwr_states(lvl, cpu_idx);
+
+		/* Update the new requested local power state */
+		if (lvl <= end_pwrlvl) {
+			req_state = state_info->pwr_domain_state[lvl];
+		} else {
+			req_state = state_info->pwr_domain_state[end_pwrlvl];
+		}
+		psci_set_req_local_pwr_state(lvl, cpu_idx, req_state);
+	}
+}
+
+/******************************************************************************
+ * Helper function to restore the previously saved requested local power states
+ * (prev) for a CPU (cpu_idx) to psci_req_local_pwr_states.
+ *****************************************************************************/
+void psci_restore_req_local_pwr_states(unsigned int cpu_idx,
+				       plat_local_state_t *prev)
+{
+	unsigned int lvl;
+#ifdef PLAT_MAX_CPU_SUSPEND_PWR_LVL
+	unsigned int max_pwrlvl = PLAT_MAX_CPU_SUSPEND_PWR_LVL;
+#else
+	unsigned int max_pwrlvl = PLAT_MAX_PWR_LVL;
+#endif
+
+	for (lvl = PSCI_CPU_PWR_LVL + 1U; lvl <= max_pwrlvl; lvl++) {
+		/* Restore the previous requested local power state */
+		psci_set_req_local_pwr_state(lvl, cpu_idx, prev[lvl - 1U]);
+	}
+}
+#endif
 
 /*
  * psci_non_cpu_pd_nodes can be placed either in normal memory or coherent
@@ -399,6 +521,8 @@
 }
 
 /******************************************************************************
+ * This function is used in platform-coordinated mode.
+ *
  * This function is passed the local power states requested for each power
  * domain (state_info) between the current CPU domain and its ancestors until
  * the target power level (end_pwrlvl). It updates the array of requested power
@@ -476,6 +600,97 @@
 	psci_set_target_local_pwr_states(end_pwrlvl, state_info);
 }
 
+#if PSCI_OS_INIT_MODE
+/******************************************************************************
+ * This function is used in OS-initiated mode.
+ *
+ * This function is passed the local power states requested for each power
+ * domain (state_info) between the current CPU domain and its ancestors until
+ * the target power level (end_pwrlvl), and ensures the requested power states
+ * are valid. It updates the array of requested power states with this
+ * information.
+ *
+ * Then, for each level (apart from the CPU level) until the 'end_pwrlvl', it
+ * retrieves the states requested by all the cpus of which the power domain at
+ * that level is an ancestor. It passes this information to the platform to
+ * coordinate and return the target power state. If the requested state does
+ * not match the target state, the request is denied.
+ *
+ * The 'state_info' is not modified.
+ *
+ * This function will only be invoked with data cache enabled and while
+ * powering down a core.
+ *****************************************************************************/
+int psci_validate_state_coordination(unsigned int end_pwrlvl,
+				     psci_power_state_t *state_info)
+{
+	int rc = PSCI_E_SUCCESS;
+	unsigned int lvl, parent_idx, cpu_idx = plat_my_core_pos();
+	unsigned int start_idx;
+	unsigned int ncpus;
+	plat_local_state_t target_state, *req_states;
+	plat_local_state_t prev[PLAT_MAX_PWR_LVL];
+
+	assert(end_pwrlvl <= PLAT_MAX_PWR_LVL);
+	parent_idx = psci_cpu_pd_nodes[cpu_idx].parent_node;
+
+	/*
+	 * Save a copy of the previous requested local power states and update
+	 * the new requested local power states.
+	 */
+	psci_update_req_local_pwr_states(end_pwrlvl, cpu_idx, state_info, prev);
+
+	for (lvl = PSCI_CPU_PWR_LVL + 1U; lvl <= end_pwrlvl; lvl++) {
+		/* Get the requested power states for this power level */
+		start_idx = psci_non_cpu_pd_nodes[parent_idx].cpu_start_idx;
+		req_states = psci_get_req_local_pwr_states(lvl, start_idx);
+
+		/*
+		 * Let the platform coordinate amongst the requested states at
+		 * this power level and return the target local power state.
+		 */
+		ncpus = psci_non_cpu_pd_nodes[parent_idx].ncpus;
+		target_state = plat_get_target_pwr_state(lvl,
+							 req_states,
+							 ncpus);
+
+		/*
+		 * Verify that the requested power state matches the target
+		 * local power state.
+		 */
+		if (state_info->pwr_domain_state[lvl] != target_state) {
+			if (target_state == PSCI_LOCAL_STATE_RUN) {
+				rc = PSCI_E_DENIED;
+			} else {
+				rc = PSCI_E_INVALID_PARAMS;
+			}
+			goto exit;
+		}
+	}
+
+	/*
+	 * Verify that the current core is the last running core at the
+	 * specified power level.
+	 */
+	lvl = state_info->last_at_pwrlvl;
+	if (!psci_is_last_cpu_to_idle_at_pwrlvl(lvl)) {
+		rc = PSCI_E_DENIED;
+	}
+
+exit:
+	if (rc != PSCI_E_SUCCESS) {
+		/* Restore the previous requested local power states. */
+		psci_restore_req_local_pwr_states(cpu_idx, prev);
+		return rc;
+	}
+
+	/* Update the target state in the power domain nodes */
+	psci_set_target_local_pwr_states(end_pwrlvl, state_info);
+
+	return rc;
+}
+#endif
+
 /******************************************************************************
  * This function validates a suspend request by making sure that if a standby
  * state is requested then no power level is turned off and the highest power
@@ -1050,3 +1265,29 @@
 
 	return true;
 }
+
+/*******************************************************************************
+ * This function verifies that all cores in the system have been turned ON.
+ * Returns true, if all CPUs are ON or false otherwise.
+ *
+ * This API has following differences with psci_are_all_cpus_on
+ *  1. PSCI states are locked
+ ******************************************************************************/
+bool psci_are_all_cpus_on_safe(void)
+{
+	unsigned int this_core = plat_my_core_pos();
+	unsigned int parent_nodes[PLAT_MAX_PWR_LVL] = {0};
+
+	psci_get_parent_pwr_domain_nodes(this_core, PLAT_MAX_PWR_LVL, parent_nodes);
+
+	psci_acquire_pwr_domain_locks(PLAT_MAX_PWR_LVL, parent_nodes);
+
+	if (!psci_are_all_cpus_on()) {
+		psci_release_pwr_domain_locks(PLAT_MAX_PWR_LVL, parent_nodes);
+		return false;
+	}
+
+	psci_release_pwr_domain_locks(PLAT_MAX_PWR_LVL, parent_nodes);
+
+	return true;
+}
diff --git a/lib/psci/psci_main.c b/lib/psci/psci_main.c
index a631f3f..276c3a5 100644
--- a/lib/psci/psci_main.c
+++ b/lib/psci/psci_main.c
@@ -60,6 +60,10 @@
 	entry_point_info_t ep;
 	psci_power_state_t state_info = { {PSCI_LOCAL_STATE_RUN} };
 	plat_local_state_t cpu_pd_state;
+#if PSCI_OS_INIT_MODE
+	unsigned int cpu_idx = plat_my_core_pos();
+	plat_local_state_t prev[PLAT_MAX_PWR_LVL];
+#endif
 
 	/* Validate the power_state parameter */
 	rc = psci_validate_power_state(power_state, &state_info);
@@ -95,6 +99,18 @@
 		cpu_pd_state = state_info.pwr_domain_state[PSCI_CPU_PWR_LVL];
 		psci_set_cpu_local_state(cpu_pd_state);
 
+#if PSCI_OS_INIT_MODE
+		/*
+		 * If in OS-initiated mode, save a copy of the previous
+		 * requested local power states and update the new requested
+		 * local power states for this CPU.
+		 */
+		if (psci_suspend_mode == OS_INIT) {
+			psci_update_req_local_pwr_states(target_pwrlvl, cpu_idx,
+							 &state_info, prev);
+		}
+#endif
+
 #if ENABLE_PSCI_STAT
 		plat_psci_stat_accounting_start(&state_info);
 #endif
@@ -110,6 +126,16 @@
 		/* Upon exit from standby, set the state back to RUN. */
 		psci_set_cpu_local_state(PSCI_LOCAL_STATE_RUN);
 
+#if PSCI_OS_INIT_MODE
+		/*
+		 * If in OS-initiated mode, restore the previous requested
+		 * local power states for this CPU.
+		 */
+		if (psci_suspend_mode == OS_INIT) {
+			psci_restore_req_local_pwr_states(cpu_idx, prev);
+		}
+#endif
+
 #if ENABLE_RUNTIME_INSTRUMENTATION
 		PMF_CAPTURE_TIMESTAMP(rt_instr_svc,
 		    RT_INSTR_EXIT_HW_LOW_PWR,
@@ -142,12 +168,12 @@
 	 * might return if the power down was abandoned for any reason, e.g.
 	 * arrival of an interrupt
 	 */
-	psci_cpu_suspend_start(&ep,
-			    target_pwrlvl,
-			    &state_info,
-			    is_power_down_state);
+	rc = psci_cpu_suspend_start(&ep,
+				    target_pwrlvl,
+				    &state_info,
+				    is_power_down_state);
 
-	return PSCI_E_SUCCESS;
+	return rc;
 }
 
 
@@ -187,12 +213,12 @@
 	 * might return if the power down was abandoned for any reason, e.g.
 	 * arrival of an interrupt
 	 */
-	psci_cpu_suspend_start(&ep,
-			    PLAT_MAX_PWR_LVL,
-			    &state_info,
-			    PSTATE_TYPE_POWERDOWN);
+	rc = psci_cpu_suspend_start(&ep,
+				    PLAT_MAX_PWR_LVL,
+				    &state_info,
+				    PSTATE_TYPE_POWERDOWN);
 
-	return PSCI_E_SUCCESS;
+	return rc;
 }
 
 int psci_cpu_off(void)
@@ -357,19 +383,48 @@
 	/* Format the feature flags */
 	if ((psci_fid == PSCI_CPU_SUSPEND_AARCH32) ||
 	    (psci_fid == PSCI_CPU_SUSPEND_AARCH64)) {
-		/*
-		 * The trusted firmware does not support OS Initiated Mode.
-		 */
 		unsigned int ret = ((FF_PSTATE << FF_PSTATE_SHIFT) |
-			(((FF_SUPPORTS_OS_INIT_MODE == 1U) ? 0U : 1U)
-				<< FF_MODE_SUPPORT_SHIFT));
-		return (int) ret;
+			(FF_SUPPORTS_OS_INIT_MODE << FF_MODE_SUPPORT_SHIFT));
+		return (int)ret;
 	}
 
 	/* Return 0 for all other fid's */
 	return PSCI_E_SUCCESS;
 }
 
+#if PSCI_OS_INIT_MODE
+int psci_set_suspend_mode(unsigned int mode)
+{
+	if (psci_suspend_mode == mode) {
+		return PSCI_E_SUCCESS;
+	}
+
+	if (mode == PLAT_COORD) {
+		/* Check if the current CPU is the last ON CPU in the system */
+		if (!psci_is_last_on_cpu_safe()) {
+			return PSCI_E_DENIED;
+		}
+	}
+
+	if (mode == OS_INIT) {
+		/*
+		 * Check if all CPUs in the system are ON or if the current
+		 * CPU is the last ON CPU in the system.
+		 */
+		if (!(psci_are_all_cpus_on_safe() ||
+		      psci_is_last_on_cpu_safe())) {
+			return PSCI_E_DENIED;
+		}
+	}
+
+	psci_suspend_mode = mode;
+	psci_flush_dcache_range((uintptr_t)&psci_suspend_mode,
+				sizeof(psci_suspend_mode));
+
+	return PSCI_E_SUCCESS;
+}
+#endif
+
 /*******************************************************************************
  * PSCI top level handler for servicing SMCs.
  ******************************************************************************/
@@ -453,6 +508,12 @@
 			ret = (u_register_t)psci_features(r1);
 			break;
 
+#if PSCI_OS_INIT_MODE
+		case PSCI_SET_SUSPEND_MODE:
+			ret = (u_register_t)psci_set_suspend_mode(r1);
+			break;
+#endif
+
 #if ENABLE_PSCI_STAT
 		case PSCI_STAT_RESIDENCY_AARCH32:
 			ret = psci_stat_residency(r1, r2);
@@ -506,6 +567,10 @@
 			ret = psci_migrate_info_up_cpu();
 			break;
 
+		case PSCI_FEATURES:
+			ret = (u_register_t)psci_features(x1);
+			break;
+
 		case PSCI_NODE_HW_STATE_AARCH64:
 			ret = (u_register_t)psci_node_hw_state(
 					x1, (unsigned int) x2);
@@ -515,6 +580,12 @@
 			ret = (u_register_t)psci_system_suspend(x1, x2);
 			break;
 
+#if PSCI_OS_INIT_MODE
+		case PSCI_SET_SUSPEND_MODE:
+			ret = (u_register_t)psci_set_suspend_mode(x1);
+			break;
+#endif
+
 #if ENABLE_PSCI_STAT
 		case PSCI_STAT_RESIDENCY_AARCH64:
 			ret = psci_stat_residency(x1, (unsigned int) x2);
diff --git a/lib/psci/psci_private.h b/lib/psci/psci_private.h
index 6ca9ef6..b9987fe 100644
--- a/lib/psci/psci_private.h
+++ b/lib/psci/psci_private.h
@@ -163,6 +163,16 @@
 	spinlock_t cpu_lock;
 } cpu_pd_node_t;
 
+#if PSCI_OS_INIT_MODE
+/*******************************************************************************
+ * The supported power state coordination modes that can be used in CPU_SUSPEND.
+ ******************************************************************************/
+typedef enum suspend_mode {
+	PLAT_COORD = 0,
+	OS_INIT = 1
+} suspend_mode_t;
+#endif
+
 /*******************************************************************************
  * The following are helpers and declarations of locks.
  ******************************************************************************/
@@ -260,6 +270,9 @@
 extern cpu_pd_node_t psci_cpu_pd_nodes[PLATFORM_CORE_COUNT];
 extern unsigned int psci_caps;
 extern unsigned int psci_plat_core_count;
+#if PSCI_OS_INIT_MODE
+extern suspend_mode_t psci_suspend_mode;
+#endif
 
 /*******************************************************************************
  * SPD's power management hooks registered with PSCI
@@ -275,6 +288,14 @@
 void psci_query_sys_suspend_pwrstate(psci_power_state_t *state_info);
 int psci_validate_mpidr(u_register_t mpidr);
 void psci_init_req_local_pwr_states(void);
+#if PSCI_OS_INIT_MODE
+void psci_update_req_local_pwr_states(unsigned int end_pwrlvl,
+				      unsigned int cpu_idx,
+				      psci_power_state_t *state_info,
+				      plat_local_state_t *prev);
+void psci_restore_req_local_pwr_states(unsigned int cpu_idx,
+				       plat_local_state_t *prev);
+#endif
 void psci_get_target_local_pwr_states(unsigned int end_pwrlvl,
 				      psci_power_state_t *target_state);
 int psci_validate_entry_point(entry_point_info_t *ep,
@@ -284,6 +305,10 @@
 				      unsigned int *node_index);
 void psci_do_state_coordination(unsigned int end_pwrlvl,
 				psci_power_state_t *state_info);
+#if PSCI_OS_INIT_MODE
+int psci_validate_state_coordination(unsigned int end_pwrlvl,
+				     psci_power_state_t *state_info);
+#endif
 void psci_acquire_pwr_domain_locks(unsigned int end_pwrlvl,
 				   const unsigned int *parent_nodes);
 void psci_release_pwr_domain_locks(unsigned int end_pwrlvl,
@@ -317,10 +342,10 @@
 int psci_do_cpu_off(unsigned int end_pwrlvl);
 
 /* Private exported functions from psci_suspend.c */
-void psci_cpu_suspend_start(const entry_point_info_t *ep,
-			unsigned int end_pwrlvl,
-			psci_power_state_t *state_info,
-			unsigned int is_power_down_state);
+int psci_cpu_suspend_start(const entry_point_info_t *ep,
+			   unsigned int end_pwrlvl,
+			   psci_power_state_t *state_info,
+			   unsigned int is_power_down_state);
 
 void psci_cpu_suspend_finish(unsigned int cpu_idx, const psci_power_state_t *state_info);
 
diff --git a/lib/psci/psci_setup.c b/lib/psci/psci_setup.c
index 3cb4f7e..16d6e45 100644
--- a/lib/psci/psci_setup.c
+++ b/lib/psci/psci_setup.c
@@ -254,6 +254,9 @@
 			psci_caps |=  define_psci_cap(PSCI_CPU_SUSPEND_AARCH64);
 		if (psci_plat_pm_ops->get_sys_suspend_power_state != NULL)
 			psci_caps |=  define_psci_cap(PSCI_SYSTEM_SUSPEND_AARCH64);
+#if PSCI_OS_INIT_MODE
+		psci_caps |= define_psci_cap(PSCI_SET_SUSPEND_MODE);
+#endif
 	}
 	if (psci_plat_pm_ops->system_off != NULL)
 		psci_caps |=  define_psci_cap(PSCI_SYSTEM_OFF);
diff --git a/lib/psci/psci_suspend.c b/lib/psci/psci_suspend.c
index f71994d..861b875 100644
--- a/lib/psci/psci_suspend.c
+++ b/lib/psci/psci_suspend.c
@@ -75,6 +75,14 @@
 
 	PUBLISH_EVENT(psci_suspend_pwrdown_start);
 
+#if PSCI_OS_INIT_MODE
+#ifdef PLAT_MAX_CPU_SUSPEND_PWR_LVL
+	end_pwrlvl = PLAT_MAX_CPU_SUSPEND_PWR_LVL;
+#else
+	end_pwrlvl = PLAT_MAX_PWR_LVL;
+#endif
+#endif
+
 	/* Save PSCI target power level for the suspend finisher handler */
 	psci_set_suspend_pwrlvl(end_pwrlvl);
 
@@ -151,12 +159,13 @@
  * the state transition has been done, no further error is expected and it is
  * not possible to undo any of the actions taken beyond that point.
  ******************************************************************************/
-void psci_cpu_suspend_start(const entry_point_info_t *ep,
-			    unsigned int end_pwrlvl,
-			    psci_power_state_t *state_info,
-			    unsigned int is_power_down_state)
+int psci_cpu_suspend_start(const entry_point_info_t *ep,
+			   unsigned int end_pwrlvl,
+			   psci_power_state_t *state_info,
+			   unsigned int is_power_down_state)
 {
-	int skip_wfi = 0;
+	int rc = PSCI_E_SUCCESS;
+	bool skip_wfi = false;
 	unsigned int idx = plat_my_core_pos();
 	unsigned int parent_nodes[PLAT_MAX_PWR_LVL] = {0};
 
@@ -183,16 +192,32 @@
 	 * detection that a wake-up interrupt has fired.
 	 */
 	if (read_isr_el1() != 0U) {
-		skip_wfi = 1;
+		skip_wfi = true;
 		goto exit;
 	}
 
-	/*
-	 * This function is passed the requested state info and
-	 * it returns the negotiated state info for each power level upto
-	 * the end level specified.
-	 */
-	psci_do_state_coordination(end_pwrlvl, state_info);
+#if PSCI_OS_INIT_MODE
+	if (psci_suspend_mode == OS_INIT) {
+		/*
+		 * This function validates the requested state info for
+		 * OS-initiated mode.
+		 */
+		rc = psci_validate_state_coordination(end_pwrlvl, state_info);
+		if (rc != PSCI_E_SUCCESS) {
+			skip_wfi = true;
+			goto exit;
+		}
+	} else {
+#endif
+		/*
+		 * This function is passed the requested state info and
+		 * it returns the negotiated state info for each power level upto
+		 * the end level specified.
+		 */
+		psci_do_state_coordination(end_pwrlvl, state_info);
+#if PSCI_OS_INIT_MODE
+	}
+#endif
 
 #if ENABLE_PSCI_STAT
 	/* Update the last cpu for each level till end_pwrlvl */
@@ -208,7 +233,16 @@
 	 * platform defined mailbox with the psci entrypoint,
 	 * program the power controller etc.
 	 */
+
+#if PSCI_OS_INIT_MODE
+	rc = psci_plat_pm_ops->pwr_domain_suspend(state_info);
+	if (rc != PSCI_E_SUCCESS) {
+		skip_wfi = true;
+		goto exit;
+	}
+#else
 	psci_plat_pm_ops->pwr_domain_suspend(state_info);
+#endif
 
 #if ENABLE_PSCI_STAT
 	plat_psci_stat_accounting_start(state_info);
@@ -221,8 +255,9 @@
 	 */
 	psci_release_pwr_domain_locks(end_pwrlvl, parent_nodes);
 
-	if (skip_wfi == 1)
-		return;
+	if (skip_wfi) {
+		return rc;
+	}
 
 	if (is_power_down_state != 0U) {
 #if ENABLE_RUNTIME_INSTRUMENTATION
@@ -269,6 +304,8 @@
 	 * context retaining suspend finisher.
 	 */
 	psci_suspend_to_standby_finisher(idx, end_pwrlvl);
+
+	return rc;
 }
 
 /*******************************************************************************
diff --git a/make_helpers/defaults.mk b/make_helpers/defaults.mk
index b928fcf..80c6174 100644
--- a/make_helpers/defaults.mk
+++ b/make_helpers/defaults.mk
@@ -133,9 +133,6 @@
 # Use BRANCH_PROTECTION to enable PAUTH.
 ENABLE_PAUTH			:= 0
 
-# Flag to enable access to the HAFGRTR_EL2 register
-ENABLE_FEAT_AMUv1		:= 0
-
 # Flag to enable AMUv1p1 extension.
 ENABLE_FEAT_AMUv1p1		:= 0
 
@@ -258,6 +255,9 @@
 # Flag used to choose the power state format: Extended State-ID or Original
 PSCI_EXTENDED_STATE_ID		:= 0
 
+# Enable PSCI OS-initiated mode support
+PSCI_OS_INIT_MODE		:= 0
+
 # Enable RAS support
 RAS_EXTENSION			:= 0
 
@@ -367,7 +367,7 @@
 # enabled at ELX.
 CTX_INCLUDE_MTE_REGS		:= 0
 
-ENABLE_AMU			:= 0
+ENABLE_FEAT_AMU			:= 0
 ENABLE_AMU_AUXILIARY_COUNTERS	:= 0
 ENABLE_AMU_FCONF		:= 0
 AMU_RESTRICT_COUNTERS		:= 0
diff --git a/plat/arm/board/arm_fpga/platform.mk b/plat/arm/board/arm_fpga/platform.mk
index a14a0d8..109bfbe 100644
--- a/plat/arm/board/arm_fpga/platform.mk
+++ b/plat/arm/board/arm_fpga/platform.mk
@@ -33,7 +33,7 @@
 FPGA_PRELOADED_CMD_LINE := 0x1000
 $(eval $(call add_define,FPGA_PRELOADED_CMD_LINE))
 
-ENABLE_AMU		:=	1
+ENABLE_FEAT_AMU		:=	2
 
 # Treating this as a memory-constrained port for now
 USE_COHERENT_MEM	:=	0
diff --git a/plat/arm/board/fvp/fvp_pm.c b/plat/arm/board/fvp/fvp_pm.c
index 3d53388..a3289b6 100644
--- a/plat/arm/board/fvp/fvp_pm.c
+++ b/plat/arm/board/fvp/fvp_pm.c
@@ -228,7 +228,11 @@
  * FVP handler called when a power domain is about to be suspended. The
  * target_state encodes the power state that each level should transition to.
  ******************************************************************************/
+#if PSCI_OS_INIT_MODE
+static int fvp_pwr_domain_suspend(const psci_power_state_t *target_state)
+#else
 static void fvp_pwr_domain_suspend(const psci_power_state_t *target_state)
+#endif
 {
 	unsigned long mpidr;
 
@@ -238,7 +242,11 @@
 	 */
 	if (target_state->pwr_domain_state[ARM_PWR_LVL0] ==
 					ARM_LOCAL_STATE_RET)
+#if PSCI_OS_INIT_MODE
+		return PSCI_E_SUCCESS;
+#else
 		return;
+#endif
 
 	assert(target_state->pwr_domain_state[ARM_PWR_LVL0] ==
 					ARM_LOCAL_STATE_OFF);
@@ -270,6 +278,12 @@
 
 	/* Program the power controller to power off this cpu. */
 	fvp_pwrc_write_ppoffr(read_mpidr_el1());
+
+#if PSCI_OS_INIT_MODE
+	return PSCI_E_SUCCESS;
+#else
+	return;
+#endif
 }
 
 /*******************************************************************************
diff --git a/plat/arm/board/fvp/include/platform_def.h b/plat/arm/board/fvp/include/platform_def.h
index 039f8e2..84e2e82 100644
--- a/plat/arm/board/fvp/include/platform_def.h
+++ b/plat/arm/board/fvp/include/platform_def.h
@@ -30,6 +30,10 @@
 
 #define PLAT_MAX_PWR_LVL		ARM_PWR_LVL2
 
+#if PSCI_OS_INIT_MODE
+#define PLAT_MAX_CPU_SUSPEND_PWR_LVL	ARM_PWR_LVL1
+#endif
+
 /*
  * Other platform porting definitions are provided by included headers
  */
diff --git a/plat/arm/board/fvp/platform.mk b/plat/arm/board/fvp/platform.mk
index 1d96a2a..12a4a27 100644
--- a/plat/arm/board/fvp/platform.mk
+++ b/plat/arm/board/fvp/platform.mk
@@ -319,12 +319,13 @@
 endif
 
 # Enable Activity Monitor Unit extensions by default
-ENABLE_AMU			:=	1
+ENABLE_FEAT_AMU			:=	2
+ENABLE_FEAT_AMUv1p1		:=	2
 
 # Enable dynamic mitigation support by default
 DYNAMIC_WORKAROUND_CVE_2018_3639	:=	1
 
-ifeq (${ENABLE_AMU},1)
+ifneq (${ENABLE_FEAT_AMU},0)
 BL31_SOURCES		+=	lib/cpus/aarch64/cpuamu.c		\
 				lib/cpus/aarch64/cpuamu_helpers.S
 
@@ -481,3 +482,5 @@
 ifeq (${SPMC_AT_EL3}, 1)
 PLAT_BL_COMMON_SOURCES	+=	plat/arm/board/fvp/fvp_el3_spmc.c
 endif
+
+PSCI_OS_INIT_MODE	:=	1
diff --git a/plat/arm/board/fvp_r/platform.mk b/plat/arm/board/fvp_r/platform.mk
index 93b5cf2..5dd28b9 100644
--- a/plat/arm/board/fvp_r/platform.mk
+++ b/plat/arm/board/fvp_r/platform.mk
@@ -69,7 +69,7 @@
 endif
 
 # Enable Activity Monitor Unit extensions by default
-ENABLE_AMU			:=	1
+ENABLE_FEAT_AMU			:=	2
 
 ifneq (${ENABLE_STACK_PROTECTOR},0)
 FVP_R_BL_COMMON_SOURCES	+=	plat/arm/board/fvp_r/fvp_r_stack_protector.c
diff --git a/plat/arm/board/rdn2/platform.mk b/plat/arm/board/rdn2/platform.mk
index b30e3fc..ca55036 100644
--- a/plat/arm/board/rdn2/platform.mk
+++ b/plat/arm/board/rdn2/platform.mk
@@ -87,4 +87,4 @@
 $(eval $(call TOOL_ADD_PAYLOAD,${NT_FW_CONFIG},--nt-fw-config))
 
 override CTX_INCLUDE_AARCH32_REGS	:= 0
-override ENABLE_AMU			:= 1
+override ENABLE_FEAT_AMU		:= 1
diff --git a/plat/arm/board/rdv1/platform.mk b/plat/arm/board/rdv1/platform.mk
index 11f5212..a5fba67 100644
--- a/plat/arm/board/rdv1/platform.mk
+++ b/plat/arm/board/rdv1/platform.mk
@@ -57,7 +57,7 @@
 $(eval $(call TOOL_ADD_PAYLOAD,${NT_FW_CONFIG},--nt-fw-config,${NT_FW_CONFIG}))
 
 override CTX_INCLUDE_AARCH32_REGS	:= 0
-override ENABLE_AMU			:= 1
+override ENABLE_FEAT_AMU		:= 1
 
 ifneq ($(CSS_SGI_PLATFORM_VARIANT),0)
  $(error "CSS_SGI_PLATFORM_VARIANT for RD-V1 should always be 0, \
diff --git a/plat/arm/board/rdv1mc/platform.mk b/plat/arm/board/rdv1mc/platform.mk
index df0b09a..92f7c10 100644
--- a/plat/arm/board/rdv1mc/platform.mk
+++ b/plat/arm/board/rdv1mc/platform.mk
@@ -68,7 +68,7 @@
 $(eval $(call TOOL_ADD_PAYLOAD,${NT_FW_CONFIG},--nt-fw-config,${NT_FW_CONFIG}))
 
 override CTX_INCLUDE_AARCH32_REGS	:= 0
-override ENABLE_AMU			:= 1
+override ENABLE_FEAT_AMU		:= 1
 
 ifneq ($(CSS_SGI_PLATFORM_VARIANT),0)
  $(error "CSS_SGI_PLATFORM_VARIANT for RD-V1-MC should always be 0, \
diff --git a/plat/arm/board/tc/platform.mk b/plat/arm/board/tc/platform.mk
index 5f4148c..c6a82de 100644
--- a/plat/arm/board/tc/platform.mk
+++ b/plat/arm/board/tc/platform.mk
@@ -163,7 +163,7 @@
 
 override ENABLE_SPE_FOR_NS	:= 0
 
-override ENABLE_AMU := 1
+override ENABLE_FEAT_AMU := 1
 override ENABLE_AMU_AUXILIARY_COUNTERS := 1
 override ENABLE_AMU_FCONF := 1
 
diff --git a/plat/arm/common/arm_pm.c b/plat/arm/common/arm_pm.c
index 62cc8bb..055ab36 100644
--- a/plat/arm/common/arm_pm.c
+++ b/plat/arm/common/arm_pm.c
@@ -79,7 +79,12 @@
 	 *  search if the number of entries justify the additional complexity.
 	 */
 	for (i = 0; !!arm_pm_idle_states[i]; i++) {
+#if PSCI_OS_INIT_MODE
+		if ((power_state & ~ARM_LAST_AT_PLVL_MASK) ==
+					arm_pm_idle_states[i])
+#else
 		if (power_state == arm_pm_idle_states[i])
+#endif /* __PSCI_OS_INIT_MODE__ */
 			break;
 	}
 
@@ -91,11 +96,14 @@
 	state_id = psci_get_pstate_id(power_state);
 
 	/* Parse the State ID and populate the state info parameter */
-	while (state_id) {
-		req_state->pwr_domain_state[i++] = state_id &
+	for (i = ARM_PWR_LVL0; i <= PLAT_MAX_PWR_LVL; i++) {
+		req_state->pwr_domain_state[i] = state_id &
 						ARM_LOCAL_PSTATE_MASK;
 		state_id >>= ARM_LOCAL_PSTATE_WIDTH;
 	}
+#if PSCI_OS_INIT_MODE
+	req_state->last_at_pwrlvl = state_id & ARM_LOCAL_PSTATE_MASK;
+#endif /* __PSCI_OS_INIT_MODE__ */
 
 	return PSCI_E_SUCCESS;
 }
diff --git a/plat/mediatek/common/common_config.mk b/plat/mediatek/common/common_config.mk
index 851eb2c..31a61e0 100644
--- a/plat/mediatek/common/common_config.mk
+++ b/plat/mediatek/common/common_config.mk
@@ -19,7 +19,7 @@
 ENABLE_STACK_PROTECTOR := strong
 # AMU, Kernel will access amuserenr_el0 if PE supported
 # Firmware _must_ implement AMU support
-ENABLE_AMU := 1
+ENABLE_FEAT_AMU := 2
 VENDOR_EXTEND_PUBEVENT_ENABLE := 1
 
 # MTK define options
diff --git a/plat/qti/common/src/qti_pm.c b/plat/qti/common/src/qti_pm.c
index ae361e9..1405ca6 100644
--- a/plat/qti/common/src/qti_pm.c
+++ b/plat/qti/common/src/qti_pm.c
@@ -24,6 +24,12 @@
 #define QTI_LOCAL_PSTATE_WIDTH		4
 #define QTI_LOCAL_PSTATE_MASK		((1 << QTI_LOCAL_PSTATE_WIDTH) - 1)
 
+#if PSCI_OS_INIT_MODE
+#define QTI_LAST_AT_PLVL_MASK		(QTI_LOCAL_PSTATE_MASK <<	\
+					 (QTI_LOCAL_PSTATE_WIDTH *	\
+					  (PLAT_MAX_PWR_LVL + 1)))
+#endif
+
 /* Make composite power state parameter till level 0 */
 #define qti_make_pwrstate_lvl0(lvl0_state, type) \
 		(((lvl0_state) << PSTATE_ID_SHIFT) | ((type) << PSTATE_TYPE_SHIFT))
@@ -88,7 +94,12 @@
 	 *  search if the number of entries justify the additional complexity.
 	 */
 	for (i = 0; !!qti_pm_idle_states[i]; i++) {
+#if PSCI_OS_INIT_MODE
+		if ((power_state & ~QTI_LAST_AT_PLVL_MASK) ==
+		    qti_pm_idle_states[i])
+#else
 		if (power_state == qti_pm_idle_states[i])
+#endif
 			break;
 	}
 
@@ -100,11 +111,14 @@
 	state_id = psci_get_pstate_id(power_state);
 
 	/* Parse the State ID and populate the state info parameter */
-	while (state_id) {
-		req_state->pwr_domain_state[i++] = state_id &
+	for (i = QTI_PWR_LVL0; i <= PLAT_MAX_PWR_LVL; i++) {
+		req_state->pwr_domain_state[i] = state_id &
 		    QTI_LOCAL_PSTATE_MASK;
 		state_id >>= QTI_LOCAL_PSTATE_WIDTH;
 	}
+#if PSCI_OS_INIT_MODE
+	req_state->last_at_pwrlvl = state_id & QTI_LOCAL_PSTATE_MASK;
+#endif
 
 	return PSCI_E_SUCCESS;
 }
@@ -177,6 +191,18 @@
 	}
 }
 
+#if PSCI_OS_INIT_MODE
+static int qti_node_suspend(const psci_power_state_t *target_state)
+{
+	qtiseclib_psci_node_suspend((const uint8_t *)target_state->
+				    pwr_domain_state);
+	if (is_cpu_off(target_state)) {
+		plat_qti_gic_cpuif_disable();
+		qti_set_cpupwrctlr_val();
+	}
+	return PSCI_E_SUCCESS;
+}
+#else
 static void qti_node_suspend(const psci_power_state_t *target_state)
 {
 	qtiseclib_psci_node_suspend((const uint8_t *)target_state->
@@ -186,6 +212,7 @@
 		qti_set_cpupwrctlr_val();
 	}
 }
+#endif
 
 static void qti_node_suspend_finish(const psci_power_state_t *target_state)
 {
diff --git a/plat/qti/msm8916/platform.mk b/plat/qti/msm8916/platform.mk
index 60fb25d..2baf203 100644
--- a/plat/qti/msm8916/platform.mk
+++ b/plat/qti/msm8916/platform.mk
@@ -43,7 +43,6 @@
 WARMBOOT_ENABLE_DCACHE_EARLY	:= 1
 
 # Disable features unsupported in ARMv8.0
-ENABLE_AMU			:= 0
 ENABLE_SPE_FOR_NS		:= 0
 ENABLE_SVE_FOR_NS		:= 0
 
diff --git a/plat/qti/sc7280/platform.mk b/plat/qti/sc7280/platform.mk
index df07bc4..528a1d4 100644
--- a/plat/qti/sc7280/platform.mk
+++ b/plat/qti/sc7280/platform.mk
@@ -28,6 +28,7 @@
 # Enable PSCI v1.0 extended state ID format
 PSCI_EXTENDED_STATE_ID	:=  1
 ARM_RECOM_STATE_ID_ENC  :=  1
+PSCI_OS_INIT_MODE	:=  1
 
 COLD_BOOT_SINGLE_CPU		:=	1
 PROGRAMMABLE_RESET_ADDRESS	:=	1