Squashed 'lib/mbedtls/external/mbedtls/' content from commit 2ca6c285a0dd

git-subtree-dir: lib/mbedtls/external/mbedtls
git-subtree-split: 2ca6c285a0dd3f33982dd57299012dacab1ff206
diff --git a/3rdparty/.gitignore b/3rdparty/.gitignore
new file mode 100644
index 0000000..5fc607b
--- /dev/null
+++ b/3rdparty/.gitignore
@@ -0,0 +1 @@
+/Makefile
diff --git a/3rdparty/CMakeLists.txt b/3rdparty/CMakeLists.txt
new file mode 100644
index 0000000..fa149bd
--- /dev/null
+++ b/3rdparty/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(everest)
+add_subdirectory(p256-m)
diff --git a/3rdparty/Makefile.inc b/3rdparty/Makefile.inc
new file mode 100644
index 0000000..70f316b
--- /dev/null
+++ b/3rdparty/Makefile.inc
@@ -0,0 +1,3 @@
+THIRDPARTY_DIR := $(dir $(lastword $(MAKEFILE_LIST)))
+include $(THIRDPARTY_DIR)/everest/Makefile.inc
+include $(THIRDPARTY_DIR)/p256-m/Makefile.inc
diff --git a/3rdparty/everest/.gitignore b/3rdparty/everest/.gitignore
new file mode 100644
index 0000000..f3c7a7c
--- /dev/null
+++ b/3rdparty/everest/.gitignore
@@ -0,0 +1 @@
+Makefile
diff --git a/3rdparty/everest/CMakeLists.txt b/3rdparty/everest/CMakeLists.txt
new file mode 100644
index 0000000..e0e5ade
--- /dev/null
+++ b/3rdparty/everest/CMakeLists.txt
@@ -0,0 +1,42 @@
+set(everest_target "${MBEDTLS_TARGET_PREFIX}everest")
+
+add_library(${everest_target}
+  library/everest.c
+  library/x25519.c
+  library/Hacl_Curve25519_joined.c)
+
+target_include_directories(${everest_target}
+  PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+         $<BUILD_INTERFACE:${MBEDTLS_DIR}/include>
+         $<INSTALL_INTERFACE:include>
+  PRIVATE include/everest
+          include/everest/kremlib
+          ${MBEDTLS_DIR}/library/)
+
+# Pass through MBEDTLS_CONFIG_FILE and MBEDTLS_USER_CONFIG_FILE.
+# This must be duplicated from library/CMakeLists.txt because
+# everest is not directly linked against any mbedtls targets,
+# so it does not inherit the compile definitions.
+if(MBEDTLS_CONFIG_FILE)
+    target_compile_definitions(${everest_target}
+        PUBLIC MBEDTLS_CONFIG_FILE="${MBEDTLS_CONFIG_FILE}")
+endif()
+if(MBEDTLS_USER_CONFIG_FILE)
+    target_compile_definitions(${everest_target}
+        PUBLIC MBEDTLS_USER_CONFIG_FILE="${MBEDTLS_USER_CONFIG_FILE}")
+endif()
+
+if(INSTALL_MBEDTLS_HEADERS)
+
+  install(DIRECTORY include/everest
+    DESTINATION include
+    FILE_PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
+    DIRECTORY_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
+    FILES_MATCHING PATTERN "*.h")
+
+endif(INSTALL_MBEDTLS_HEADERS)
+
+install(TARGETS ${everest_target}
+  EXPORT MbedTLSTargets
+  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ)
diff --git a/3rdparty/everest/Makefile.inc b/3rdparty/everest/Makefile.inc
new file mode 100644
index 0000000..8055ce9
--- /dev/null
+++ b/3rdparty/everest/Makefile.inc
@@ -0,0 +1,6 @@
+THIRDPARTY_INCLUDES+=-I$(THIRDPARTY_DIR)/everest/include -I$(THIRDPARTY_DIR)/everest/include/everest -I$(THIRDPARTY_DIR)/everest/include/everest/kremlib
+
+THIRDPARTY_CRYPTO_OBJECTS+= \
+	$(THIRDPARTY_DIR)/everest/library/everest.o \
+	$(THIRDPARTY_DIR)/everest/library/x25519.o \
+	$(THIRDPARTY_DIR)/everest/library/Hacl_Curve25519_joined.o
diff --git a/3rdparty/everest/README.md b/3rdparty/everest/README.md
new file mode 100644
index 0000000..bcf12c0
--- /dev/null
+++ b/3rdparty/everest/README.md
@@ -0,0 +1,5 @@
+The files in this directory stem from [Project Everest](https://project-everest.github.io/) and are distributed under the Apache 2.0 license.
+
+This is a formally verified implementation of Curve25519-based handshakes. The C code is automatically derived from the (verified) [original implementation](https://github.com/project-everest/hacl-star/tree/master/code/curve25519) in the [F* language](https://github.com/fstarlang/fstar) by [KreMLin](https://github.com/fstarlang/kremlin). Besides the improved safety and security, this implementation is also significantly faster than the default Curve25519 implementation in Mbed TLS.
+
+The caveat is that not all platforms are supported, although the version in `everest/library/legacy` should work on most systems. The main issue is that some platforms do not provide a 128-bit integer type, so KreMLin has to fall back on additional (also verified) code to simulate it, which reduces the overall performance gain. Explicitly supported platforms are currently `x86` and `x86_64` using gcc or clang, and Visual C (2010 and later).
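
For context, a minimal sketch of how this backend is typically switched on follows. The option names below (a user config referenced via `MBEDTLS_USER_CONFIG_FILE`) are assumptions about the Mbed TLS configuration system, not something this README states.

```c
/* Hypothetical user config fragment enabling the Everest Curve25519 backend.
 * Option names are assumptions based on the Mbed TLS configuration system. */
#define MBEDTLS_ECDH_C                          /* ECDH module */
#define MBEDTLS_ECP_DP_CURVE25519_ENABLED       /* Curve25519 group */
#define MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED    /* use the Everest implementation */
```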
diff --git a/3rdparty/everest/include/everest/Hacl_Curve25519.h b/3rdparty/everest/include/everest/Hacl_Curve25519.h
new file mode 100644
index 0000000..e3f5ba4
--- /dev/null
+++ b/3rdparty/everest/include/everest/Hacl_Curve25519.h
@@ -0,0 +1,21 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+/* This file was generated by KreMLin <https://github.com/FStarLang/kremlin>
+ * KreMLin invocation: /mnt/e/everest/verify/kremlin/krml -fc89 -fparentheses -fno-shadow -header /mnt/e/everest/verify/hdrcLh -minimal -fbuiltin-uint128 -fc89 -fparentheses -fno-shadow -header /mnt/e/everest/verify/hdrcLh -minimal -I /mnt/e/everest/verify/hacl-star/code/lib/kremlin -I /mnt/e/everest/verify/kremlin/kremlib/compat -I /mnt/e/everest/verify/hacl-star/specs -I /mnt/e/everest/verify/hacl-star/specs/old -I . -ccopt -march=native -verbose -ldopt -flto -tmpdir x25519-c -I ../bignum -bundle Hacl.Curve25519=* -minimal -add-include "kremlib.h" -skip-compilation x25519-c/out.krml -o x25519-c/Hacl_Curve25519.c
+ * F* version: 059db0c8
+ * KreMLin version: 916c37ac
+ */
+
+
+
+#ifndef __Hacl_Curve25519_H
+#define __Hacl_Curve25519_H
+
+
+#include "kremlib.h"
+
+void Hacl_Curve25519_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint);
+
+#define __Hacl_Curve25519_H_DEFINED
+#endif
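
As an illustration of the declaration above, here is a minimal sketch of deriving an X25519 public key with `Hacl_Curve25519_crypto_scalarmult`; the include path and the caller-provided 32-byte secret are assumptions.

```c
#include <stdint.h>
#include "Hacl_Curve25519.h"   /* assumed include path via -Iinclude/everest */

/* Derive an X25519 public key: multiply the 32-byte secret scalar by the
 * standard base point (9 followed by 31 zero bytes). */
void x25519_public_from_secret(uint8_t mypublic[32], uint8_t secret[32])
{
    uint8_t basepoint[32] = { 9 };   /* remaining bytes are zero-initialized */
    Hacl_Curve25519_crypto_scalarmult(mypublic, secret, basepoint);
}
```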
diff --git a/3rdparty/everest/include/everest/everest.h b/3rdparty/everest/include/everest/everest.h
new file mode 100644
index 0000000..392e792
--- /dev/null
+++ b/3rdparty/everest/include/everest/everest.h
@@ -0,0 +1,214 @@
+/*
+ *  Interface to code from Project Everest
+ *
+ *  Copyright 2016-2018 INRIA and Microsoft Corporation
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  This file is part of Mbed TLS (https://tls.mbed.org).
+ */
+
+#ifndef MBEDTLS_EVEREST_H
+#define MBEDTLS_EVEREST_H
+
+#include "everest/x25519.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Defines the source of the imported EC key.
+ */
+typedef enum
+{
+    MBEDTLS_EVEREST_ECDH_OURS,   /**< Our key. */
+    MBEDTLS_EVEREST_ECDH_THEIRS, /**< The key of the peer. */
+} mbedtls_everest_ecdh_side;
+
+typedef struct {
+    mbedtls_x25519_context ctx;
+} mbedtls_ecdh_context_everest;
+
+
+/**
+ * \brief           This function sets up the ECDH context with the information
+ *                  given.
+ *
+ *                  This function should be called after mbedtls_ecdh_init() but
+ *                  before mbedtls_ecdh_make_params(). There is no need to call
+ *                  this function before mbedtls_ecdh_read_params().
+ *
+ *                  This is the first function used by a TLS server for ECDHE
+ *                  ciphersuites.
+ *
+ * \param ctx       The ECDH context to set up.
+ * \param grp_id    The group id of the group to set up the context for.
+ *
+ * \return          \c 0 on success.
+ */
+int mbedtls_everest_setup( mbedtls_ecdh_context_everest *ctx, int grp_id );
+
+/**
+ * \brief           This function frees a context.
+ *
+ * \param ctx       The context to free.
+ */
+void mbedtls_everest_free( mbedtls_ecdh_context_everest *ctx );
+
+/**
+ * \brief           This function generates a public key and a TLS
+ *                  ServerKeyExchange payload.
+ *
+ *                  This is the second function used by a TLS server for ECDHE
+ *                  ciphersuites. (It is called after mbedtls_ecdh_setup().)
+ *
+ * \note            This function assumes that the ECP group (grp) of the
+ *                  \p ctx context has already been properly set,
+ *                  for example, using mbedtls_ecp_group_load().
+ *
+ * \see             ecp.h
+ *
+ * \param ctx       The ECDH context.
+ * \param olen      The number of Bytes written.
+ * \param buf       The destination buffer.
+ * \param blen      The length of the destination buffer.
+ * \param f_rng     The RNG function.
+ * \param p_rng     The RNG context.
+ *
+ * \return          \c 0 on success.
+ * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ */
+int mbedtls_everest_make_params( mbedtls_ecdh_context_everest *ctx, size_t *olen,
+                                 unsigned char *buf, size_t blen,
+                                 int( *f_rng )( void *, unsigned char *, size_t ),
+                                 void *p_rng );
+
+/**
+ * \brief           This function parses and processes a TLS ServerKeyExchange
+ *                  payload.
+ *
+ *                  This is the first function used by a TLS client for ECDHE
+ *                  ciphersuites.
+ *
+ * \see             ecp.h
+ *
+ * \param ctx       The ECDH context.
+ * \param buf       The pointer to the start of the input buffer.
+ * \param end       The address for one Byte past the end of the buffer.
+ *
+ * \return          \c 0 on success.
+ * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ *
+ */
+int mbedtls_everest_read_params( mbedtls_ecdh_context_everest *ctx,
+                                 const unsigned char **buf, const unsigned char *end );
+
+/**
+ * \brief           This function sets up an ECDH context from an EC key.
+ *
+ *                  It is used by clients and servers in place of the
+ *                  ServerKeyExchange for static ECDH, and imports ECDH
+ *                  parameters from the EC key information of a certificate.
+ *
+ * \see             ecp.h
+ *
+ * \param ctx       The ECDH context to set up.
+ * \param key       The EC key to use.
+ * \param side      Defines the source of the key: \c MBEDTLS_EVEREST_ECDH_OURS
+ *                  for our key, or \c MBEDTLS_EVEREST_ECDH_THEIRS for the key
+ *                  of the peer.
+ *
+ * \return          \c 0 on success.
+ * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ *
+ */
+int mbedtls_everest_get_params( mbedtls_ecdh_context_everest *ctx, const mbedtls_ecp_keypair *key,
+                                mbedtls_everest_ecdh_side side );
+
+/**
+ * \brief           This function generates a public key and a TLS
+ *                  ClientKeyExchange payload.
+ *
+ *                  This is the second function used by a TLS client for ECDH(E)
+ *                  ciphersuites.
+ *
+ * \see             ecp.h
+ *
+ * \param ctx       The ECDH context.
+ * \param olen      The number of Bytes written.
+ * \param buf       The destination buffer.
+ * \param blen      The size of the destination buffer.
+ * \param f_rng     The RNG function.
+ * \param p_rng     The RNG context.
+ *
+ * \return          \c 0 on success.
+ * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ */
+int mbedtls_everest_make_public( mbedtls_ecdh_context_everest *ctx, size_t *olen,
+                                 unsigned char *buf, size_t blen,
+                                 int( *f_rng )( void *, unsigned char *, size_t ),
+                                 void *p_rng );
+
+/**
+ * \brief       This function parses and processes a TLS ClientKeyExchange
+ *              payload.
+ *
+ *              This is the third function used by a TLS server for ECDH(E)
+ *              ciphersuites. (It is called after mbedtls_ecdh_setup() and
+ *              mbedtls_ecdh_make_params().)
+ *
+ * \see         ecp.h
+ *
+ * \param ctx   The ECDH context.
+ * \param buf   The start of the input buffer.
+ * \param blen  The length of the input buffer.
+ *
+ * \return      \c 0 on success.
+ * \return      An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ */
+int mbedtls_everest_read_public( mbedtls_ecdh_context_everest *ctx,
+                                 const unsigned char *buf, size_t blen );
+
+/**
+ * \brief           This function derives and exports the shared secret.
+ *
+ *                  This is the last function used by both TLS clients
+ *                  and servers.
+ *
+ * \note            If \p f_rng is not NULL, it is used to implement
+ *                  countermeasures against side-channel attacks.
+ *                  For more information, see mbedtls_ecp_mul().
+ *
+ * \see             ecp.h
+ *
+ * \param ctx       The ECDH context.
+ * \param olen      The number of Bytes written.
+ * \param buf       The destination buffer.
+ * \param blen      The length of the destination buffer.
+ * \param f_rng     The RNG function.
+ * \param p_rng     The RNG context.
+ *
+ * \return          \c 0 on success.
+ * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ */
+int mbedtls_everest_calc_secret( mbedtls_ecdh_context_everest *ctx, size_t *olen,
+                                 unsigned char *buf, size_t blen,
+                                 int( *f_rng )( void *, unsigned char *, size_t ),
+                                 void *p_rng );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* MBEDTLS_EVEREST_H */
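
To tie the declarations above together, a minimal sketch of the server-side ECDHE flow follows. The group id `MBEDTLS_ECP_DP_CURVE25519` (from `mbedtls/ecp.h`), the externally seeded RNG callback (e.g. `mbedtls_ctr_drbg_random`), and the wrapper name are assumptions; error handling is reduced to early exits.

```c
#include <stddef.h>
#include "mbedtls/ecp.h"          /* assumed available for MBEDTLS_ECP_DP_CURVE25519 */
#include "everest/everest.h"

/* Sketch: generate the ServerKeyExchange payload, import the peer's public
 * key, and derive the shared secret with the Everest backend. */
int everest_server_exchange(int (*f_rng)(void *, unsigned char *, size_t), void *p_rng,
                            unsigned char *skx, size_t skx_size, size_t *skx_len,
                            const unsigned char *peer_pub, size_t peer_pub_len,
                            unsigned char *secret, size_t secret_size, size_t *secret_len)
{
    mbedtls_ecdh_context_everest ctx;
    int ret = mbedtls_everest_setup(&ctx, MBEDTLS_ECP_DP_CURVE25519); /* bind to Curve25519 */
    if (ret != 0)
        return ret;

    ret = mbedtls_everest_make_params(&ctx, skx_len, skx, skx_size, f_rng, p_rng);
    if (ret == 0)
        ret = mbedtls_everest_read_public(&ctx, peer_pub, peer_pub_len);
    if (ret == 0)
        ret = mbedtls_everest_calc_secret(&ctx, secret_len, secret, secret_size, f_rng, p_rng);

    mbedtls_everest_free(&ctx);
    return ret;
}
```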
diff --git a/3rdparty/everest/include/everest/kremlib.h b/3rdparty/everest/include/everest/kremlib.h
new file mode 100644
index 0000000..f06663f
--- /dev/null
+++ b/3rdparty/everest/include/everest/kremlib.h
@@ -0,0 +1,29 @@
+/*
+ *  Copyright 2016-2018 INRIA and Microsoft Corporation
+ *
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  This file is part of Mbed TLS (https://tls.mbed.org) and
+ *  originated from Project Everest (https://project-everest.github.io/)
+ */
+
+#ifndef __KREMLIB_H
+#define __KREMLIB_H
+
+#include "kremlin/internal/target.h"
+#include "kremlin/internal/types.h"
+#include "kremlin/c_endianness.h"
+
+#endif     /* __KREMLIB_H */
diff --git a/3rdparty/everest/include/everest/kremlib/FStar_UInt128.h b/3rdparty/everest/include/everest/kremlib/FStar_UInt128.h
new file mode 100644
index 0000000..d71c882
--- /dev/null
+++ b/3rdparty/everest/include/everest/kremlib/FStar_UInt128.h
@@ -0,0 +1,124 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+/* This file was generated by KreMLin <https://github.com/FStarLang/kremlin>
+ * KreMLin invocation: ../krml -fc89 -fparentheses -fno-shadow -header /mnt/e/everest/verify/hdrB9w -minimal -fparentheses -fcurly-braces -fno-shadow -header copyright-header.txt -minimal -tmpdir dist/uint128 -skip-compilation -extract-uints -add-include <inttypes.h> -add-include <stdbool.h> -add-include "kremlin/internal/types.h" -bundle FStar.UInt128=* extracted/prims.krml extracted/FStar_Pervasives_Native.krml extracted/FStar_Pervasives.krml extracted/FStar_Mul.krml extracted/FStar_Squash.krml extracted/FStar_Classical.krml extracted/FStar_StrongExcludedMiddle.krml extracted/FStar_FunctionalExtensionality.krml extracted/FStar_List_Tot_Base.krml extracted/FStar_List_Tot_Properties.krml extracted/FStar_List_Tot.krml extracted/FStar_Seq_Base.krml extracted/FStar_Seq_Properties.krml extracted/FStar_Seq.krml extracted/FStar_Math_Lib.krml extracted/FStar_Math_Lemmas.krml extracted/FStar_BitVector.krml extracted/FStar_UInt.krml extracted/FStar_UInt32.krml extracted/FStar_Int.krml extracted/FStar_Int16.krml extracted/FStar_Preorder.krml extracted/FStar_Ghost.krml extracted/FStar_ErasedLogic.krml extracted/FStar_UInt64.krml extracted/FStar_Set.krml extracted/FStar_PropositionalExtensionality.krml extracted/FStar_PredicateExtensionality.krml extracted/FStar_TSet.krml extracted/FStar_Monotonic_Heap.krml extracted/FStar_Heap.krml extracted/FStar_Map.krml extracted/FStar_Monotonic_HyperHeap.krml extracted/FStar_Monotonic_HyperStack.krml extracted/FStar_HyperStack.krml extracted/FStar_Monotonic_Witnessed.krml extracted/FStar_HyperStack_ST.krml extracted/FStar_HyperStack_All.krml extracted/FStar_Date.krml extracted/FStar_Universe.krml extracted/FStar_GSet.krml extracted/FStar_ModifiesGen.krml extracted/LowStar_Monotonic_Buffer.krml extracted/LowStar_Buffer.krml extracted/Spec_Loops.krml extracted/LowStar_BufferOps.krml extracted/C_Loops.krml extracted/FStar_UInt8.krml extracted/FStar_Kremlin_Endianness.krml extracted/FStar_UInt63.krml extracted/FStar_Exn.krml extracted/FStar_ST.krml extracted/FStar_All.krml extracted/FStar_Dyn.krml extracted/FStar_Int63.krml extracted/FStar_Int64.krml extracted/FStar_Int32.krml extracted/FStar_Int8.krml extracted/FStar_UInt16.krml extracted/FStar_Int_Cast.krml extracted/FStar_UInt128.krml extracted/C_Endianness.krml extracted/FStar_List.krml extracted/FStar_Float.krml extracted/FStar_IO.krml extracted/C.krml extracted/FStar_Char.krml extracted/FStar_String.krml extracted/LowStar_Modifies.krml extracted/C_String.krml extracted/FStar_Bytes.krml extracted/FStar_HyperStack_IO.krml extracted/C_Failure.krml extracted/TestLib.krml extracted/FStar_Int_Cast_Full.krml
+ * F* version: 059db0c8
+ * KreMLin version: 916c37ac
+ */
+
+
+
+#ifndef __FStar_UInt128_H
+#define __FStar_UInt128_H
+
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include "kremlin/internal/types.h"
+
+uint64_t FStar_UInt128___proj__Mkuint128__item__low(FStar_UInt128_uint128 projectee);
+
+uint64_t FStar_UInt128___proj__Mkuint128__item__high(FStar_UInt128_uint128 projectee);
+
+typedef FStar_UInt128_uint128 FStar_UInt128_t;
+
+FStar_UInt128_uint128 FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+FStar_UInt128_uint128
+FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+FStar_UInt128_uint128 FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+FStar_UInt128_uint128 FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+FStar_UInt128_uint128
+FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+FStar_UInt128_uint128 FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+FStar_UInt128_uint128 FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+FStar_UInt128_uint128 FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+FStar_UInt128_uint128 FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+FStar_UInt128_uint128 FStar_UInt128_lognot(FStar_UInt128_uint128 a);
+
+FStar_UInt128_uint128 FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s);
+
+FStar_UInt128_uint128 FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s);
+
+bool FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+bool FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+bool FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+bool FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+bool FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+FStar_UInt128_uint128 FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+FStar_UInt128_uint128 FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b);
+
+FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a);
+
+uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a);
+
+extern FStar_UInt128_uint128
+(*FStar_UInt128_op_Plus_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1);
+
+extern FStar_UInt128_uint128
+(*FStar_UInt128_op_Plus_Question_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1);
+
+extern FStar_UInt128_uint128
+(*FStar_UInt128_op_Plus_Percent_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1);
+
+extern FStar_UInt128_uint128
+(*FStar_UInt128_op_Subtraction_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1);
+
+extern FStar_UInt128_uint128
+(*FStar_UInt128_op_Subtraction_Question_Hat)(
+  FStar_UInt128_uint128 x0,
+  FStar_UInt128_uint128 x1
+);
+
+extern FStar_UInt128_uint128
+(*FStar_UInt128_op_Subtraction_Percent_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1);
+
+extern FStar_UInt128_uint128
+(*FStar_UInt128_op_Amp_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1);
+
+extern FStar_UInt128_uint128
+(*FStar_UInt128_op_Hat_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1);
+
+extern FStar_UInt128_uint128
+(*FStar_UInt128_op_Bar_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1);
+
+extern FStar_UInt128_uint128
+(*FStar_UInt128_op_Less_Less_Hat)(FStar_UInt128_uint128 x0, uint32_t x1);
+
+extern FStar_UInt128_uint128
+(*FStar_UInt128_op_Greater_Greater_Hat)(FStar_UInt128_uint128 x0, uint32_t x1);
+
+extern bool (*FStar_UInt128_op_Equals_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1);
+
+extern bool
+(*FStar_UInt128_op_Greater_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1);
+
+extern bool (*FStar_UInt128_op_Less_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1);
+
+extern bool
+(*FStar_UInt128_op_Greater_Equals_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1);
+
+extern bool
+(*FStar_UInt128_op_Less_Equals_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1);
+
+FStar_UInt128_uint128 FStar_UInt128_mul32(uint64_t x, uint32_t y);
+
+FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y);
+
+#define __FStar_UInt128_H_DEFINED
+#endif
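
A short sketch of how this API composes, regardless of whether `FStar_UInt128_uint128` is backed by `unsigned __int128` or the two-limb struct fallback; the helper name is hypothetical.

```c
#include <stdint.h>
#include "FStar_UInt128.h"   /* assumed include path via -Iinclude/everest/kremlib */

/* Widening multiply-accumulate: returns the high 64 bits of a*b + c. */
static uint64_t mul_add_high64(uint64_t a, uint64_t b, uint64_t c)
{
    FStar_UInt128_uint128 acc = FStar_UInt128_mul_wide(a, b);
    acc = FStar_UInt128_add(acc, FStar_UInt128_uint64_to_uint128(c));
    return FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(acc, 64U));
}
```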
diff --git a/3rdparty/everest/include/everest/kremlib/FStar_UInt64_FStar_UInt32_FStar_UInt16_FStar_UInt8.h b/3rdparty/everest/include/everest/kremlib/FStar_UInt64_FStar_UInt32_FStar_UInt16_FStar_UInt8.h
new file mode 100644
index 0000000..21560c4
--- /dev/null
+++ b/3rdparty/everest/include/everest/kremlib/FStar_UInt64_FStar_UInt32_FStar_UInt16_FStar_UInt8.h
@@ -0,0 +1,280 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+/* This file was generated by KreMLin <https://github.com/FStarLang/kremlin>
+ * KreMLin invocation: ../krml -fc89 -fparentheses -fno-shadow -header /mnt/e/everest/verify/hdrB9w -minimal -fparentheses -fcurly-braces -fno-shadow -header copyright-header.txt -minimal -tmpdir dist/minimal -skip-compilation -extract-uints -add-include <inttypes.h> -add-include <stdbool.h> -add-include "kremlin/internal/compat.h" -add-include "kremlin/internal/types.h" -bundle FStar.UInt64+FStar.UInt32+FStar.UInt16+FStar.UInt8=* extracted/prims.krml extracted/FStar_Pervasives_Native.krml extracted/FStar_Pervasives.krml extracted/FStar_Mul.krml extracted/FStar_Squash.krml extracted/FStar_Classical.krml extracted/FStar_StrongExcludedMiddle.krml extracted/FStar_FunctionalExtensionality.krml extracted/FStar_List_Tot_Base.krml extracted/FStar_List_Tot_Properties.krml extracted/FStar_List_Tot.krml extracted/FStar_Seq_Base.krml extracted/FStar_Seq_Properties.krml extracted/FStar_Seq.krml extracted/FStar_Math_Lib.krml extracted/FStar_Math_Lemmas.krml extracted/FStar_BitVector.krml extracted/FStar_UInt.krml extracted/FStar_UInt32.krml extracted/FStar_Int.krml extracted/FStar_Int16.krml extracted/FStar_Preorder.krml extracted/FStar_Ghost.krml extracted/FStar_ErasedLogic.krml extracted/FStar_UInt64.krml extracted/FStar_Set.krml extracted/FStar_PropositionalExtensionality.krml extracted/FStar_PredicateExtensionality.krml extracted/FStar_TSet.krml extracted/FStar_Monotonic_Heap.krml extracted/FStar_Heap.krml extracted/FStar_Map.krml extracted/FStar_Monotonic_HyperHeap.krml extracted/FStar_Monotonic_HyperStack.krml extracted/FStar_HyperStack.krml extracted/FStar_Monotonic_Witnessed.krml extracted/FStar_HyperStack_ST.krml extracted/FStar_HyperStack_All.krml extracted/FStar_Date.krml extracted/FStar_Universe.krml extracted/FStar_GSet.krml extracted/FStar_ModifiesGen.krml extracted/LowStar_Monotonic_Buffer.krml extracted/LowStar_Buffer.krml extracted/Spec_Loops.krml extracted/LowStar_BufferOps.krml extracted/C_Loops.krml extracted/FStar_UInt8.krml extracted/FStar_Kremlin_Endianness.krml extracted/FStar_UInt63.krml extracted/FStar_Exn.krml extracted/FStar_ST.krml extracted/FStar_All.krml extracted/FStar_Dyn.krml extracted/FStar_Int63.krml extracted/FStar_Int64.krml extracted/FStar_Int32.krml extracted/FStar_Int8.krml extracted/FStar_UInt16.krml extracted/FStar_Int_Cast.krml extracted/FStar_UInt128.krml extracted/C_Endianness.krml extracted/FStar_List.krml extracted/FStar_Float.krml extracted/FStar_IO.krml extracted/C.krml extracted/FStar_Char.krml extracted/FStar_String.krml extracted/LowStar_Modifies.krml extracted/C_String.krml extracted/FStar_Bytes.krml extracted/FStar_HyperStack_IO.krml extracted/C_Failure.krml extracted/TestLib.krml extracted/FStar_Int_Cast_Full.krml
+ * F* version: 059db0c8
+ * KreMLin version: 916c37ac
+ */
+
+
+
+#ifndef __FStar_UInt64_FStar_UInt32_FStar_UInt16_FStar_UInt8_H
+#define __FStar_UInt64_FStar_UInt32_FStar_UInt16_FStar_UInt8_H
+
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include "kremlin/internal/compat.h"
+#include "kremlin/internal/types.h"
+
+extern Prims_int FStar_UInt64_n;
+
+extern Prims_int FStar_UInt64_v(uint64_t x0);
+
+extern uint64_t FStar_UInt64_uint_to_t(Prims_int x0);
+
+extern uint64_t FStar_UInt64_add(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_add_underspec(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_add_mod(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_sub(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_sub_underspec(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_sub_mod(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_mul(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_mul_underspec(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_mul_mod(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_mul_div(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_div(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_rem(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_logand(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_logxor(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_logor(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_lognot(uint64_t x0);
+
+extern uint64_t FStar_UInt64_shift_right(uint64_t x0, uint32_t x1);
+
+extern uint64_t FStar_UInt64_shift_left(uint64_t x0, uint32_t x1);
+
+extern bool FStar_UInt64_eq(uint64_t x0, uint64_t x1);
+
+extern bool FStar_UInt64_gt(uint64_t x0, uint64_t x1);
+
+extern bool FStar_UInt64_gte(uint64_t x0, uint64_t x1);
+
+extern bool FStar_UInt64_lt(uint64_t x0, uint64_t x1);
+
+extern bool FStar_UInt64_lte(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_minus(uint64_t x0);
+
+extern uint32_t FStar_UInt64_n_minus_one;
+
+uint64_t FStar_UInt64_eq_mask(uint64_t a, uint64_t b);
+
+uint64_t FStar_UInt64_gte_mask(uint64_t a, uint64_t b);
+
+extern Prims_string FStar_UInt64_to_string(uint64_t x0);
+
+extern uint64_t FStar_UInt64_of_string(Prims_string x0);
+
+extern Prims_int FStar_UInt32_n;
+
+extern Prims_int FStar_UInt32_v(uint32_t x0);
+
+extern uint32_t FStar_UInt32_uint_to_t(Prims_int x0);
+
+extern uint32_t FStar_UInt32_add(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_add_underspec(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_add_mod(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_sub(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_sub_underspec(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_sub_mod(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_mul(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_mul_underspec(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_mul_mod(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_mul_div(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_div(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_rem(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_logand(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_logxor(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_logor(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_lognot(uint32_t x0);
+
+extern uint32_t FStar_UInt32_shift_right(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_shift_left(uint32_t x0, uint32_t x1);
+
+extern bool FStar_UInt32_eq(uint32_t x0, uint32_t x1);
+
+extern bool FStar_UInt32_gt(uint32_t x0, uint32_t x1);
+
+extern bool FStar_UInt32_gte(uint32_t x0, uint32_t x1);
+
+extern bool FStar_UInt32_lt(uint32_t x0, uint32_t x1);
+
+extern bool FStar_UInt32_lte(uint32_t x0, uint32_t x1);
+
+extern uint32_t FStar_UInt32_minus(uint32_t x0);
+
+extern uint32_t FStar_UInt32_n_minus_one;
+
+uint32_t FStar_UInt32_eq_mask(uint32_t a, uint32_t b);
+
+uint32_t FStar_UInt32_gte_mask(uint32_t a, uint32_t b);
+
+extern Prims_string FStar_UInt32_to_string(uint32_t x0);
+
+extern uint32_t FStar_UInt32_of_string(Prims_string x0);
+
+extern Prims_int FStar_UInt16_n;
+
+extern Prims_int FStar_UInt16_v(uint16_t x0);
+
+extern uint16_t FStar_UInt16_uint_to_t(Prims_int x0);
+
+extern uint16_t FStar_UInt16_add(uint16_t x0, uint16_t x1);
+
+extern uint16_t FStar_UInt16_add_underspec(uint16_t x0, uint16_t x1);
+
+extern uint16_t FStar_UInt16_add_mod(uint16_t x0, uint16_t x1);
+
+extern uint16_t FStar_UInt16_sub(uint16_t x0, uint16_t x1);
+
+extern uint16_t FStar_UInt16_sub_underspec(uint16_t x0, uint16_t x1);
+
+extern uint16_t FStar_UInt16_sub_mod(uint16_t x0, uint16_t x1);
+
+extern uint16_t FStar_UInt16_mul(uint16_t x0, uint16_t x1);
+
+extern uint16_t FStar_UInt16_mul_underspec(uint16_t x0, uint16_t x1);
+
+extern uint16_t FStar_UInt16_mul_mod(uint16_t x0, uint16_t x1);
+
+extern uint16_t FStar_UInt16_mul_div(uint16_t x0, uint16_t x1);
+
+extern uint16_t FStar_UInt16_div(uint16_t x0, uint16_t x1);
+
+extern uint16_t FStar_UInt16_rem(uint16_t x0, uint16_t x1);
+
+extern uint16_t FStar_UInt16_logand(uint16_t x0, uint16_t x1);
+
+extern uint16_t FStar_UInt16_logxor(uint16_t x0, uint16_t x1);
+
+extern uint16_t FStar_UInt16_logor(uint16_t x0, uint16_t x1);
+
+extern uint16_t FStar_UInt16_lognot(uint16_t x0);
+
+extern uint16_t FStar_UInt16_shift_right(uint16_t x0, uint32_t x1);
+
+extern uint16_t FStar_UInt16_shift_left(uint16_t x0, uint32_t x1);
+
+extern bool FStar_UInt16_eq(uint16_t x0, uint16_t x1);
+
+extern bool FStar_UInt16_gt(uint16_t x0, uint16_t x1);
+
+extern bool FStar_UInt16_gte(uint16_t x0, uint16_t x1);
+
+extern bool FStar_UInt16_lt(uint16_t x0, uint16_t x1);
+
+extern bool FStar_UInt16_lte(uint16_t x0, uint16_t x1);
+
+extern uint16_t FStar_UInt16_minus(uint16_t x0);
+
+extern uint32_t FStar_UInt16_n_minus_one;
+
+uint16_t FStar_UInt16_eq_mask(uint16_t a, uint16_t b);
+
+uint16_t FStar_UInt16_gte_mask(uint16_t a, uint16_t b);
+
+extern Prims_string FStar_UInt16_to_string(uint16_t x0);
+
+extern uint16_t FStar_UInt16_of_string(Prims_string x0);
+
+extern Prims_int FStar_UInt8_n;
+
+extern Prims_int FStar_UInt8_v(uint8_t x0);
+
+extern uint8_t FStar_UInt8_uint_to_t(Prims_int x0);
+
+extern uint8_t FStar_UInt8_add(uint8_t x0, uint8_t x1);
+
+extern uint8_t FStar_UInt8_add_underspec(uint8_t x0, uint8_t x1);
+
+extern uint8_t FStar_UInt8_add_mod(uint8_t x0, uint8_t x1);
+
+extern uint8_t FStar_UInt8_sub(uint8_t x0, uint8_t x1);
+
+extern uint8_t FStar_UInt8_sub_underspec(uint8_t x0, uint8_t x1);
+
+extern uint8_t FStar_UInt8_sub_mod(uint8_t x0, uint8_t x1);
+
+extern uint8_t FStar_UInt8_mul(uint8_t x0, uint8_t x1);
+
+extern uint8_t FStar_UInt8_mul_underspec(uint8_t x0, uint8_t x1);
+
+extern uint8_t FStar_UInt8_mul_mod(uint8_t x0, uint8_t x1);
+
+extern uint8_t FStar_UInt8_mul_div(uint8_t x0, uint8_t x1);
+
+extern uint8_t FStar_UInt8_div(uint8_t x0, uint8_t x1);
+
+extern uint8_t FStar_UInt8_rem(uint8_t x0, uint8_t x1);
+
+extern uint8_t FStar_UInt8_logand(uint8_t x0, uint8_t x1);
+
+extern uint8_t FStar_UInt8_logxor(uint8_t x0, uint8_t x1);
+
+extern uint8_t FStar_UInt8_logor(uint8_t x0, uint8_t x1);
+
+extern uint8_t FStar_UInt8_lognot(uint8_t x0);
+
+extern uint8_t FStar_UInt8_shift_right(uint8_t x0, uint32_t x1);
+
+extern uint8_t FStar_UInt8_shift_left(uint8_t x0, uint32_t x1);
+
+extern bool FStar_UInt8_eq(uint8_t x0, uint8_t x1);
+
+extern bool FStar_UInt8_gt(uint8_t x0, uint8_t x1);
+
+extern bool FStar_UInt8_gte(uint8_t x0, uint8_t x1);
+
+extern bool FStar_UInt8_lt(uint8_t x0, uint8_t x1);
+
+extern bool FStar_UInt8_lte(uint8_t x0, uint8_t x1);
+
+extern uint8_t FStar_UInt8_minus(uint8_t x0);
+
+extern uint32_t FStar_UInt8_n_minus_one;
+
+uint8_t FStar_UInt8_eq_mask(uint8_t a, uint8_t b);
+
+uint8_t FStar_UInt8_gte_mask(uint8_t a, uint8_t b);
+
+extern Prims_string FStar_UInt8_to_string(uint8_t x0);
+
+extern uint8_t FStar_UInt8_of_string(Prims_string x0);
+
+typedef uint8_t FStar_UInt8_byte;
+
+#define __FStar_UInt64_FStar_UInt32_FStar_UInt16_FStar_UInt8_H_DEFINED
+#endif
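
The `*_eq_mask`/`*_gte_mask` helpers declared above return an all-ones word when the condition holds and zero otherwise, which enables branch-free selection. A minimal sketch (the wrapper name is hypothetical):

```c
#include <stdint.h>
#include "FStar_UInt64_FStar_UInt32_FStar_UInt16_FStar_UInt8.h"

/* Constant-time select: returns x when a == b, y otherwise, without branching. */
static uint64_t ct_select64(uint64_t a, uint64_t b, uint64_t x, uint64_t y)
{
    uint64_t mask = FStar_UInt64_eq_mask(a, b);   /* all ones iff a == b */
    return (x & mask) | (y & ~mask);
}
```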
diff --git a/3rdparty/everest/include/everest/kremlin/c_endianness.h b/3rdparty/everest/include/everest/kremlin/c_endianness.h
new file mode 100644
index 0000000..5cfde5d
--- /dev/null
+++ b/3rdparty/everest/include/everest/kremlin/c_endianness.h
@@ -0,0 +1,204 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+#ifndef __KREMLIN_ENDIAN_H
+#define __KREMLIN_ENDIAN_H
+
+#include <string.h>
+#include <inttypes.h>
+
+/******************************************************************************/
+/* Implementing C.fst (part 2: endian-ness macros)                            */
+/******************************************************************************/
+
+/* ... for Linux */
+#if defined(__linux__) || defined(__CYGWIN__)
+#  include <endian.h>
+
+/* ... for OSX */
+#elif defined(__APPLE__)
+#  include <libkern/OSByteOrder.h>
+#  define htole64(x) OSSwapHostToLittleInt64(x)
+#  define le64toh(x) OSSwapLittleToHostInt64(x)
+#  define htobe64(x) OSSwapHostToBigInt64(x)
+#  define be64toh(x) OSSwapBigToHostInt64(x)
+
+#  define htole16(x) OSSwapHostToLittleInt16(x)
+#  define le16toh(x) OSSwapLittleToHostInt16(x)
+#  define htobe16(x) OSSwapHostToBigInt16(x)
+#  define be16toh(x) OSSwapBigToHostInt16(x)
+
+#  define htole32(x) OSSwapHostToLittleInt32(x)
+#  define le32toh(x) OSSwapLittleToHostInt32(x)
+#  define htobe32(x) OSSwapHostToBigInt32(x)
+#  define be32toh(x) OSSwapBigToHostInt32(x)
+
+/* ... for Solaris */
+#elif defined(__sun__)
+#  include <sys/byteorder.h>
+#  define htole64(x) LE_64(x)
+#  define le64toh(x) LE_64(x)
+#  define htobe64(x) BE_64(x)
+#  define be64toh(x) BE_64(x)
+
+#  define htole16(x) LE_16(x)
+#  define le16toh(x) LE_16(x)
+#  define htobe16(x) BE_16(x)
+#  define be16toh(x) BE_16(x)
+
+#  define htole32(x) LE_32(x)
+#  define le32toh(x) LE_32(x)
+#  define htobe32(x) BE_32(x)
+#  define be32toh(x) BE_32(x)
+
+/* ... for the BSDs */
+#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
+#  include <sys/endian.h>
+#elif defined(__OpenBSD__)
+#  include <endian.h>
+
+/* ... for Windows (MSVC)... not targeting XBOX 360! */
+#elif defined(_MSC_VER)
+
+#  include <stdlib.h>
+#  define htobe16(x) _byteswap_ushort(x)
+#  define htole16(x) (x)
+#  define be16toh(x) _byteswap_ushort(x)
+#  define le16toh(x) (x)
+
+#  define htobe32(x) _byteswap_ulong(x)
+#  define htole32(x) (x)
+#  define be32toh(x) _byteswap_ulong(x)
+#  define le32toh(x) (x)
+
+#  define htobe64(x) _byteswap_uint64(x)
+#  define htole64(x) (x)
+#  define be64toh(x) _byteswap_uint64(x)
+#  define le64toh(x) (x)
+
+/* ... for Windows (GCC-like, e.g. mingw or clang) */
+#elif (defined(_WIN32) || defined(_WIN64)) &&                                  \
+    (defined(__GNUC__) || defined(__clang__))
+
+#  define htobe16(x) __builtin_bswap16(x)
+#  define htole16(x) (x)
+#  define be16toh(x) __builtin_bswap16(x)
+#  define le16toh(x) (x)
+
+#  define htobe32(x) __builtin_bswap32(x)
+#  define htole32(x) (x)
+#  define be32toh(x) __builtin_bswap32(x)
+#  define le32toh(x) (x)
+
+#  define htobe64(x) __builtin_bswap64(x)
+#  define htole64(x) (x)
+#  define be64toh(x) __builtin_bswap64(x)
+#  define le64toh(x) (x)
+
+/* ... generic big-endian fallback code */
+#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+
+/* byte swapping code inspired by:
+ * https://github.com/rweather/arduinolibs/blob/master/libraries/Crypto/utility/EndianUtil.h
+ * */
+
+#  define htobe32(x) (x)
+#  define be32toh(x) (x)
+#  define htole32(x)                                                           \
+    (__extension__({                                                           \
+      uint32_t _temp = (x);                                                    \
+      ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) |             \
+          ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000);          \
+    }))
+#  define le32toh(x) (htole32((x)))
+
+#  define htobe64(x) (x)
+#  define be64toh(x) (x)
+#  define htole64(x)                                                           \
+    (__extension__({                                                           \
+      uint64_t __temp = (x);                                                   \
+      uint32_t __low = htobe32((uint32_t)__temp);                              \
+      uint32_t __high = htobe32((uint32_t)(__temp >> 32));                     \
+      (((uint64_t)__low) << 32) | __high;                                      \
+    }))
+#  define le64toh(x) (htole64((x)))
+
+/* ... generic little-endian fallback code */
+#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+
+#  define htole32(x) (x)
+#  define le32toh(x) (x)
+#  define htobe32(x)                                                           \
+    (__extension__({                                                           \
+      uint32_t _temp = (x);                                                    \
+      ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) |             \
+          ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000);          \
+    }))
+#  define be32toh(x) (htobe32((x)))
+
+#  define htole64(x) (x)
+#  define le64toh(x) (x)
+#  define htobe64(x)                                                           \
+    (__extension__({                                                           \
+      uint64_t __temp = (x);                                                   \
+      uint32_t __low = htobe32((uint32_t)__temp);                              \
+      uint32_t __high = htobe32((uint32_t)(__temp >> 32));                     \
+      (((uint64_t)__low) << 32) | __high;                                      \
+    }))
+#  define be64toh(x) (htobe64((x)))
+
+/* ... couldn't determine endian-ness of the target platform */
+#else
+#  error "Please define __BYTE_ORDER__!"
+
+#endif /* defined(__linux__) || ... */
+
+/* Loads and stores. These go through memcpy so that unaligned memory
+ * accesses do not trigger undefined behavior. */
+
+inline static uint16_t load16(uint8_t *b) {
+  uint16_t x;
+  memcpy(&x, b, 2);
+  return x;
+}
+
+inline static uint32_t load32(uint8_t *b) {
+  uint32_t x;
+  memcpy(&x, b, 4);
+  return x;
+}
+
+inline static uint64_t load64(uint8_t *b) {
+  uint64_t x;
+  memcpy(&x, b, 8);
+  return x;
+}
+
+inline static void store16(uint8_t *b, uint16_t i) {
+  memcpy(b, &i, 2);
+}
+
+inline static void store32(uint8_t *b, uint32_t i) {
+  memcpy(b, &i, 4);
+}
+
+inline static void store64(uint8_t *b, uint64_t i) {
+  memcpy(b, &i, 8);
+}
+
+#define load16_le(b) (le16toh(load16(b)))
+#define store16_le(b, i) (store16(b, htole16(i)))
+#define load16_be(b) (be16toh(load16(b)))
+#define store16_be(b, i) (store16(b, htobe16(i)))
+
+#define load32_le(b) (le32toh(load32(b)))
+#define store32_le(b, i) (store32(b, htole32(i)))
+#define load32_be(b) (be32toh(load32(b)))
+#define store32_be(b, i) (store32(b, htobe32(i)))
+
+#define load64_le(b) (le64toh(load64(b)))
+#define store64_le(b, i) (store64(b, htole64(i)))
+#define load64_be(b) (be64toh(load64(b)))
+#define store64_be(b, i) (store64(b, htobe64(i)))
+
+#endif
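
A brief sketch of the intended use of the load/store helpers defined above; the field layout and function names are hypothetical.

```c
#include <stddef.h>
#include <stdint.h>
#include "kremlin/c_endianness.h"

/* Read a little-endian 32-bit field and write a big-endian 64-bit field at
 * arbitrary (possibly unaligned) offsets without undefined behavior. */
static uint32_t read_le32(uint8_t *buf, size_t off) { return load32_le(buf + off); }
static void write_be64(uint8_t *buf, size_t off, uint64_t v) { store64_be(buf + off, v); }
```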
diff --git a/3rdparty/everest/include/everest/kremlin/internal/builtin.h b/3rdparty/everest/include/everest/kremlin/internal/builtin.h
new file mode 100644
index 0000000..219b266
--- /dev/null
+++ b/3rdparty/everest/include/everest/kremlin/internal/builtin.h
@@ -0,0 +1,16 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+#ifndef __KREMLIN_BUILTIN_H
+#define __KREMLIN_BUILTIN_H
+
+/* For alloca, when using KreMLin's -falloca */
+#if (defined(_WIN32) || defined(_WIN64))
+#  include <malloc.h>
+#endif
+
+/* If some globals need to be initialized before main, then KreMLin will
+ * generate, and try to link last, a function with this type: */
+void kremlinit_globals(void);
+
+#endif
diff --git a/3rdparty/everest/include/everest/kremlin/internal/callconv.h b/3rdparty/everest/include/everest/kremlin/internal/callconv.h
new file mode 100644
index 0000000..bf631ff
--- /dev/null
+++ b/3rdparty/everest/include/everest/kremlin/internal/callconv.h
@@ -0,0 +1,46 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+#ifndef __KREMLIN_CALLCONV_H
+#define __KREMLIN_CALLCONV_H
+
+/******************************************************************************/
+/* Some macros to ease compatibility                                          */
+/******************************************************************************/
+
+/* We want to generate __cdecl safely without worrying about it being undefined.
+ * When using MSVC, these are always defined. When using MinGW, these are
+ * defined too. They have no meaning for other platforms, so we define them to
+ * be empty macros in other situations. */
+#ifndef _MSC_VER
+#ifndef __cdecl
+#define __cdecl
+#endif
+#ifndef __stdcall
+#define __stdcall
+#endif
+#ifndef __fastcall
+#define __fastcall
+#endif
+#endif
+
+/* Since KreMLin emits the inline keyword unconditionally, we follow the
+ * guidelines at https://gcc.gnu.org/onlinedocs/gcc/Inline.html and make this
+ * __inline__ to ensure the code compiles with -std=c90 and earlier. */
+#ifdef __GNUC__
+#  define inline __inline__
+#endif
+
+/* GCC-specific attribute syntax; everyone else gets the standard C inline
+ * attribute. */
+#ifdef __GNUC__
+#  ifndef __clang__
+#    define force_inline inline __attribute__((always_inline))
+#  else
+#    define force_inline inline
+#  endif
+#else
+#  define force_inline inline
+#endif
+
+#endif
diff --git a/3rdparty/everest/include/everest/kremlin/internal/compat.h b/3rdparty/everest/include/everest/kremlin/internal/compat.h
new file mode 100644
index 0000000..a5b8889
--- /dev/null
+++ b/3rdparty/everest/include/everest/kremlin/internal/compat.h
@@ -0,0 +1,34 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+#ifndef KRML_COMPAT_H
+#define KRML_COMPAT_H
+
+#include <inttypes.h>
+
+/* A series of macros that define C implementations of types that are not Low*,
+ * to facilitate porting programs to Low*. */
+
+typedef const char *Prims_string;
+
+typedef struct {
+  uint32_t length;
+  const char *data;
+} FStar_Bytes_bytes;
+
+typedef int32_t Prims_pos, Prims_nat, Prims_nonzero, Prims_int,
+    krml_checked_int_t;
+
+#define RETURN_OR(x)                                                           \
+  do {                                                                         \
+    int64_t __ret = x;                                                         \
+    if (__ret < INT32_MIN || INT32_MAX < __ret) {                              \
+      KRML_HOST_PRINTF(                                                        \
+          "Prims.{int,nat,pos} integer overflow at %s:%d\n", __FILE__,         \
+          __LINE__);                                                           \
+      KRML_HOST_EXIT(252);                                                     \
+    }                                                                          \
+    return (int32_t)__ret;                                                     \
+  } while (0)
+
+#endif
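
A minimal sketch of how extracted code uses `RETURN_OR` to narrow a 64-bit intermediate back to the 32-bit `Prims_int` representation; the function name is hypothetical, and `kremlin/internal/target.h` is included for `KRML_HOST_PRINTF`/`KRML_HOST_EXIT`.

```c
#include <stdint.h>
#include "kremlin/internal/target.h"
#include "kremlin/internal/compat.h"

/* Checked addition on Prims_int: aborts with exit code 252 on overflow. */
static Prims_int prims_add(Prims_int a, Prims_int b)
{
    RETURN_OR((int64_t)a + (int64_t)b);
}
```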
diff --git a/3rdparty/everest/include/everest/kremlin/internal/debug.h b/3rdparty/everest/include/everest/kremlin/internal/debug.h
new file mode 100644
index 0000000..44ac22c
--- /dev/null
+++ b/3rdparty/everest/include/everest/kremlin/internal/debug.h
@@ -0,0 +1,57 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+#ifndef __KREMLIN_DEBUG_H
+#define __KREMLIN_DEBUG_H
+
+#include <inttypes.h>
+
+#include "kremlin/internal/target.h"
+
+/******************************************************************************/
+/* Debugging helpers - intended only for KreMLin developers                   */
+/******************************************************************************/
+
+/* In support of "-wasm -d force-c": we might need this function to be
+ * forward-declared, because the dependency on WasmSupport appears very late,
+ * after SimplifyWasm, and sadly, after the topological order has been done. */
+void WasmSupport_check_buffer_size(uint32_t s);
+
+/* A series of GCC atrocities to trace function calls (kremlin's [-d c-calls]
+ * option). Useful when trying to debug, say, Wasm, to compare traces. */
+/* clang-format off */
+#ifdef __GNUC__
+#define KRML_FORMAT(X) _Generic((X),                                           \
+  uint8_t : "0x%08" PRIx8,                                                     \
+  uint16_t: "0x%08" PRIx16,                                                    \
+  uint32_t: "0x%08" PRIx32,                                                    \
+  uint64_t: "0x%08" PRIx64,                                                    \
+  int8_t  : "0x%08" PRIx8,                                                     \
+  int16_t : "0x%08" PRIx16,                                                    \
+  int32_t : "0x%08" PRIx32,                                                    \
+  int64_t : "0x%08" PRIx64,                                                    \
+  default : "%s")
+
+#define KRML_FORMAT_ARG(X) _Generic((X),                                       \
+  uint8_t : X,                                                                 \
+  uint16_t: X,                                                                 \
+  uint32_t: X,                                                                 \
+  uint64_t: X,                                                                 \
+  int8_t  : X,                                                                 \
+  int16_t : X,                                                                 \
+  int32_t : X,                                                                 \
+  int64_t : X,                                                                 \
+  default : "unknown")
+/* clang-format on */
+
+#  define KRML_DEBUG_RETURN(X)                                                 \
+    ({                                                                         \
+      __auto_type _ret = (X);                                                  \
+      KRML_HOST_PRINTF("returning: ");                                         \
+      KRML_HOST_PRINTF(KRML_FORMAT(_ret), KRML_FORMAT_ARG(_ret));              \
+      KRML_HOST_PRINTF(" \n");                                                 \
+      _ret;                                                                    \
+    })
+#endif
+
+#endif
diff --git a/3rdparty/everest/include/everest/kremlin/internal/target.h b/3rdparty/everest/include/everest/kremlin/internal/target.h
new file mode 100644
index 0000000..b552f52
--- /dev/null
+++ b/3rdparty/everest/include/everest/kremlin/internal/target.h
@@ -0,0 +1,102 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+#ifndef __KREMLIN_TARGET_H
+#define __KREMLIN_TARGET_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <inttypes.h>
+#include <limits.h>
+
+#include "kremlin/internal/callconv.h"
+
+/******************************************************************************/
+/* Macros that KreMLin will generate.                                         */
+/******************************************************************************/
+
+/* For "bare" targets that do not have a C stdlib, the user might want to use
+ * [-add-early-include '"mydefinitions.h"'] and override these. */
+#ifndef KRML_HOST_PRINTF
+#  define KRML_HOST_PRINTF printf
+#endif
+
+#if (                                                                          \
+    (defined __STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) &&             \
+    (!(defined KRML_HOST_EPRINTF)))
+#  define KRML_HOST_EPRINTF(...) fprintf(stderr, __VA_ARGS__)
+#endif
+
+#ifndef KRML_HOST_EXIT
+#  define KRML_HOST_EXIT exit
+#endif
+
+#ifndef KRML_HOST_MALLOC
+#  define KRML_HOST_MALLOC malloc
+#endif
+
+#ifndef KRML_HOST_CALLOC
+#  define KRML_HOST_CALLOC calloc
+#endif
+
+#ifndef KRML_HOST_FREE
+#  define KRML_HOST_FREE free
+#endif
+
+#ifndef KRML_HOST_TIME
+
+#  include <time.h>
+
+/* Prims_nat not yet in scope */
+inline static int32_t krml_time() {
+  return (int32_t)time(NULL);
+}
+
+#  define KRML_HOST_TIME krml_time
+#endif
+
+/* In statement position, exiting is easy. */
+#define KRML_EXIT                                                              \
+  do {                                                                         \
+    KRML_HOST_PRINTF("Unimplemented function at %s:%d\n", __FILE__, __LINE__); \
+    KRML_HOST_EXIT(254);                                                       \
+  } while (0)
+
+/* In expression position, use the comma-operator and a malloc to return an
+ * expression of the right size. KreMLin passes t as the parameter to the macro.
+ */
+#define KRML_EABORT(t, msg)                                                    \
+  (KRML_HOST_PRINTF("KreMLin abort at %s:%d\n%s\n", __FILE__, __LINE__, msg),  \
+   KRML_HOST_EXIT(255), *((t *)KRML_HOST_MALLOC(sizeof(t))))
+
+/* In FStar.Buffer.fst, the size of arrays is uint32_t, but it's a number of
+ * *elements*. Do an ugly, run-time check (some of which KreMLin can eliminate).
+ */
+
+#ifdef __GNUC__
+#  define _KRML_CHECK_SIZE_PRAGMA                                              \
+    _Pragma("GCC diagnostic ignored \"-Wtype-limits\"")
+#else
+#  define _KRML_CHECK_SIZE_PRAGMA
+#endif
+
+#define KRML_CHECK_SIZE(size_elt, sz)                                          \
+  do {                                                                         \
+    _KRML_CHECK_SIZE_PRAGMA                                                    \
+    if (((size_t)(sz)) > ((size_t)(SIZE_MAX / (size_elt)))) {                  \
+      KRML_HOST_PRINTF(                                                        \
+          "Maximum allocatable size exceeded, aborting before overflow at "    \
+          "%s:%d\n",                                                           \
+          __FILE__, __LINE__);                                                 \
+      KRML_HOST_EXIT(253);                                                     \
+    }                                                                          \
+  } while (0)
+
+#if defined(_MSC_VER) && _MSC_VER < 1900
+#  define KRML_HOST_SNPRINTF(buf, sz, fmt, arg) _snprintf_s(buf, sz, _TRUNCATE, fmt, arg)
+#else
+#  define KRML_HOST_SNPRINTF(buf, sz, fmt, arg) snprintf(buf, sz, fmt, arg)
+#endif
+
+#endif
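
To illustrate the allocation guard above, a minimal sketch follows (the helper name is hypothetical):

```c
#include <stdint.h>
#include "kremlin/internal/target.h"

/* Allocate `len` 64-bit limbs, aborting (exit code 253) before the size
 * computation can overflow. */
static uint64_t *alloc_limbs(uint32_t len)
{
    KRML_CHECK_SIZE(sizeof(uint64_t), len);
    return (uint64_t *)KRML_HOST_CALLOC(len, sizeof(uint64_t));
}
```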
diff --git a/3rdparty/everest/include/everest/kremlin/internal/types.h b/3rdparty/everest/include/everest/kremlin/internal/types.h
new file mode 100644
index 0000000..b936f00
--- /dev/null
+++ b/3rdparty/everest/include/everest/kremlin/internal/types.h
@@ -0,0 +1,61 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+#ifndef KRML_TYPES_H
+#define KRML_TYPES_H
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Types which are either abstract, meaning that they have to be implemented in C, or
+ * which are models, meaning that they are swapped out at compile-time for
+ * hand-written C types (in which case they're marked as noextract). */
+
+typedef uint64_t FStar_UInt64_t, FStar_UInt64_t_;
+typedef int64_t FStar_Int64_t, FStar_Int64_t_;
+typedef uint32_t FStar_UInt32_t, FStar_UInt32_t_;
+typedef int32_t FStar_Int32_t, FStar_Int32_t_;
+typedef uint16_t FStar_UInt16_t, FStar_UInt16_t_;
+typedef int16_t FStar_Int16_t, FStar_Int16_t_;
+typedef uint8_t FStar_UInt8_t, FStar_UInt8_t_;
+typedef int8_t FStar_Int8_t, FStar_Int8_t_;
+
+/* Only useful when building Kremlib, because it's in the dependency graph of
+ * FStar.Int.Cast. */
+typedef uint64_t FStar_UInt63_t, FStar_UInt63_t_;
+typedef int64_t FStar_Int63_t, FStar_Int63_t_;
+
+typedef double FStar_Float_float;
+typedef uint32_t FStar_Char_char;
+typedef FILE *FStar_IO_fd_read, *FStar_IO_fd_write;
+
+typedef void *FStar_Dyn_dyn;
+
+typedef const char *C_String_t, *C_String_t_;
+
+typedef int exit_code;
+typedef FILE *channel;
+
+typedef unsigned long long TestLib_cycles;
+
+typedef uint64_t FStar_Date_dateTime, FStar_Date_timeSpan;
+
+/* The uint128 type is a special case since we offer several implementations of
+ * it, depending on the compiler and whether the user wants the verified
+ * implementation or not. */
+#if !defined(KRML_VERIFIED_UINT128) && defined(_MSC_VER) && defined(_M_X64)
+#  include <emmintrin.h>
+typedef __m128i FStar_UInt128_uint128;
+#elif !defined(KRML_VERIFIED_UINT128) && !defined(_MSC_VER)
+typedef unsigned __int128 FStar_UInt128_uint128;
+#else
+typedef struct FStar_UInt128_uint128_s {
+  uint64_t low;
+  uint64_t high;
+} FStar_UInt128_uint128;
+#endif
+
+typedef FStar_UInt128_uint128 FStar_UInt128_t, FStar_UInt128_t_, uint128_t;
+
+#endif
diff --git a/3rdparty/everest/include/everest/kremlin/internal/wasmsupport.h b/3rdparty/everest/include/everest/kremlin/internal/wasmsupport.h
new file mode 100644
index 0000000..b44fa3f
--- /dev/null
+++ b/3rdparty/everest/include/everest/kremlin/internal/wasmsupport.h
@@ -0,0 +1,5 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+/* This file is automatically included when compiling with -wasm -d force-c */
+#define WasmSupport_check_buffer_size(X)
diff --git a/3rdparty/everest/include/everest/vs2013/Hacl_Curve25519.h b/3rdparty/everest/include/everest/vs2013/Hacl_Curve25519.h
new file mode 100644
index 0000000..27ebe07
--- /dev/null
+++ b/3rdparty/everest/include/everest/vs2013/Hacl_Curve25519.h
@@ -0,0 +1,21 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+/* This file was generated by KreMLin <https://github.com/FStarLang/kremlin>
+ * KreMLin invocation: /mnt/e/everest/verify/kremlin/krml -fc89 -fparentheses -fno-shadow -header /mnt/e/everest/verify/hdrcLh -minimal -fc89 -fparentheses -fno-shadow -header /mnt/e/everest/verify/hdrcLh -minimal -I /mnt/e/everest/verify/hacl-star/code/lib/kremlin -I /mnt/e/everest/verify/kremlin/kremlib/compat -I /mnt/e/everest/verify/hacl-star/specs -I /mnt/e/everest/verify/hacl-star/specs/old -I . -ccopt -march=native -verbose -ldopt -flto -tmpdir x25519-c -I ../bignum -bundle Hacl.Curve25519=* -minimal -add-include "kremlib.h" -skip-compilation x25519-c/out.krml -o x25519-c/Hacl_Curve25519.c
+ * F* version: 059db0c8
+ * KreMLin version: 916c37ac
+ */
+
+
+
+#ifndef __Hacl_Curve25519_H
+#define __Hacl_Curve25519_H
+
+
+#include "kremlib.h"
+
+void Hacl_Curve25519_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint);
+
+#define __Hacl_Curve25519_H_DEFINED
+#endif
diff --git a/3rdparty/everest/include/everest/vs2013/inttypes.h b/3rdparty/everest/include/everest/vs2013/inttypes.h
new file mode 100644
index 0000000..77003be
--- /dev/null
+++ b/3rdparty/everest/include/everest/vs2013/inttypes.h
@@ -0,0 +1,36 @@
+/*
+ *  Custom inttypes.h for VS2010. KreMLin requires these definitions,
+ *  but VS2010 doesn't provide them.
+ *
+ *  Copyright 2016-2018 INRIA and Microsoft Corporation
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  This file is part of Mbed TLS (https://tls.mbed.org)
+ */
+
+#ifndef _INTTYPES_H_VS2010
+#define _INTTYPES_H_VS2010
+
+#include <stdint.h>
+
+#ifdef _MSC_VER
+#define inline __inline
+#endif
+
+/* VS2010: unsigned long long == 8 bytes */
+
+#define PRIu64 "I64u"
+
+#endif
diff --git a/3rdparty/everest/include/everest/vs2013/stdbool.h b/3rdparty/everest/include/everest/vs2013/stdbool.h
new file mode 100644
index 0000000..dcae6d8
--- /dev/null
+++ b/3rdparty/everest/include/everest/vs2013/stdbool.h
@@ -0,0 +1,31 @@
+/*
+ *  Custom stdbool.h for VS2010. KreMLin requires these definitions,
+ *  but VS2010 doesn't provide them.
+ *
+ *  Copyright 2016-2018 INRIA and Microsoft Corporation
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  This file is part of Mbed TLS (https://tls.mbed.org)
+ */
+
+#ifndef _STDBOOL_H_VS2010
+#define _STDBOOL_H_VS2010
+
+typedef int bool;
+
+static bool true = 1;
+static bool false = 0;
+
+#endif
diff --git a/3rdparty/everest/include/everest/x25519.h b/3rdparty/everest/include/everest/x25519.h
new file mode 100644
index 0000000..ef314d2
--- /dev/null
+++ b/3rdparty/everest/include/everest/x25519.h
@@ -0,0 +1,190 @@
+/*
+ *  ECDH with curve-optimized implementation multiplexing
+ *
+ *  Copyright 2016-2018 INRIA and Microsoft Corporation
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  This file is part of Mbed TLS (https://tls.mbed.org)
+ */
+
+#ifndef MBEDTLS_X25519_H
+#define MBEDTLS_X25519_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MBEDTLS_ECP_TLS_CURVE25519 0x1d
+#define MBEDTLS_X25519_KEY_SIZE_BYTES 32
+
+/**
+ * Defines the source of the imported EC key.
+ */
+typedef enum
+{
+    MBEDTLS_X25519_ECDH_OURS,   /**< Our key. */
+    MBEDTLS_X25519_ECDH_THEIRS, /**< The key of the peer. */
+} mbedtls_x25519_ecdh_side;
+
+/**
+ * \brief           The x25519 context structure.
+ */
+typedef struct
+{
+  unsigned char our_secret[MBEDTLS_X25519_KEY_SIZE_BYTES];
+  unsigned char peer_point[MBEDTLS_X25519_KEY_SIZE_BYTES];
+} mbedtls_x25519_context;
+
+/**
+ * \brief           This function initializes an x25519 context.
+ *
+ * \param ctx       The x25519 context to initialize.
+ */
+void mbedtls_x25519_init( mbedtls_x25519_context *ctx );
+
+/**
+ * \brief           This function frees a context.
+ *
+ * \param ctx       The context to free.
+ */
+void mbedtls_x25519_free( mbedtls_x25519_context *ctx );
+
+/**
+ * \brief           This function generates a public key and a TLS
+ *                  ServerKeyExchange payload.
+ *
+ *                  This is the first function used by a TLS server for x25519.
+ *
+ *
+ * \param ctx       The x25519 context.
+ * \param olen      The number of Bytes written.
+ * \param buf       The destination buffer.
+ * \param blen      The length of the destination buffer.
+ * \param f_rng     The RNG function.
+ * \param p_rng     The RNG context.
+ *
+ * \return          \c 0 on success.
+ * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ */
+int mbedtls_x25519_make_params( mbedtls_x25519_context *ctx, size_t *olen,
+                        unsigned char *buf, size_t blen,
+                        int( *f_rng )(void *, unsigned char *, size_t),
+                        void *p_rng );
+
+/**
+ * \brief           This function parses and processes a TLS ServerKeyExchange
+ *                  payload.
+ *
+ *
+ * \param ctx       The x25519 context.
+ * \param buf       The pointer to the start of the input buffer.
+ * \param end       The address for one Byte past the end of the buffer.
+ *
+ * \return          \c 0 on success.
+ * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ *
+ */
+int mbedtls_x25519_read_params( mbedtls_x25519_context *ctx,
+                        const unsigned char **buf, const unsigned char *end );
+
+/**
+ * \brief           This function sets up an x25519 context from an EC key.
+ *
+ *                  It is used by clients and servers in place of the
+ *                  ServerKeyExchange for static ECDH, and imports ECDH
+ *                  parameters from the EC key information of a certificate.
+ *
+ * \see             ecp.h
+ *
+ * \param ctx       The x25519 context to set up.
+ * \param key       The EC key to use.
+ * \param side      Defines the source of the key: #MBEDTLS_X25519_ECDH_OURS
+ *                  for our key, or #MBEDTLS_X25519_ECDH_THEIRS for the peer's.
+ *
+ * \return          \c 0 on success.
+ * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ *
+ */
+int mbedtls_x25519_get_params( mbedtls_x25519_context *ctx, const mbedtls_ecp_keypair *key,
+                               mbedtls_x25519_ecdh_side side );
+
+/**
+ * \brief           This function derives and exports the shared secret.
+ *
+ *                  This is the last function used by both TLS clients
+ *                  and servers.
+ *
+ *
+ * \param ctx       The x25519 context.
+ * \param olen      The number of Bytes written.
+ * \param buf       The destination buffer.
+ * \param blen      The length of the destination buffer.
+ * \param f_rng     The RNG function.
+ * \param p_rng     The RNG context.
+ *
+ * \return          \c 0 on success.
+ * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ */
+int mbedtls_x25519_calc_secret( mbedtls_x25519_context *ctx, size_t *olen,
+                        unsigned char *buf, size_t blen,
+                        int( *f_rng )(void *, unsigned char *, size_t),
+                        void *p_rng );
+
+/**
+ * \brief           This function generates a public key and a TLS
+ *                  ClientKeyExchange payload.
+ *
+ *                  This is the second function used by a TLS client for x25519.
+ *
+ * \see             ecp.h
+ *
+ * \param ctx       The x25519 context.
+ * \param olen      The number of Bytes written.
+ * \param buf       The destination buffer.
+ * \param blen      The size of the destination buffer.
+ * \param f_rng     The RNG function.
+ * \param p_rng     The RNG context.
+ *
+ * \return          \c 0 on success.
+ * \return          An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ */
+int mbedtls_x25519_make_public( mbedtls_x25519_context *ctx, size_t *olen,
+                        unsigned char *buf, size_t blen,
+                        int( *f_rng )(void *, unsigned char *, size_t),
+                        void *p_rng );
+
+/**
+ * \brief       This function parses and processes a TLS ClientKeyExchange
+ *              payload.
+ *
+ *              This is the second function used by a TLS server for x25519.
+ *
+ * \see         ecp.h
+ *
+ * \param ctx   The x25519 context.
+ * \param buf   The start of the input buffer.
+ * \param blen  The length of the input buffer.
+ *
+ * \return      \c 0 on success.
+ * \return      An \c MBEDTLS_ERR_ECP_XXX error code on failure.
+ */
+int mbedtls_x25519_read_public( mbedtls_x25519_context *ctx,
+                        const unsigned char *buf, size_t blen );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* x25519.h */
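For illustration, a minimal sketch of how a server-side caller might drive the API declared in this header. The names my_rng and example_x25519_server are hypothetical, and the RNG stub is a placeholder: a real application must supply a cryptographically secure callback such as mbedtls_ctr_drbg_random().

#include <stddef.h>
#include <string.h>
#include "everest/x25519.h"

static int my_rng( void *p_rng, unsigned char *out, size_t len )
{
    (void) p_rng;
    memset( out, 0x5a, len );   /* placeholder only, NOT secure */
    return 0;
}

static int example_x25519_server( unsigned char *params, size_t params_size,
                                  size_t *params_len,
                                  const unsigned char *peer_pub, size_t peer_len,
                                  unsigned char *secret, size_t secret_size,
                                  size_t *secret_len )
{
    mbedtls_x25519_context ctx;
    int ret;

    mbedtls_x25519_init( &ctx );

    /* 1. Emit our ephemeral key share (the ServerKeyExchange payload). */
    ret = mbedtls_x25519_make_params( &ctx, params_len, params, params_size,
                                      my_rng, NULL );

    /* 2. Import the peer's public key (the ClientKeyExchange payload). */
    if( ret == 0 )
        ret = mbedtls_x25519_read_public( &ctx, peer_pub, peer_len );

    /* 3. Derive and export the shared secret. */
    if( ret == 0 )
        ret = mbedtls_x25519_calc_secret( &ctx, secret_len, secret, secret_size,
                                          my_rng, NULL );

    mbedtls_x25519_free( &ctx );
    return ret;
}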
diff --git a/3rdparty/everest/library/Hacl_Curve25519.c b/3rdparty/everest/library/Hacl_Curve25519.c
new file mode 100644
index 0000000..450b9f8
--- /dev/null
+++ b/3rdparty/everest/library/Hacl_Curve25519.c
@@ -0,0 +1,760 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+/* This file was generated by KreMLin <https://github.com/FStarLang/kremlin>
+ * KreMLin invocation: /mnt/e/everest/verify/kremlin/krml -fc89 -fparentheses -fno-shadow -header /mnt/e/everest/verify/hdrcLh -minimal -fbuiltin-uint128 -fc89 -fparentheses -fno-shadow -header /mnt/e/everest/verify/hdrcLh -minimal -I /mnt/e/everest/verify/hacl-star/code/lib/kremlin -I /mnt/e/everest/verify/kremlin/kremlib/compat -I /mnt/e/everest/verify/hacl-star/specs -I /mnt/e/everest/verify/hacl-star/specs/old -I . -ccopt -march=native -verbose -ldopt -flto -tmpdir x25519-c -I ../bignum -bundle Hacl.Curve25519=* -minimal -add-include "kremlib.h" -skip-compilation x25519-c/out.krml -o x25519-c/Hacl_Curve25519.c
+ * F* version: 059db0c8
+ * KreMLin version: 916c37ac
+ */
+
+
+#include "Hacl_Curve25519.h"
+
+extern uint64_t FStar_UInt64_eq_mask(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_gte_mask(uint64_t x0, uint64_t x1);
+
+extern uint128_t FStar_UInt128_add(uint128_t x0, uint128_t x1);
+
+extern uint128_t FStar_UInt128_add_mod(uint128_t x0, uint128_t x1);
+
+extern uint128_t FStar_UInt128_logand(uint128_t x0, uint128_t x1);
+
+extern uint128_t FStar_UInt128_shift_right(uint128_t x0, uint32_t x1);
+
+extern uint128_t FStar_UInt128_uint64_to_uint128(uint64_t x0);
+
+extern uint64_t FStar_UInt128_uint128_to_uint64(uint128_t x0);
+
+extern uint128_t FStar_UInt128_mul_wide(uint64_t x0, uint64_t x1);
+
+static void Hacl_Bignum_Modulo_carry_top(uint64_t *b)
+{
+  uint64_t b4 = b[4U];
+  uint64_t b0 = b[0U];
+  uint64_t b4_ = b4 & (uint64_t)0x7ffffffffffffU;
+  uint64_t b0_ = b0 + (uint64_t)19U * (b4 >> (uint32_t)51U);
+  b[4U] = b4_;
+  b[0U] = b0_;
+}
+
+inline static void Hacl_Bignum_Fproduct_copy_from_wide_(uint64_t *output, uint128_t *input)
+{
+  uint32_t i;
+  for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
+  {
+    uint128_t xi = input[i];
+    output[i] = (uint64_t)xi;
+  }
+}
+
+inline static void
+Hacl_Bignum_Fproduct_sum_scalar_multiplication_(uint128_t *output, uint64_t *input, uint64_t s)
+{
+  uint32_t i;
+  for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
+  {
+    uint128_t xi = output[i];
+    uint64_t yi = input[i];
+    output[i] = xi + (uint128_t)yi * s;
+  }
+}
+
+inline static void Hacl_Bignum_Fproduct_carry_wide_(uint128_t *tmp)
+{
+  uint32_t i;
+  for (i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U)
+  {
+    uint32_t ctr = i;
+    uint128_t tctr = tmp[ctr];
+    uint128_t tctrp1 = tmp[ctr + (uint32_t)1U];
+    uint64_t r0 = (uint64_t)tctr & (uint64_t)0x7ffffffffffffU;
+    uint128_t c = tctr >> (uint32_t)51U;
+    tmp[ctr] = (uint128_t)r0;
+    tmp[ctr + (uint32_t)1U] = tctrp1 + c;
+  }
+}
+
+inline static void Hacl_Bignum_Fmul_shift_reduce(uint64_t *output)
+{
+  uint64_t tmp = output[4U];
+  uint64_t b0;
+  {
+    uint32_t i;
+    for (i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U)
+    {
+      uint32_t ctr = (uint32_t)5U - i - (uint32_t)1U;
+      uint64_t z = output[ctr - (uint32_t)1U];
+      output[ctr] = z;
+    }
+  }
+  output[0U] = tmp;
+  b0 = output[0U];
+  output[0U] = (uint64_t)19U * b0;
+}
+
+static void
+Hacl_Bignum_Fmul_mul_shift_reduce_(uint128_t *output, uint64_t *input, uint64_t *input2)
+{
+  uint32_t i;
+  uint64_t input2i;
+  {
+    uint32_t i0;
+    for (i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0 = i0 + (uint32_t)1U)
+    {
+      uint64_t input2i0 = input2[i0];
+      Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i0);
+      Hacl_Bignum_Fmul_shift_reduce(input);
+    }
+  }
+  i = (uint32_t)4U;
+  input2i = input2[i];
+  Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i);
+}
+
+inline static void Hacl_Bignum_Fmul_fmul(uint64_t *output, uint64_t *input, uint64_t *input2)
+{
+  uint64_t tmp[5U] = { 0U };
+  memcpy(tmp, input, (uint32_t)5U * sizeof input[0U]);
+  KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U);
+  {
+    uint128_t t[5U];
+    {
+      uint32_t _i;
+      for (_i = 0U; _i < (uint32_t)5U; ++_i)
+        t[_i] = (uint128_t)(uint64_t)0U;
+    }
+    {
+      uint128_t b4;
+      uint128_t b0;
+      uint128_t b4_;
+      uint128_t b0_;
+      uint64_t i0;
+      uint64_t i1;
+      uint64_t i0_;
+      uint64_t i1_;
+      Hacl_Bignum_Fmul_mul_shift_reduce_(t, tmp, input2);
+      Hacl_Bignum_Fproduct_carry_wide_(t);
+      b4 = t[4U];
+      b0 = t[0U];
+      b4_ = b4 & (uint128_t)(uint64_t)0x7ffffffffffffU;
+      b0_ = b0 + (uint128_t)(uint64_t)19U * (uint64_t)(b4 >> (uint32_t)51U);
+      t[4U] = b4_;
+      t[0U] = b0_;
+      Hacl_Bignum_Fproduct_copy_from_wide_(output, t);
+      i0 = output[0U];
+      i1 = output[1U];
+      i0_ = i0 & (uint64_t)0x7ffffffffffffU;
+      i1_ = i1 + (i0 >> (uint32_t)51U);
+      output[0U] = i0_;
+      output[1U] = i1_;
+    }
+  }
+}
+
+inline static void Hacl_Bignum_Fsquare_fsquare__(uint128_t *tmp, uint64_t *output)
+{
+  uint64_t r0 = output[0U];
+  uint64_t r1 = output[1U];
+  uint64_t r2 = output[2U];
+  uint64_t r3 = output[3U];
+  uint64_t r4 = output[4U];
+  uint64_t d0 = r0 * (uint64_t)2U;
+  uint64_t d1 = r1 * (uint64_t)2U;
+  uint64_t d2 = r2 * (uint64_t)2U * (uint64_t)19U;
+  uint64_t d419 = r4 * (uint64_t)19U;
+  uint64_t d4 = d419 * (uint64_t)2U;
+  uint128_t s0 = (uint128_t)r0 * r0 + (uint128_t)d4 * r1 + (uint128_t)d2 * r3;
+  uint128_t s1 = (uint128_t)d0 * r1 + (uint128_t)d4 * r2 + (uint128_t)(r3 * (uint64_t)19U) * r3;
+  uint128_t s2 = (uint128_t)d0 * r2 + (uint128_t)r1 * r1 + (uint128_t)d4 * r3;
+  uint128_t s3 = (uint128_t)d0 * r3 + (uint128_t)d1 * r2 + (uint128_t)r4 * d419;
+  uint128_t s4 = (uint128_t)d0 * r4 + (uint128_t)d1 * r3 + (uint128_t)r2 * r2;
+  tmp[0U] = s0;
+  tmp[1U] = s1;
+  tmp[2U] = s2;
+  tmp[3U] = s3;
+  tmp[4U] = s4;
+}
+
+inline static void Hacl_Bignum_Fsquare_fsquare_(uint128_t *tmp, uint64_t *output)
+{
+  uint128_t b4;
+  uint128_t b0;
+  uint128_t b4_;
+  uint128_t b0_;
+  uint64_t i0;
+  uint64_t i1;
+  uint64_t i0_;
+  uint64_t i1_;
+  Hacl_Bignum_Fsquare_fsquare__(tmp, output);
+  Hacl_Bignum_Fproduct_carry_wide_(tmp);
+  b4 = tmp[4U];
+  b0 = tmp[0U];
+  b4_ = b4 & (uint128_t)(uint64_t)0x7ffffffffffffU;
+  b0_ = b0 + (uint128_t)(uint64_t)19U * (uint64_t)(b4 >> (uint32_t)51U);
+  tmp[4U] = b4_;
+  tmp[0U] = b0_;
+  Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp);
+  i0 = output[0U];
+  i1 = output[1U];
+  i0_ = i0 & (uint64_t)0x7ffffffffffffU;
+  i1_ = i1 + (i0 >> (uint32_t)51U);
+  output[0U] = i0_;
+  output[1U] = i1_;
+}
+
+static void
+Hacl_Bignum_Fsquare_fsquare_times_(uint64_t *input, uint128_t *tmp, uint32_t count1)
+{
+  uint32_t i;
+  Hacl_Bignum_Fsquare_fsquare_(tmp, input);
+  for (i = (uint32_t)1U; i < count1; i = i + (uint32_t)1U)
+    Hacl_Bignum_Fsquare_fsquare_(tmp, input);
+}
+
+inline static void
+Hacl_Bignum_Fsquare_fsquare_times(uint64_t *output, uint64_t *input, uint32_t count1)
+{
+  KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U);
+  {
+    uint128_t t[5U];
+    {
+      uint32_t _i;
+      for (_i = 0U; _i < (uint32_t)5U; ++_i)
+        t[_i] = (uint128_t)(uint64_t)0U;
+    }
+    memcpy(output, input, (uint32_t)5U * sizeof input[0U]);
+    Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1);
+  }
+}
+
+inline static void Hacl_Bignum_Fsquare_fsquare_times_inplace(uint64_t *output, uint32_t count1)
+{
+  KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U);
+  {
+    uint128_t t[5U];
+    {
+      uint32_t _i;
+      for (_i = 0U; _i < (uint32_t)5U; ++_i)
+        t[_i] = (uint128_t)(uint64_t)0U;
+    }
+    Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1);
+  }
+}
+
+inline static void Hacl_Bignum_Crecip_crecip(uint64_t *out, uint64_t *z)
+{
+  uint64_t buf[20U] = { 0U };
+  uint64_t *a0 = buf;
+  uint64_t *t00 = buf + (uint32_t)5U;
+  uint64_t *b0 = buf + (uint32_t)10U;
+  uint64_t *t01;
+  uint64_t *b1;
+  uint64_t *c0;
+  uint64_t *a;
+  uint64_t *t0;
+  uint64_t *b;
+  uint64_t *c;
+  Hacl_Bignum_Fsquare_fsquare_times(a0, z, (uint32_t)1U);
+  Hacl_Bignum_Fsquare_fsquare_times(t00, a0, (uint32_t)2U);
+  Hacl_Bignum_Fmul_fmul(b0, t00, z);
+  Hacl_Bignum_Fmul_fmul(a0, b0, a0);
+  Hacl_Bignum_Fsquare_fsquare_times(t00, a0, (uint32_t)1U);
+  Hacl_Bignum_Fmul_fmul(b0, t00, b0);
+  Hacl_Bignum_Fsquare_fsquare_times(t00, b0, (uint32_t)5U);
+  t01 = buf + (uint32_t)5U;
+  b1 = buf + (uint32_t)10U;
+  c0 = buf + (uint32_t)15U;
+  Hacl_Bignum_Fmul_fmul(b1, t01, b1);
+  Hacl_Bignum_Fsquare_fsquare_times(t01, b1, (uint32_t)10U);
+  Hacl_Bignum_Fmul_fmul(c0, t01, b1);
+  Hacl_Bignum_Fsquare_fsquare_times(t01, c0, (uint32_t)20U);
+  Hacl_Bignum_Fmul_fmul(t01, t01, c0);
+  Hacl_Bignum_Fsquare_fsquare_times_inplace(t01, (uint32_t)10U);
+  Hacl_Bignum_Fmul_fmul(b1, t01, b1);
+  Hacl_Bignum_Fsquare_fsquare_times(t01, b1, (uint32_t)50U);
+  a = buf;
+  t0 = buf + (uint32_t)5U;
+  b = buf + (uint32_t)10U;
+  c = buf + (uint32_t)15U;
+  Hacl_Bignum_Fmul_fmul(c, t0, b);
+  Hacl_Bignum_Fsquare_fsquare_times(t0, c, (uint32_t)100U);
+  Hacl_Bignum_Fmul_fmul(t0, t0, c);
+  Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, (uint32_t)50U);
+  Hacl_Bignum_Fmul_fmul(t0, t0, b);
+  Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, (uint32_t)5U);
+  Hacl_Bignum_Fmul_fmul(out, t0, a);
+}
+
+inline static void Hacl_Bignum_fsum(uint64_t *a, uint64_t *b)
+{
+  uint32_t i;
+  for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
+  {
+    uint64_t xi = a[i];
+    uint64_t yi = b[i];
+    a[i] = xi + yi;
+  }
+}
+
+inline static void Hacl_Bignum_fdifference(uint64_t *a, uint64_t *b)
+{
+  uint64_t tmp[5U] = { 0U };
+  uint64_t b0;
+  uint64_t b1;
+  uint64_t b2;
+  uint64_t b3;
+  uint64_t b4;
+  memcpy(tmp, b, (uint32_t)5U * sizeof b[0U]);
+  b0 = tmp[0U];
+  b1 = tmp[1U];
+  b2 = tmp[2U];
+  b3 = tmp[3U];
+  b4 = tmp[4U];
+  tmp[0U] = b0 + (uint64_t)0x3fffffffffff68U;
+  tmp[1U] = b1 + (uint64_t)0x3ffffffffffff8U;
+  tmp[2U] = b2 + (uint64_t)0x3ffffffffffff8U;
+  tmp[3U] = b3 + (uint64_t)0x3ffffffffffff8U;
+  tmp[4U] = b4 + (uint64_t)0x3ffffffffffff8U;
+  {
+    uint32_t i;
+    for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
+    {
+      uint64_t xi = a[i];
+      uint64_t yi = tmp[i];
+      a[i] = yi - xi;
+    }
+  }
+}
+
+inline static void Hacl_Bignum_fscalar(uint64_t *output, uint64_t *b, uint64_t s)
+{
+  KRML_CHECK_SIZE(sizeof (uint128_t), (uint32_t)5U);
+  {
+    uint128_t tmp[5U];
+    {
+      uint32_t _i;
+      for (_i = 0U; _i < (uint32_t)5U; ++_i)
+        tmp[_i] = (uint128_t)(uint64_t)0U;
+    }
+    {
+      uint128_t b4;
+      uint128_t b0;
+      uint128_t b4_;
+      uint128_t b0_;
+      {
+        uint32_t i;
+        for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
+        {
+          uint64_t xi = b[i];
+          tmp[i] = (uint128_t)xi * s;
+        }
+      }
+      Hacl_Bignum_Fproduct_carry_wide_(tmp);
+      b4 = tmp[4U];
+      b0 = tmp[0U];
+      b4_ = b4 & (uint128_t)(uint64_t)0x7ffffffffffffU;
+      b0_ = b0 + (uint128_t)(uint64_t)19U * (uint64_t)(b4 >> (uint32_t)51U);
+      tmp[4U] = b4_;
+      tmp[0U] = b0_;
+      Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp);
+    }
+  }
+}
+
+inline static void Hacl_Bignum_fmul(uint64_t *output, uint64_t *a, uint64_t *b)
+{
+  Hacl_Bignum_Fmul_fmul(output, a, b);
+}
+
+inline static void Hacl_Bignum_crecip(uint64_t *output, uint64_t *input)
+{
+  Hacl_Bignum_Crecip_crecip(output, input);
+}
+
+static void
+Hacl_EC_Point_swap_conditional_step(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr)
+{
+  uint32_t i = ctr - (uint32_t)1U;
+  uint64_t ai = a[i];
+  uint64_t bi = b[i];
+  uint64_t x = swap1 & (ai ^ bi);
+  uint64_t ai1 = ai ^ x;
+  uint64_t bi1 = bi ^ x;
+  a[i] = ai1;
+  b[i] = bi1;
+}
+
+static void
+Hacl_EC_Point_swap_conditional_(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr)
+{
+  if (!(ctr == (uint32_t)0U))
+  {
+    uint32_t i;
+    Hacl_EC_Point_swap_conditional_step(a, b, swap1, ctr);
+    i = ctr - (uint32_t)1U;
+    Hacl_EC_Point_swap_conditional_(a, b, swap1, i);
+  }
+}
+
+static void Hacl_EC_Point_swap_conditional(uint64_t *a, uint64_t *b, uint64_t iswap)
+{
+  uint64_t swap1 = (uint64_t)0U - iswap;
+  Hacl_EC_Point_swap_conditional_(a, b, swap1, (uint32_t)5U);
+  Hacl_EC_Point_swap_conditional_(a + (uint32_t)5U, b + (uint32_t)5U, swap1, (uint32_t)5U);
+}
+
+static void Hacl_EC_Point_copy(uint64_t *output, uint64_t *input)
+{
+  memcpy(output, input, (uint32_t)5U * sizeof input[0U]);
+  memcpy(output + (uint32_t)5U,
+    input + (uint32_t)5U,
+    (uint32_t)5U * sizeof (input + (uint32_t)5U)[0U]);
+}
+
+static void Hacl_EC_Format_fexpand(uint64_t *output, uint8_t *input)
+{
+  uint64_t i0 = load64_le(input);
+  uint8_t *x00 = input + (uint32_t)6U;
+  uint64_t i1 = load64_le(x00);
+  uint8_t *x01 = input + (uint32_t)12U;
+  uint64_t i2 = load64_le(x01);
+  uint8_t *x02 = input + (uint32_t)19U;
+  uint64_t i3 = load64_le(x02);
+  uint8_t *x0 = input + (uint32_t)24U;
+  uint64_t i4 = load64_le(x0);
+  uint64_t output0 = i0 & (uint64_t)0x7ffffffffffffU;
+  uint64_t output1 = i1 >> (uint32_t)3U & (uint64_t)0x7ffffffffffffU;
+  uint64_t output2 = i2 >> (uint32_t)6U & (uint64_t)0x7ffffffffffffU;
+  uint64_t output3 = i3 >> (uint32_t)1U & (uint64_t)0x7ffffffffffffU;
+  uint64_t output4 = i4 >> (uint32_t)12U & (uint64_t)0x7ffffffffffffU;
+  output[0U] = output0;
+  output[1U] = output1;
+  output[2U] = output2;
+  output[3U] = output3;
+  output[4U] = output4;
+}
+
+static void Hacl_EC_Format_fcontract_first_carry_pass(uint64_t *input)
+{
+  uint64_t t0 = input[0U];
+  uint64_t t1 = input[1U];
+  uint64_t t2 = input[2U];
+  uint64_t t3 = input[3U];
+  uint64_t t4 = input[4U];
+  uint64_t t1_ = t1 + (t0 >> (uint32_t)51U);
+  uint64_t t0_ = t0 & (uint64_t)0x7ffffffffffffU;
+  uint64_t t2_ = t2 + (t1_ >> (uint32_t)51U);
+  uint64_t t1__ = t1_ & (uint64_t)0x7ffffffffffffU;
+  uint64_t t3_ = t3 + (t2_ >> (uint32_t)51U);
+  uint64_t t2__ = t2_ & (uint64_t)0x7ffffffffffffU;
+  uint64_t t4_ = t4 + (t3_ >> (uint32_t)51U);
+  uint64_t t3__ = t3_ & (uint64_t)0x7ffffffffffffU;
+  input[0U] = t0_;
+  input[1U] = t1__;
+  input[2U] = t2__;
+  input[3U] = t3__;
+  input[4U] = t4_;
+}
+
+static void Hacl_EC_Format_fcontract_first_carry_full(uint64_t *input)
+{
+  Hacl_EC_Format_fcontract_first_carry_pass(input);
+  Hacl_Bignum_Modulo_carry_top(input);
+}
+
+static void Hacl_EC_Format_fcontract_second_carry_pass(uint64_t *input)
+{
+  uint64_t t0 = input[0U];
+  uint64_t t1 = input[1U];
+  uint64_t t2 = input[2U];
+  uint64_t t3 = input[3U];
+  uint64_t t4 = input[4U];
+  uint64_t t1_ = t1 + (t0 >> (uint32_t)51U);
+  uint64_t t0_ = t0 & (uint64_t)0x7ffffffffffffU;
+  uint64_t t2_ = t2 + (t1_ >> (uint32_t)51U);
+  uint64_t t1__ = t1_ & (uint64_t)0x7ffffffffffffU;
+  uint64_t t3_ = t3 + (t2_ >> (uint32_t)51U);
+  uint64_t t2__ = t2_ & (uint64_t)0x7ffffffffffffU;
+  uint64_t t4_ = t4 + (t3_ >> (uint32_t)51U);
+  uint64_t t3__ = t3_ & (uint64_t)0x7ffffffffffffU;
+  input[0U] = t0_;
+  input[1U] = t1__;
+  input[2U] = t2__;
+  input[3U] = t3__;
+  input[4U] = t4_;
+}
+
+static void Hacl_EC_Format_fcontract_second_carry_full(uint64_t *input)
+{
+  uint64_t i0;
+  uint64_t i1;
+  uint64_t i0_;
+  uint64_t i1_;
+  Hacl_EC_Format_fcontract_second_carry_pass(input);
+  Hacl_Bignum_Modulo_carry_top(input);
+  i0 = input[0U];
+  i1 = input[1U];
+  i0_ = i0 & (uint64_t)0x7ffffffffffffU;
+  i1_ = i1 + (i0 >> (uint32_t)51U);
+  input[0U] = i0_;
+  input[1U] = i1_;
+}
+
+static void Hacl_EC_Format_fcontract_trim(uint64_t *input)
+{
+  uint64_t a0 = input[0U];
+  uint64_t a1 = input[1U];
+  uint64_t a2 = input[2U];
+  uint64_t a3 = input[3U];
+  uint64_t a4 = input[4U];
+  uint64_t mask0 = FStar_UInt64_gte_mask(a0, (uint64_t)0x7ffffffffffedU);
+  uint64_t mask1 = FStar_UInt64_eq_mask(a1, (uint64_t)0x7ffffffffffffU);
+  uint64_t mask2 = FStar_UInt64_eq_mask(a2, (uint64_t)0x7ffffffffffffU);
+  uint64_t mask3 = FStar_UInt64_eq_mask(a3, (uint64_t)0x7ffffffffffffU);
+  uint64_t mask4 = FStar_UInt64_eq_mask(a4, (uint64_t)0x7ffffffffffffU);
+  uint64_t mask = (((mask0 & mask1) & mask2) & mask3) & mask4;
+  uint64_t a0_ = a0 - ((uint64_t)0x7ffffffffffedU & mask);
+  uint64_t a1_ = a1 - ((uint64_t)0x7ffffffffffffU & mask);
+  uint64_t a2_ = a2 - ((uint64_t)0x7ffffffffffffU & mask);
+  uint64_t a3_ = a3 - ((uint64_t)0x7ffffffffffffU & mask);
+  uint64_t a4_ = a4 - ((uint64_t)0x7ffffffffffffU & mask);
+  input[0U] = a0_;
+  input[1U] = a1_;
+  input[2U] = a2_;
+  input[3U] = a3_;
+  input[4U] = a4_;
+}
+
+static void Hacl_EC_Format_fcontract_store(uint8_t *output, uint64_t *input)
+{
+  uint64_t t0 = input[0U];
+  uint64_t t1 = input[1U];
+  uint64_t t2 = input[2U];
+  uint64_t t3 = input[3U];
+  uint64_t t4 = input[4U];
+  uint64_t o0 = t1 << (uint32_t)51U | t0;
+  uint64_t o1 = t2 << (uint32_t)38U | t1 >> (uint32_t)13U;
+  uint64_t o2 = t3 << (uint32_t)25U | t2 >> (uint32_t)26U;
+  uint64_t o3 = t4 << (uint32_t)12U | t3 >> (uint32_t)39U;
+  uint8_t *b0 = output;
+  uint8_t *b1 = output + (uint32_t)8U;
+  uint8_t *b2 = output + (uint32_t)16U;
+  uint8_t *b3 = output + (uint32_t)24U;
+  store64_le(b0, o0);
+  store64_le(b1, o1);
+  store64_le(b2, o2);
+  store64_le(b3, o3);
+}
+
+static void Hacl_EC_Format_fcontract(uint8_t *output, uint64_t *input)
+{
+  Hacl_EC_Format_fcontract_first_carry_full(input);
+  Hacl_EC_Format_fcontract_second_carry_full(input);
+  Hacl_EC_Format_fcontract_trim(input);
+  Hacl_EC_Format_fcontract_store(output, input);
+}
+
+static void Hacl_EC_Format_scalar_of_point(uint8_t *scalar, uint64_t *point)
+{
+  uint64_t *x = point;
+  uint64_t *z = point + (uint32_t)5U;
+  uint64_t buf[10U] = { 0U };
+  uint64_t *zmone = buf;
+  uint64_t *sc = buf + (uint32_t)5U;
+  Hacl_Bignum_crecip(zmone, z);
+  Hacl_Bignum_fmul(sc, x, zmone);
+  Hacl_EC_Format_fcontract(scalar, sc);
+}
+
+static void
+Hacl_EC_AddAndDouble_fmonty(
+  uint64_t *pp,
+  uint64_t *ppq,
+  uint64_t *p,
+  uint64_t *pq,
+  uint64_t *qmqp
+)
+{
+  uint64_t *qx = qmqp;
+  uint64_t *x2 = pp;
+  uint64_t *z2 = pp + (uint32_t)5U;
+  uint64_t *x3 = ppq;
+  uint64_t *z3 = ppq + (uint32_t)5U;
+  uint64_t *x = p;
+  uint64_t *z = p + (uint32_t)5U;
+  uint64_t *xprime = pq;
+  uint64_t *zprime = pq + (uint32_t)5U;
+  uint64_t buf[40U] = { 0U };
+  uint64_t *origx = buf;
+  uint64_t *origxprime0 = buf + (uint32_t)5U;
+  uint64_t *xxprime0 = buf + (uint32_t)25U;
+  uint64_t *zzprime0 = buf + (uint32_t)30U;
+  uint64_t *origxprime;
+  uint64_t *xx0;
+  uint64_t *zz0;
+  uint64_t *xxprime;
+  uint64_t *zzprime;
+  uint64_t *zzzprime;
+  uint64_t *zzz;
+  uint64_t *xx;
+  uint64_t *zz;
+  uint64_t scalar;
+  memcpy(origx, x, (uint32_t)5U * sizeof x[0U]);
+  Hacl_Bignum_fsum(x, z);
+  Hacl_Bignum_fdifference(z, origx);
+  memcpy(origxprime0, xprime, (uint32_t)5U * sizeof xprime[0U]);
+  Hacl_Bignum_fsum(xprime, zprime);
+  Hacl_Bignum_fdifference(zprime, origxprime0);
+  Hacl_Bignum_fmul(xxprime0, xprime, z);
+  Hacl_Bignum_fmul(zzprime0, x, zprime);
+  origxprime = buf + (uint32_t)5U;
+  xx0 = buf + (uint32_t)15U;
+  zz0 = buf + (uint32_t)20U;
+  xxprime = buf + (uint32_t)25U;
+  zzprime = buf + (uint32_t)30U;
+  zzzprime = buf + (uint32_t)35U;
+  memcpy(origxprime, xxprime, (uint32_t)5U * sizeof xxprime[0U]);
+  Hacl_Bignum_fsum(xxprime, zzprime);
+  Hacl_Bignum_fdifference(zzprime, origxprime);
+  Hacl_Bignum_Fsquare_fsquare_times(x3, xxprime, (uint32_t)1U);
+  Hacl_Bignum_Fsquare_fsquare_times(zzzprime, zzprime, (uint32_t)1U);
+  Hacl_Bignum_fmul(z3, zzzprime, qx);
+  Hacl_Bignum_Fsquare_fsquare_times(xx0, x, (uint32_t)1U);
+  Hacl_Bignum_Fsquare_fsquare_times(zz0, z, (uint32_t)1U);
+  zzz = buf + (uint32_t)10U;
+  xx = buf + (uint32_t)15U;
+  zz = buf + (uint32_t)20U;
+  Hacl_Bignum_fmul(x2, xx, zz);
+  Hacl_Bignum_fdifference(zz, xx);
+  scalar = (uint64_t)121665U;
+  Hacl_Bignum_fscalar(zzz, zz, scalar);
+  Hacl_Bignum_fsum(zzz, xx);
+  Hacl_Bignum_fmul(z2, zzz, zz);
+}
+
+static void
+Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(
+  uint64_t *nq,
+  uint64_t *nqpq,
+  uint64_t *nq2,
+  uint64_t *nqpq2,
+  uint64_t *q,
+  uint8_t byt
+)
+{
+  uint64_t bit0 = (uint64_t)(byt >> (uint32_t)7U);
+  uint64_t bit;
+  Hacl_EC_Point_swap_conditional(nq, nqpq, bit0);
+  Hacl_EC_AddAndDouble_fmonty(nq2, nqpq2, nq, nqpq, q);
+  bit = (uint64_t)(byt >> (uint32_t)7U);
+  Hacl_EC_Point_swap_conditional(nq2, nqpq2, bit);
+}
+
+static void
+Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(
+  uint64_t *nq,
+  uint64_t *nqpq,
+  uint64_t *nq2,
+  uint64_t *nqpq2,
+  uint64_t *q,
+  uint8_t byt
+)
+{
+  uint8_t byt1;
+  Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt);
+  byt1 = byt << (uint32_t)1U;
+  Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1);
+}
+
+static void
+Hacl_EC_Ladder_SmallLoop_cmult_small_loop(
+  uint64_t *nq,
+  uint64_t *nqpq,
+  uint64_t *nq2,
+  uint64_t *nqpq2,
+  uint64_t *q,
+  uint8_t byt,
+  uint32_t i
+)
+{
+  if (!(i == (uint32_t)0U))
+  {
+    uint32_t i_ = i - (uint32_t)1U;
+    uint8_t byt_;
+    Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(nq, nqpq, nq2, nqpq2, q, byt);
+    byt_ = byt << (uint32_t)2U;
+    Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byt_, i_);
+  }
+}
+
+static void
+Hacl_EC_Ladder_BigLoop_cmult_big_loop(
+  uint8_t *n1,
+  uint64_t *nq,
+  uint64_t *nqpq,
+  uint64_t *nq2,
+  uint64_t *nqpq2,
+  uint64_t *q,
+  uint32_t i
+)
+{
+  if (!(i == (uint32_t)0U))
+  {
+    uint32_t i1 = i - (uint32_t)1U;
+    uint8_t byte = n1[i1];
+    Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byte, (uint32_t)4U);
+    Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, i1);
+  }
+}
+
+static void Hacl_EC_Ladder_cmult(uint64_t *result, uint8_t *n1, uint64_t *q)
+{
+  uint64_t point_buf[40U] = { 0U };
+  uint64_t *nq = point_buf;
+  uint64_t *nqpq = point_buf + (uint32_t)10U;
+  uint64_t *nq2 = point_buf + (uint32_t)20U;
+  uint64_t *nqpq2 = point_buf + (uint32_t)30U;
+  Hacl_EC_Point_copy(nqpq, q);
+  nq[0U] = (uint64_t)1U;
+  Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, (uint32_t)32U);
+  Hacl_EC_Point_copy(result, nq);
+}
+
+void Hacl_Curve25519_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint)
+{
+  uint64_t buf0[10U] = { 0U };
+  uint64_t *x0 = buf0;
+  uint64_t *z = buf0 + (uint32_t)5U;
+  uint64_t *q;
+  Hacl_EC_Format_fexpand(x0, basepoint);
+  z[0U] = (uint64_t)1U;
+  q = buf0;
+  {
+    uint8_t e[32U] = { 0U };
+    uint8_t e0;
+    uint8_t e31;
+    uint8_t e01;
+    uint8_t e311;
+    uint8_t e312;
+    uint8_t *scalar;
+    memcpy(e, secret, (uint32_t)32U * sizeof secret[0U]);
+    e0 = e[0U];
+    e31 = e[31U];
+    e01 = e0 & (uint8_t)248U;
+    e311 = e31 & (uint8_t)127U;
+    e312 = e311 | (uint8_t)64U;
+    e[0U] = e01;
+    e[31U] = e312;
+    scalar = e;
+    {
+      uint64_t buf[15U] = { 0U };
+      uint64_t *nq = buf;
+      uint64_t *x = nq;
+      x[0U] = (uint64_t)1U;
+      Hacl_EC_Ladder_cmult(nq, scalar, q);
+      Hacl_EC_Format_scalar_of_point(mypublic, nq);
+    }
+  }
+}
+
diff --git a/3rdparty/everest/library/Hacl_Curve25519_joined.c b/3rdparty/everest/library/Hacl_Curve25519_joined.c
new file mode 100644
index 0000000..a778160
--- /dev/null
+++ b/3rdparty/everest/library/Hacl_Curve25519_joined.c
@@ -0,0 +1,50 @@
+/*
+ *  Interface to code from Project Everest
+ *
+ *  Copyright 2016-2018 INRIA and Microsoft Corporation
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  This file is part of Mbed TLS (https://tls.mbed.org)
+ */
+#ifndef _BSD_SOURCE
+/* Required to get htole64() from gcc/glibc's endian.h (older systems)
+ * when we compile with -std=c99 */
+#define _BSD_SOURCE
+#endif
+#ifndef _DEFAULT_SOURCE
+/* (modern version of _BSD_SOURCE) */
+#define _DEFAULT_SOURCE
+#endif
+
+#include "common.h"
+
+#if defined(MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED)
+
+#if defined(__SIZEOF_INT128__) && (__SIZEOF_INT128__ == 16)
+#define MBEDTLS_HAVE_INT128
+#endif
+
+#if defined(MBEDTLS_HAVE_INT128)
+#include "Hacl_Curve25519.c"
+#else
+#define KRML_VERIFIED_UINT128
+#include "kremlib/FStar_UInt128_extracted.c"
+#include "legacy/Hacl_Curve25519.c"
+#endif
+
+#include "kremlib/FStar_UInt64_FStar_UInt32_FStar_UInt16_FStar_UInt8.c"
+
+#endif /* defined(MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED) */
+
diff --git a/3rdparty/everest/library/everest.c b/3rdparty/everest/library/everest.c
new file mode 100644
index 0000000..fefc6a2
--- /dev/null
+++ b/3rdparty/everest/library/everest.c
@@ -0,0 +1,102 @@
+/*
+ *  Interface to code from Project Everest
+ *
+ *  Copyright 2016-2018 INRIA and Microsoft Corporation
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  This file is part of Mbed TLS (https://tls.mbed.org).
+ */
+
+#include "common.h"
+
+#include <string.h>
+
+#include "mbedtls/ecdh.h"
+
+#include "everest/x25519.h"
+#include "everest/everest.h"
+
+#include "mbedtls/platform.h"
+
+#if defined(MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED)
+
+int mbedtls_everest_setup( mbedtls_ecdh_context_everest *ctx, int grp_id )
+{
+    if( grp_id != MBEDTLS_ECP_DP_CURVE25519 )
+        return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
+    mbedtls_x25519_init( &ctx->ctx );
+    return 0;
+}
+
+void mbedtls_everest_free( mbedtls_ecdh_context_everest *ctx )
+{
+    mbedtls_x25519_free( &ctx->ctx );
+}
+
+int mbedtls_everest_make_params( mbedtls_ecdh_context_everest *ctx, size_t *olen,
+                                 unsigned char *buf, size_t blen,
+                                 int( *f_rng )( void *, unsigned char *, size_t ),
+                                 void *p_rng )
+{
+    mbedtls_x25519_context *x25519_ctx = &ctx->ctx;
+    return mbedtls_x25519_make_params( x25519_ctx, olen, buf, blen, f_rng, p_rng );
+}
+
+int mbedtls_everest_read_params( mbedtls_ecdh_context_everest *ctx,
+                                 const unsigned char **buf,
+                                 const unsigned char *end )
+{
+    mbedtls_x25519_context *x25519_ctx = &ctx->ctx;
+    return mbedtls_x25519_read_params( x25519_ctx, buf, end );
+}
+
+int mbedtls_everest_get_params( mbedtls_ecdh_context_everest *ctx,
+                                const mbedtls_ecp_keypair *key,
+                                mbedtls_everest_ecdh_side side )
+{
+    mbedtls_x25519_context *x25519_ctx = &ctx->ctx;
+    mbedtls_x25519_ecdh_side s = side == MBEDTLS_EVEREST_ECDH_OURS ?
+                                            MBEDTLS_X25519_ECDH_OURS :
+                                            MBEDTLS_X25519_ECDH_THEIRS;
+    return mbedtls_x25519_get_params( x25519_ctx, key, s );
+}
+
+int mbedtls_everest_make_public( mbedtls_ecdh_context_everest *ctx, size_t *olen,
+                                 unsigned char *buf, size_t blen,
+                                 int( *f_rng )( void *, unsigned char *, size_t ),
+                                 void *p_rng )
+{
+    mbedtls_x25519_context *x25519_ctx = &ctx->ctx;
+    return mbedtls_x25519_make_public( x25519_ctx, olen, buf, blen, f_rng, p_rng );
+}
+
+int mbedtls_everest_read_public( mbedtls_ecdh_context_everest *ctx,
+                                 const unsigned char *buf, size_t blen )
+{
+    mbedtls_x25519_context *x25519_ctx = &ctx->ctx;
+    return mbedtls_x25519_read_public ( x25519_ctx, buf, blen );
+}
+
+int mbedtls_everest_calc_secret( mbedtls_ecdh_context_everest *ctx, size_t *olen,
+                                 unsigned char *buf, size_t blen,
+                                 int( *f_rng )( void *, unsigned char *, size_t ),
+                                 void *p_rng )
+{
+    mbedtls_x25519_context *x25519_ctx = &ctx->ctx;
+    return mbedtls_x25519_calc_secret( x25519_ctx, olen, buf, blen, f_rng, p_rng );
+}
+
+#endif /* MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED */
+
diff --git a/3rdparty/everest/library/kremlib/FStar_UInt128_extracted.c b/3rdparty/everest/library/kremlib/FStar_UInt128_extracted.c
new file mode 100644
index 0000000..1060515
--- /dev/null
+++ b/3rdparty/everest/library/kremlib/FStar_UInt128_extracted.c
@@ -0,0 +1,413 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+/* This file was generated by KreMLin <https://github.com/FStarLang/kremlin>
+ * KreMLin invocation: ../krml -fc89 -fparentheses -fno-shadow -header /mnt/e/everest/verify/hdrB9w -minimal -fparentheses -fcurly-braces -fno-shadow -header copyright-header.txt -minimal -tmpdir extracted -warn-error +9+11 -skip-compilation -extract-uints -add-include <inttypes.h> -add-include "kremlib.h" -add-include "kremlin/internal/compat.h" extracted/prims.krml extracted/FStar_Pervasives_Native.krml extracted/FStar_Pervasives.krml extracted/FStar_Mul.krml extracted/FStar_Squash.krml extracted/FStar_Classical.krml extracted/FStar_StrongExcludedMiddle.krml extracted/FStar_FunctionalExtensionality.krml extracted/FStar_List_Tot_Base.krml extracted/FStar_List_Tot_Properties.krml extracted/FStar_List_Tot.krml extracted/FStar_Seq_Base.krml extracted/FStar_Seq_Properties.krml extracted/FStar_Seq.krml extracted/FStar_Math_Lib.krml extracted/FStar_Math_Lemmas.krml extracted/FStar_BitVector.krml extracted/FStar_UInt.krml extracted/FStar_UInt32.krml extracted/FStar_Int.krml extracted/FStar_Int16.krml extracted/FStar_Preorder.krml extracted/FStar_Ghost.krml extracted/FStar_ErasedLogic.krml extracted/FStar_UInt64.krml extracted/FStar_Set.krml extracted/FStar_PropositionalExtensionality.krml extracted/FStar_PredicateExtensionality.krml extracted/FStar_TSet.krml extracted/FStar_Monotonic_Heap.krml extracted/FStar_Heap.krml extracted/FStar_Map.krml extracted/FStar_Monotonic_HyperHeap.krml extracted/FStar_Monotonic_HyperStack.krml extracted/FStar_HyperStack.krml extracted/FStar_Monotonic_Witnessed.krml extracted/FStar_HyperStack_ST.krml extracted/FStar_HyperStack_All.krml extracted/FStar_Date.krml extracted/FStar_Universe.krml extracted/FStar_GSet.krml extracted/FStar_ModifiesGen.krml extracted/LowStar_Monotonic_Buffer.krml extracted/LowStar_Buffer.krml extracted/Spec_Loops.krml extracted/LowStar_BufferOps.krml extracted/C_Loops.krml extracted/FStar_UInt8.krml extracted/FStar_Kremlin_Endianness.krml extracted/FStar_UInt63.krml extracted/FStar_Exn.krml extracted/FStar_ST.krml extracted/FStar_All.krml extracted/FStar_Dyn.krml extracted/FStar_Int63.krml extracted/FStar_Int64.krml extracted/FStar_Int32.krml extracted/FStar_Int8.krml extracted/FStar_UInt16.krml extracted/FStar_Int_Cast.krml extracted/FStar_UInt128.krml extracted/C_Endianness.krml extracted/FStar_List.krml extracted/FStar_Float.krml extracted/FStar_IO.krml extracted/C.krml extracted/FStar_Char.krml extracted/FStar_String.krml extracted/LowStar_Modifies.krml extracted/C_String.krml extracted/FStar_Bytes.krml extracted/FStar_HyperStack_IO.krml extracted/C_Failure.krml extracted/TestLib.krml extracted/FStar_Int_Cast_Full.krml
+ * F* version: 059db0c8
+ * KreMLin version: 916c37ac
+ */
+
+
+#include "FStar_UInt128.h"
+#include "kremlin/c_endianness.h"
+#include "FStar_UInt64_FStar_UInt32_FStar_UInt16_FStar_UInt8.h"
+
+uint64_t FStar_UInt128___proj__Mkuint128__item__low(FStar_UInt128_uint128 projectee)
+{
+  return projectee.low;
+}
+
+uint64_t FStar_UInt128___proj__Mkuint128__item__high(FStar_UInt128_uint128 projectee)
+{
+  return projectee.high;
+}
+
+static uint64_t FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b)
+{
+  return (a ^ ((a ^ b) | ((a - b) ^ b))) >> (uint32_t)63U;
+}
+
+static uint64_t FStar_UInt128_carry(uint64_t a, uint64_t b)
+{
+  return FStar_UInt128_constant_time_carry(a, b);
+}
+
+FStar_UInt128_uint128 FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  FStar_UInt128_uint128
+  flat = { a.low + b.low, a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low) };
+  return flat;
+}
+
+FStar_UInt128_uint128
+FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  FStar_UInt128_uint128
+  flat = { a.low + b.low, a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low) };
+  return flat;
+}
+
+FStar_UInt128_uint128 FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  FStar_UInt128_uint128
+  flat = { a.low + b.low, a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low) };
+  return flat;
+}
+
+FStar_UInt128_uint128 FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  FStar_UInt128_uint128
+  flat = { a.low - b.low, a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low) };
+  return flat;
+}
+
+FStar_UInt128_uint128
+FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  FStar_UInt128_uint128
+  flat = { a.low - b.low, a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low) };
+  return flat;
+}
+
+static FStar_UInt128_uint128
+FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  FStar_UInt128_uint128
+  flat = { a.low - b.low, a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low) };
+  return flat;
+}
+
+FStar_UInt128_uint128 FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  return FStar_UInt128_sub_mod_impl(a, b);
+}
+
+FStar_UInt128_uint128 FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  FStar_UInt128_uint128 flat = { a.low & b.low, a.high & b.high };
+  return flat;
+}
+
+FStar_UInt128_uint128 FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  FStar_UInt128_uint128 flat = { a.low ^ b.low, a.high ^ b.high };
+  return flat;
+}
+
+FStar_UInt128_uint128 FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  FStar_UInt128_uint128 flat = { a.low | b.low, a.high | b.high };
+  return flat;
+}
+
+FStar_UInt128_uint128 FStar_UInt128_lognot(FStar_UInt128_uint128 a)
+{
+  FStar_UInt128_uint128 flat = { ~a.low, ~a.high };
+  return flat;
+}
+
+static uint32_t FStar_UInt128_u32_64 = (uint32_t)64U;
+
+static uint64_t FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s)
+{
+  return (hi << s) + (lo >> (FStar_UInt128_u32_64 - s));
+}
+
+static uint64_t FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s)
+{
+  return FStar_UInt128_add_u64_shift_left(hi, lo, s);
+}
+
+static FStar_UInt128_uint128
+FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s)
+{
+  if (s == (uint32_t)0U)
+  {
+    return a;
+  }
+  else
+  {
+    FStar_UInt128_uint128
+    flat = { a.low << s, FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s) };
+    return flat;
+  }
+}
+
+static FStar_UInt128_uint128
+FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s)
+{
+  FStar_UInt128_uint128 flat = { (uint64_t)0U, a.low << (s - FStar_UInt128_u32_64) };
+  return flat;
+}
+
+FStar_UInt128_uint128 FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s)
+{
+  if (s < FStar_UInt128_u32_64)
+  {
+    return FStar_UInt128_shift_left_small(a, s);
+  }
+  else
+  {
+    return FStar_UInt128_shift_left_large(a, s);
+  }
+}
+
+static uint64_t FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s)
+{
+  return (lo >> s) + (hi << (FStar_UInt128_u32_64 - s));
+}
+
+static uint64_t FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s)
+{
+  return FStar_UInt128_add_u64_shift_right(hi, lo, s);
+}
+
+static FStar_UInt128_uint128
+FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s)
+{
+  if (s == (uint32_t)0U)
+  {
+    return a;
+  }
+  else
+  {
+    FStar_UInt128_uint128
+    flat = { FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s), a.high >> s };
+    return flat;
+  }
+}
+
+static FStar_UInt128_uint128
+FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s)
+{
+  FStar_UInt128_uint128 flat = { a.high >> (s - FStar_UInt128_u32_64), (uint64_t)0U };
+  return flat;
+}
+
+FStar_UInt128_uint128 FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s)
+{
+  if (s < FStar_UInt128_u32_64)
+  {
+    return FStar_UInt128_shift_right_small(a, s);
+  }
+  else
+  {
+    return FStar_UInt128_shift_right_large(a, s);
+  }
+}
+
+bool FStar_UInt128_eq(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  return a.low == b.low && a.high == b.high;
+}
+
+bool FStar_UInt128_gt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  return a.high > b.high || (a.high == b.high && a.low > b.low);
+}
+
+bool FStar_UInt128_lt(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  return a.high < b.high || (a.high == b.high && a.low < b.low);
+}
+
+bool FStar_UInt128_gte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  return a.high > b.high || (a.high == b.high && a.low >= b.low);
+}
+
+bool FStar_UInt128_lte(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  return a.high < b.high || (a.high == b.high && a.low <= b.low);
+}
+
+FStar_UInt128_uint128 FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  FStar_UInt128_uint128
+  flat =
+    {
+      FStar_UInt64_eq_mask(a.low,
+        b.low)
+      & FStar_UInt64_eq_mask(a.high, b.high),
+      FStar_UInt64_eq_mask(a.low,
+        b.low)
+      & FStar_UInt64_eq_mask(a.high, b.high)
+    };
+  return flat;
+}
+
+FStar_UInt128_uint128 FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+  FStar_UInt128_uint128
+  flat =
+    {
+      (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high))
+      | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)),
+      (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high))
+      | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low))
+    };
+  return flat;
+}
+
+FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a)
+{
+  FStar_UInt128_uint128 flat = { a, (uint64_t)0U };
+  return flat;
+}
+
+uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a)
+{
+  return a.low;
+}
+
+FStar_UInt128_uint128
+(*FStar_UInt128_op_Plus_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1) =
+  FStar_UInt128_add;
+
+FStar_UInt128_uint128
+(*FStar_UInt128_op_Plus_Question_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1) =
+  FStar_UInt128_add_underspec;
+
+FStar_UInt128_uint128
+(*FStar_UInt128_op_Plus_Percent_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1) =
+  FStar_UInt128_add_mod;
+
+FStar_UInt128_uint128
+(*FStar_UInt128_op_Subtraction_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1) =
+  FStar_UInt128_sub;
+
+FStar_UInt128_uint128
+(*FStar_UInt128_op_Subtraction_Question_Hat)(
+  FStar_UInt128_uint128 x0,
+  FStar_UInt128_uint128 x1
+) = FStar_UInt128_sub_underspec;
+
+FStar_UInt128_uint128
+(*FStar_UInt128_op_Subtraction_Percent_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1) =
+  FStar_UInt128_sub_mod;
+
+FStar_UInt128_uint128
+(*FStar_UInt128_op_Amp_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1) =
+  FStar_UInt128_logand;
+
+FStar_UInt128_uint128
+(*FStar_UInt128_op_Hat_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1) =
+  FStar_UInt128_logxor;
+
+FStar_UInt128_uint128
+(*FStar_UInt128_op_Bar_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1) =
+  FStar_UInt128_logor;
+
+FStar_UInt128_uint128
+(*FStar_UInt128_op_Less_Less_Hat)(FStar_UInt128_uint128 x0, uint32_t x1) =
+  FStar_UInt128_shift_left;
+
+FStar_UInt128_uint128
+(*FStar_UInt128_op_Greater_Greater_Hat)(FStar_UInt128_uint128 x0, uint32_t x1) =
+  FStar_UInt128_shift_right;
+
+bool
+(*FStar_UInt128_op_Equals_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1) =
+  FStar_UInt128_eq;
+
+bool
+(*FStar_UInt128_op_Greater_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1) =
+  FStar_UInt128_gt;
+
+bool
+(*FStar_UInt128_op_Less_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1) =
+  FStar_UInt128_lt;
+
+bool
+(*FStar_UInt128_op_Greater_Equals_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1) =
+  FStar_UInt128_gte;
+
+bool
+(*FStar_UInt128_op_Less_Equals_Hat)(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1) =
+  FStar_UInt128_lte;
+
+static uint64_t FStar_UInt128_u64_mod_32(uint64_t a)
+{
+  return a & (uint64_t)0xffffffffU;
+}
+
+static uint32_t FStar_UInt128_u32_32 = (uint32_t)32U;
+
+static uint64_t FStar_UInt128_u32_combine(uint64_t hi, uint64_t lo)
+{
+  return lo + (hi << FStar_UInt128_u32_32);
+}
+
+FStar_UInt128_uint128 FStar_UInt128_mul32(uint64_t x, uint32_t y)
+{
+  FStar_UInt128_uint128
+  flat =
+    {
+      FStar_UInt128_u32_combine((x >> FStar_UInt128_u32_32)
+        * (uint64_t)y
+        + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32),
+        FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y)),
+      ((x >> FStar_UInt128_u32_32)
+      * (uint64_t)y
+      + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> FStar_UInt128_u32_32))
+      >> FStar_UInt128_u32_32
+    };
+  return flat;
+}
+
+typedef struct K___uint64_t_uint64_t_uint64_t_uint64_t_s
+{
+  uint64_t fst;
+  uint64_t snd;
+  uint64_t thd;
+  uint64_t f3;
+}
+K___uint64_t_uint64_t_uint64_t_uint64_t;
+
+static K___uint64_t_uint64_t_uint64_t_uint64_t
+FStar_UInt128_mul_wide_impl_t_(uint64_t x, uint64_t y)
+{
+  K___uint64_t_uint64_t_uint64_t_uint64_t
+  flat =
+    {
+      FStar_UInt128_u64_mod_32(x),
+      FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y)),
+      x
+      >> FStar_UInt128_u32_32,
+      (x >> FStar_UInt128_u32_32)
+      * FStar_UInt128_u64_mod_32(y)
+      + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32)
+    };
+  return flat;
+}
+
+static uint64_t FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo)
+{
+  return lo + (hi << FStar_UInt128_u32_32);
+}
+
+static FStar_UInt128_uint128 FStar_UInt128_mul_wide_impl(uint64_t x, uint64_t y)
+{
+  K___uint64_t_uint64_t_uint64_t_uint64_t scrut = FStar_UInt128_mul_wide_impl_t_(x, y);
+  uint64_t u1 = scrut.fst;
+  uint64_t w3 = scrut.snd;
+  uint64_t x_ = scrut.thd;
+  uint64_t t_ = scrut.f3;
+  FStar_UInt128_uint128
+  flat =
+    {
+      FStar_UInt128_u32_combine_(u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_),
+        w3),
+      x_
+      * (y >> FStar_UInt128_u32_32)
+      + (t_ >> FStar_UInt128_u32_32)
+      + ((u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_)) >> FStar_UInt128_u32_32)
+    };
+  return flat;
+}
+
+FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y)
+{
+  return FStar_UInt128_mul_wide_impl(x, y);
+}
+
diff --git a/3rdparty/everest/library/kremlib/FStar_UInt64_FStar_UInt32_FStar_UInt16_FStar_UInt8.c b/3rdparty/everest/library/kremlib/FStar_UInt64_FStar_UInt32_FStar_UInt16_FStar_UInt8.c
new file mode 100644
index 0000000..0826524
--- /dev/null
+++ b/3rdparty/everest/library/kremlib/FStar_UInt64_FStar_UInt32_FStar_UInt16_FStar_UInt8.c
@@ -0,0 +1,100 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+/* This file was generated by KreMLin <https://github.com/FStarLang/kremlin>
+ * KreMLin invocation: ../krml -fc89 -fparentheses -fno-shadow -header /mnt/e/everest/verify/hdrB9w -minimal -fparentheses -fcurly-braces -fno-shadow -header copyright-header.txt -minimal -tmpdir dist/minimal -skip-compilation -extract-uints -add-include <inttypes.h> -add-include <stdbool.h> -add-include "kremlin/internal/compat.h" -add-include "kremlin/internal/types.h" -bundle FStar.UInt64+FStar.UInt32+FStar.UInt16+FStar.UInt8=* extracted/prims.krml extracted/FStar_Pervasives_Native.krml extracted/FStar_Pervasives.krml extracted/FStar_Mul.krml extracted/FStar_Squash.krml extracted/FStar_Classical.krml extracted/FStar_StrongExcludedMiddle.krml extracted/FStar_FunctionalExtensionality.krml extracted/FStar_List_Tot_Base.krml extracted/FStar_List_Tot_Properties.krml extracted/FStar_List_Tot.krml extracted/FStar_Seq_Base.krml extracted/FStar_Seq_Properties.krml extracted/FStar_Seq.krml extracted/FStar_Math_Lib.krml extracted/FStar_Math_Lemmas.krml extracted/FStar_BitVector.krml extracted/FStar_UInt.krml extracted/FStar_UInt32.krml extracted/FStar_Int.krml extracted/FStar_Int16.krml extracted/FStar_Preorder.krml extracted/FStar_Ghost.krml extracted/FStar_ErasedLogic.krml extracted/FStar_UInt64.krml extracted/FStar_Set.krml extracted/FStar_PropositionalExtensionality.krml extracted/FStar_PredicateExtensionality.krml extracted/FStar_TSet.krml extracted/FStar_Monotonic_Heap.krml extracted/FStar_Heap.krml extracted/FStar_Map.krml extracted/FStar_Monotonic_HyperHeap.krml extracted/FStar_Monotonic_HyperStack.krml extracted/FStar_HyperStack.krml extracted/FStar_Monotonic_Witnessed.krml extracted/FStar_HyperStack_ST.krml extracted/FStar_HyperStack_All.krml extracted/FStar_Date.krml extracted/FStar_Universe.krml extracted/FStar_GSet.krml extracted/FStar_ModifiesGen.krml extracted/LowStar_Monotonic_Buffer.krml extracted/LowStar_Buffer.krml extracted/Spec_Loops.krml extracted/LowStar_BufferOps.krml extracted/C_Loops.krml extracted/FStar_UInt8.krml extracted/FStar_Kremlin_Endianness.krml extracted/FStar_UInt63.krml extracted/FStar_Exn.krml extracted/FStar_ST.krml extracted/FStar_All.krml extracted/FStar_Dyn.krml extracted/FStar_Int63.krml extracted/FStar_Int64.krml extracted/FStar_Int32.krml extracted/FStar_Int8.krml extracted/FStar_UInt16.krml extracted/FStar_Int_Cast.krml extracted/FStar_UInt128.krml extracted/C_Endianness.krml extracted/FStar_List.krml extracted/FStar_Float.krml extracted/FStar_IO.krml extracted/C.krml extracted/FStar_Char.krml extracted/FStar_String.krml extracted/LowStar_Modifies.krml extracted/C_String.krml extracted/FStar_Bytes.krml extracted/FStar_HyperStack_IO.krml extracted/C_Failure.krml extracted/TestLib.krml extracted/FStar_Int_Cast_Full.krml
+ * F* version: 059db0c8
+ * KreMLin version: 916c37ac
+ */
+
+
+#include "FStar_UInt64_FStar_UInt32_FStar_UInt16_FStar_UInt8.h"
+
+uint64_t FStar_UInt64_eq_mask(uint64_t a, uint64_t b)
+{
+  uint64_t x = a ^ b;
+  uint64_t minus_x = ~x + (uint64_t)1U;
+  uint64_t x_or_minus_x = x | minus_x;
+  uint64_t xnx = x_or_minus_x >> (uint32_t)63U;
+  return xnx - (uint64_t)1U;
+}
+
+uint64_t FStar_UInt64_gte_mask(uint64_t a, uint64_t b)
+{
+  uint64_t x = a;
+  uint64_t y = b;
+  uint64_t x_xor_y = x ^ y;
+  uint64_t x_sub_y = x - y;
+  uint64_t x_sub_y_xor_y = x_sub_y ^ y;
+  uint64_t q = x_xor_y | x_sub_y_xor_y;
+  uint64_t x_xor_q = x ^ q;
+  uint64_t x_xor_q_ = x_xor_q >> (uint32_t)63U;
+  return x_xor_q_ - (uint64_t)1U;
+}
+
+uint32_t FStar_UInt32_eq_mask(uint32_t a, uint32_t b)
+{
+  uint32_t x = a ^ b;
+  uint32_t minus_x = ~x + (uint32_t)1U;
+  uint32_t x_or_minus_x = x | minus_x;
+  uint32_t xnx = x_or_minus_x >> (uint32_t)31U;
+  return xnx - (uint32_t)1U;
+}
+
+uint32_t FStar_UInt32_gte_mask(uint32_t a, uint32_t b)
+{
+  uint32_t x = a;
+  uint32_t y = b;
+  uint32_t x_xor_y = x ^ y;
+  uint32_t x_sub_y = x - y;
+  uint32_t x_sub_y_xor_y = x_sub_y ^ y;
+  uint32_t q = x_xor_y | x_sub_y_xor_y;
+  uint32_t x_xor_q = x ^ q;
+  uint32_t x_xor_q_ = x_xor_q >> (uint32_t)31U;
+  return x_xor_q_ - (uint32_t)1U;
+}
+
+uint16_t FStar_UInt16_eq_mask(uint16_t a, uint16_t b)
+{
+  uint16_t x = a ^ b;
+  uint16_t minus_x = ~x + (uint16_t)1U;
+  uint16_t x_or_minus_x = x | minus_x;
+  uint16_t xnx = x_or_minus_x >> (uint32_t)15U;
+  return xnx - (uint16_t)1U;
+}
+
+uint16_t FStar_UInt16_gte_mask(uint16_t a, uint16_t b)
+{
+  uint16_t x = a;
+  uint16_t y = b;
+  uint16_t x_xor_y = x ^ y;
+  uint16_t x_sub_y = x - y;
+  uint16_t x_sub_y_xor_y = x_sub_y ^ y;
+  uint16_t q = x_xor_y | x_sub_y_xor_y;
+  uint16_t x_xor_q = x ^ q;
+  uint16_t x_xor_q_ = x_xor_q >> (uint32_t)15U;
+  return x_xor_q_ - (uint16_t)1U;
+}
+
+uint8_t FStar_UInt8_eq_mask(uint8_t a, uint8_t b)
+{
+  uint8_t x = a ^ b;
+  uint8_t minus_x = ~x + (uint8_t)1U;
+  uint8_t x_or_minus_x = x | minus_x;
+  uint8_t xnx = x_or_minus_x >> (uint32_t)7U;
+  return xnx - (uint8_t)1U;
+}
+
+uint8_t FStar_UInt8_gte_mask(uint8_t a, uint8_t b)
+{
+  uint8_t x = a;
+  uint8_t y = b;
+  uint8_t x_xor_y = x ^ y;
+  uint8_t x_sub_y = x - y;
+  uint8_t x_sub_y_xor_y = x_sub_y ^ y;
+  uint8_t q = x_xor_y | x_sub_y_xor_y;
+  uint8_t x_xor_q = x ^ q;
+  uint8_t x_xor_q_ = x_xor_q >> (uint32_t)7U;
+  return x_xor_q_ - (uint8_t)1U;
+}
+
diff --git a/3rdparty/everest/library/legacy/Hacl_Curve25519.c b/3rdparty/everest/library/legacy/Hacl_Curve25519.c
new file mode 100644
index 0000000..babebe4
--- /dev/null
+++ b/3rdparty/everest/library/legacy/Hacl_Curve25519.c
@@ -0,0 +1,805 @@
+/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved.
+   Licensed under the Apache 2.0 License. */
+
+/* This file was generated by KreMLin <https://github.com/FStarLang/kremlin>
+ * KreMLin invocation: /mnt/e/everest/verify/kremlin/krml -fc89 -fparentheses -fno-shadow -header /mnt/e/everest/verify/hdrcLh -minimal -fc89 -fparentheses -fno-shadow -header /mnt/e/everest/verify/hdrcLh -minimal -I /mnt/e/everest/verify/hacl-star/code/lib/kremlin -I /mnt/e/everest/verify/kremlin/kremlib/compat -I /mnt/e/everest/verify/hacl-star/specs -I /mnt/e/everest/verify/hacl-star/specs/old -I . -ccopt -march=native -verbose -ldopt -flto -tmpdir x25519-c -I ../bignum -bundle Hacl.Curve25519=* -minimal -add-include "kremlib.h" -skip-compilation x25519-c/out.krml -o x25519-c/Hacl_Curve25519.c
+ * F* version: 059db0c8
+ * KreMLin version: 916c37ac
+ */
+
+
+#include "Hacl_Curve25519.h"
+
+extern uint64_t FStar_UInt64_eq_mask(uint64_t x0, uint64_t x1);
+
+extern uint64_t FStar_UInt64_gte_mask(uint64_t x0, uint64_t x1);
+
+extern FStar_UInt128_uint128
+FStar_UInt128_add(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1);
+
+extern FStar_UInt128_uint128
+FStar_UInt128_add_mod(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1);
+
+extern FStar_UInt128_uint128
+FStar_UInt128_logand(FStar_UInt128_uint128 x0, FStar_UInt128_uint128 x1);
+
+extern FStar_UInt128_uint128 FStar_UInt128_shift_right(FStar_UInt128_uint128 x0, uint32_t x1);
+
+extern FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t x0);
+
+extern uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 x0);
+
+extern FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x0, uint64_t x1);
+
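+/* Partially reduce the top limb: bits of b[4] above 2^51 stand for multiples
+ * of 2^255, and 2^255 = 19 (mod 2^255 - 19), so they are folded back into
+ * b[0] with a factor of 19. */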
+static void Hacl_Bignum_Modulo_carry_top(uint64_t *b)
+{
+  uint64_t b4 = b[4U];
+  uint64_t b0 = b[0U];
+  uint64_t b4_ = b4 & (uint64_t)0x7ffffffffffffU;
+  uint64_t b0_ = b0 + (uint64_t)19U * (b4 >> (uint32_t)51U);
+  b[4U] = b4_;
+  b[0U] = b0_;
+}
+
+inline static void
+Hacl_Bignum_Fproduct_copy_from_wide_(uint64_t *output, FStar_UInt128_uint128 *input)
+{
+  uint32_t i;
+  for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
+  {
+    FStar_UInt128_uint128 xi = input[i];
+    output[i] = FStar_UInt128_uint128_to_uint64(xi);
+  }
+}
+
+inline static void
+Hacl_Bignum_Fproduct_sum_scalar_multiplication_(
+  FStar_UInt128_uint128 *output,
+  uint64_t *input,
+  uint64_t s
+)
+{
+  uint32_t i;
+  for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
+  {
+    FStar_UInt128_uint128 xi = output[i];
+    uint64_t yi = input[i];
+    output[i] = FStar_UInt128_add_mod(xi, FStar_UInt128_mul_wide(yi, s));
+  }
+}
+
+inline static void Hacl_Bignum_Fproduct_carry_wide_(FStar_UInt128_uint128 *tmp)
+{
+  uint32_t i;
+  for (i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U)
+  {
+    uint32_t ctr = i;
+    FStar_UInt128_uint128 tctr = tmp[ctr];
+    FStar_UInt128_uint128 tctrp1 = tmp[ctr + (uint32_t)1U];
+    uint64_t r0 = FStar_UInt128_uint128_to_uint64(tctr) & (uint64_t)0x7ffffffffffffU;
+    FStar_UInt128_uint128 c = FStar_UInt128_shift_right(tctr, (uint32_t)51U);
+    tmp[ctr] = FStar_UInt128_uint64_to_uint128(r0);
+    tmp[ctr + (uint32_t)1U] = FStar_UInt128_add(tctrp1, c);
+  }
+}
+
+inline static void Hacl_Bignum_Fmul_shift_reduce(uint64_t *output)
+{
+  uint64_t tmp = output[4U];
+  uint64_t b0;
+  {
+    uint32_t i;
+    for (i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U)
+    {
+      uint32_t ctr = (uint32_t)5U - i - (uint32_t)1U;
+      uint64_t z = output[ctr - (uint32_t)1U];
+      output[ctr] = z;
+    }
+  }
+  output[0U] = tmp;
+  b0 = output[0U];
+  output[0U] = (uint64_t)19U * b0;
+}
+
+static void
+Hacl_Bignum_Fmul_mul_shift_reduce_(
+  FStar_UInt128_uint128 *output,
+  uint64_t *input,
+  uint64_t *input2
+)
+{
+  uint32_t i;
+  uint64_t input2i;
+  {
+    uint32_t i0;
+    for (i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0 = i0 + (uint32_t)1U)
+    {
+      uint64_t input2i0 = input2[i0];
+      Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i0);
+      Hacl_Bignum_Fmul_shift_reduce(input);
+    }
+  }
+  i = (uint32_t)4U;
+  input2i = input2[i];
+  Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i);
+}
+
+inline static void Hacl_Bignum_Fmul_fmul(uint64_t *output, uint64_t *input, uint64_t *input2)
+{
+  uint64_t tmp[5U] = { 0U };
+  memcpy(tmp, input, (uint32_t)5U * sizeof input[0U]);
+  KRML_CHECK_SIZE(sizeof (FStar_UInt128_uint128), (uint32_t)5U);
+  {
+    FStar_UInt128_uint128 t[5U];
+    {
+      uint32_t _i;
+      for (_i = 0U; _i < (uint32_t)5U; ++_i)
+        t[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U);
+    }
+    {
+      FStar_UInt128_uint128 b4;
+      FStar_UInt128_uint128 b0;
+      FStar_UInt128_uint128 b4_;
+      FStar_UInt128_uint128 b0_;
+      uint64_t i0;
+      uint64_t i1;
+      uint64_t i0_;
+      uint64_t i1_;
+      Hacl_Bignum_Fmul_mul_shift_reduce_(t, tmp, input2);
+      Hacl_Bignum_Fproduct_carry_wide_(t);
+      b4 = t[4U];
+      b0 = t[0U];
+      b4_ = FStar_UInt128_logand(b4, FStar_UInt128_uint64_to_uint128((uint64_t)0x7ffffffffffffU));
+      b0_ =
+        FStar_UInt128_add(b0,
+          FStar_UInt128_mul_wide((uint64_t)19U,
+            FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(b4, (uint32_t)51U))));
+      t[4U] = b4_;
+      t[0U] = b0_;
+      Hacl_Bignum_Fproduct_copy_from_wide_(output, t);
+      i0 = output[0U];
+      i1 = output[1U];
+      i0_ = i0 & (uint64_t)0x7ffffffffffffU;
+      i1_ = i1 + (i0 >> (uint32_t)51U);
+      output[0U] = i0_;
+      output[1U] = i1_;
+    }
+  }
+}
+
+inline static void Hacl_Bignum_Fsquare_fsquare__(FStar_UInt128_uint128 *tmp, uint64_t *output)
+{
+  uint64_t r0 = output[0U];
+  uint64_t r1 = output[1U];
+  uint64_t r2 = output[2U];
+  uint64_t r3 = output[3U];
+  uint64_t r4 = output[4U];
+  uint64_t d0 = r0 * (uint64_t)2U;
+  uint64_t d1 = r1 * (uint64_t)2U;
+  uint64_t d2 = r2 * (uint64_t)2U * (uint64_t)19U;
+  uint64_t d419 = r4 * (uint64_t)19U;
+  uint64_t d4 = d419 * (uint64_t)2U;
+  FStar_UInt128_uint128
+  s0 =
+    FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(r0, r0),
+        FStar_UInt128_mul_wide(d4, r1)),
+      FStar_UInt128_mul_wide(d2, r3));
+  FStar_UInt128_uint128
+  s1 =
+    FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, r1),
+        FStar_UInt128_mul_wide(d4, r2)),
+      FStar_UInt128_mul_wide(r3 * (uint64_t)19U, r3));
+  FStar_UInt128_uint128
+  s2 =
+    FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, r2),
+        FStar_UInt128_mul_wide(r1, r1)),
+      FStar_UInt128_mul_wide(d4, r3));
+  FStar_UInt128_uint128
+  s3 =
+    FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, r3),
+        FStar_UInt128_mul_wide(d1, r2)),
+      FStar_UInt128_mul_wide(r4, d419));
+  FStar_UInt128_uint128
+  s4 =
+    FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, r4),
+        FStar_UInt128_mul_wide(d1, r3)),
+      FStar_UInt128_mul_wide(r2, r2));
+  tmp[0U] = s0;
+  tmp[1U] = s1;
+  tmp[2U] = s2;
+  tmp[3U] = s3;
+  tmp[4U] = s4;
+}
+
+inline static void Hacl_Bignum_Fsquare_fsquare_(FStar_UInt128_uint128 *tmp, uint64_t *output)
+{
+  FStar_UInt128_uint128 b4;
+  FStar_UInt128_uint128 b0;
+  FStar_UInt128_uint128 b4_;
+  FStar_UInt128_uint128 b0_;
+  uint64_t i0;
+  uint64_t i1;
+  uint64_t i0_;
+  uint64_t i1_;
+  Hacl_Bignum_Fsquare_fsquare__(tmp, output);
+  Hacl_Bignum_Fproduct_carry_wide_(tmp);
+  b4 = tmp[4U];
+  b0 = tmp[0U];
+  b4_ = FStar_UInt128_logand(b4, FStar_UInt128_uint64_to_uint128((uint64_t)0x7ffffffffffffU));
+  b0_ =
+    FStar_UInt128_add(b0,
+      FStar_UInt128_mul_wide((uint64_t)19U,
+        FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(b4, (uint32_t)51U))));
+  tmp[4U] = b4_;
+  tmp[0U] = b0_;
+  Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp);
+  i0 = output[0U];
+  i1 = output[1U];
+  i0_ = i0 & (uint64_t)0x7ffffffffffffU;
+  i1_ = i1 + (i0 >> (uint32_t)51U);
+  output[0U] = i0_;
+  output[1U] = i1_;
+}
+
+static void
+Hacl_Bignum_Fsquare_fsquare_times_(
+  uint64_t *input,
+  FStar_UInt128_uint128 *tmp,
+  uint32_t count1
+)
+{
+  uint32_t i;
+  Hacl_Bignum_Fsquare_fsquare_(tmp, input);
+  for (i = (uint32_t)1U; i < count1; i = i + (uint32_t)1U)
+    Hacl_Bignum_Fsquare_fsquare_(tmp, input);
+}
+
+inline static void
+Hacl_Bignum_Fsquare_fsquare_times(uint64_t *output, uint64_t *input, uint32_t count1)
+{
+  KRML_CHECK_SIZE(sizeof (FStar_UInt128_uint128), (uint32_t)5U);
+  {
+    FStar_UInt128_uint128 t[5U];
+    {
+      uint32_t _i;
+      for (_i = 0U; _i < (uint32_t)5U; ++_i)
+        t[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U);
+    }
+    memcpy(output, input, (uint32_t)5U * sizeof input[0U]);
+    Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1);
+  }
+}
+
+inline static void Hacl_Bignum_Fsquare_fsquare_times_inplace(uint64_t *output, uint32_t count1)
+{
+  KRML_CHECK_SIZE(sizeof (FStar_UInt128_uint128), (uint32_t)5U);
+  {
+    FStar_UInt128_uint128 t[5U];
+    {
+      uint32_t _i;
+      for (_i = 0U; _i < (uint32_t)5U; ++_i)
+        t[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U);
+    }
+    Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1);
+  }
+}
+
+inline static void Hacl_Bignum_Crecip_crecip(uint64_t *out, uint64_t *z)
+{
+  uint64_t buf[20U] = { 0U };
+  uint64_t *a0 = buf;
+  uint64_t *t00 = buf + (uint32_t)5U;
+  uint64_t *b0 = buf + (uint32_t)10U;
+  uint64_t *t01;
+  uint64_t *b1;
+  uint64_t *c0;
+  uint64_t *a;
+  uint64_t *t0;
+  uint64_t *b;
+  uint64_t *c;
+  Hacl_Bignum_Fsquare_fsquare_times(a0, z, (uint32_t)1U);
+  Hacl_Bignum_Fsquare_fsquare_times(t00, a0, (uint32_t)2U);
+  Hacl_Bignum_Fmul_fmul(b0, t00, z);
+  Hacl_Bignum_Fmul_fmul(a0, b0, a0);
+  Hacl_Bignum_Fsquare_fsquare_times(t00, a0, (uint32_t)1U);
+  Hacl_Bignum_Fmul_fmul(b0, t00, b0);
+  Hacl_Bignum_Fsquare_fsquare_times(t00, b0, (uint32_t)5U);
+  t01 = buf + (uint32_t)5U;
+  b1 = buf + (uint32_t)10U;
+  c0 = buf + (uint32_t)15U;
+  Hacl_Bignum_Fmul_fmul(b1, t01, b1);
+  Hacl_Bignum_Fsquare_fsquare_times(t01, b1, (uint32_t)10U);
+  Hacl_Bignum_Fmul_fmul(c0, t01, b1);
+  Hacl_Bignum_Fsquare_fsquare_times(t01, c0, (uint32_t)20U);
+  Hacl_Bignum_Fmul_fmul(t01, t01, c0);
+  Hacl_Bignum_Fsquare_fsquare_times_inplace(t01, (uint32_t)10U);
+  Hacl_Bignum_Fmul_fmul(b1, t01, b1);
+  Hacl_Bignum_Fsquare_fsquare_times(t01, b1, (uint32_t)50U);
+  a = buf;
+  t0 = buf + (uint32_t)5U;
+  b = buf + (uint32_t)10U;
+  c = buf + (uint32_t)15U;
+  Hacl_Bignum_Fmul_fmul(c, t0, b);
+  Hacl_Bignum_Fsquare_fsquare_times(t0, c, (uint32_t)100U);
+  Hacl_Bignum_Fmul_fmul(t0, t0, c);
+  Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, (uint32_t)50U);
+  Hacl_Bignum_Fmul_fmul(t0, t0, b);
+  Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, (uint32_t)5U);
+  Hacl_Bignum_Fmul_fmul(out, t0, a);
+}
+
+inline static void Hacl_Bignum_fsum(uint64_t *a, uint64_t *b)
+{
+  uint32_t i;
+  for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
+  {
+    uint64_t xi = a[i];
+    uint64_t yi = b[i];
+    a[i] = xi + yi;
+  }
+}
+
+inline static void Hacl_Bignum_fdifference(uint64_t *a, uint64_t *b)
+{
+  uint64_t tmp[5U] = { 0U };
+  uint64_t b0;
+  uint64_t b1;
+  uint64_t b2;
+  uint64_t b3;
+  uint64_t b4;
+  memcpy(tmp, b, (uint32_t)5U * sizeof b[0U]);
+  b0 = tmp[0U];
+  b1 = tmp[1U];
+  b2 = tmp[2U];
+  b3 = tmp[3U];
+  b4 = tmp[4U];
+  tmp[0U] = b0 + (uint64_t)0x3fffffffffff68U;
+  tmp[1U] = b1 + (uint64_t)0x3ffffffffffff8U;
+  tmp[2U] = b2 + (uint64_t)0x3ffffffffffff8U;
+  tmp[3U] = b3 + (uint64_t)0x3ffffffffffff8U;
+  tmp[4U] = b4 + (uint64_t)0x3ffffffffffff8U;
+  {
+    uint32_t i;
+    for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
+    {
+      uint64_t xi = a[i];
+      uint64_t yi = tmp[i];
+      a[i] = yi - xi;
+    }
+  }
+}
+
+inline static void Hacl_Bignum_fscalar(uint64_t *output, uint64_t *b, uint64_t s)
+{
+  KRML_CHECK_SIZE(sizeof (FStar_UInt128_uint128), (uint32_t)5U);
+  {
+    FStar_UInt128_uint128 tmp[5U];
+    {
+      uint32_t _i;
+      for (_i = 0U; _i < (uint32_t)5U; ++_i)
+        tmp[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U);
+    }
+    {
+      FStar_UInt128_uint128 b4;
+      FStar_UInt128_uint128 b0;
+      FStar_UInt128_uint128 b4_;
+      FStar_UInt128_uint128 b0_;
+      {
+        uint32_t i;
+        for (i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U)
+        {
+          uint64_t xi = b[i];
+          tmp[i] = FStar_UInt128_mul_wide(xi, s);
+        }
+      }
+      Hacl_Bignum_Fproduct_carry_wide_(tmp);
+      b4 = tmp[4U];
+      b0 = tmp[0U];
+      b4_ = FStar_UInt128_logand(b4, FStar_UInt128_uint64_to_uint128((uint64_t)0x7ffffffffffffU));
+      b0_ =
+        FStar_UInt128_add(b0,
+          FStar_UInt128_mul_wide((uint64_t)19U,
+            FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(b4, (uint32_t)51U))));
+      tmp[4U] = b4_;
+      tmp[0U] = b0_;
+      Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp);
+    }
+  }
+}
+
+inline static void Hacl_Bignum_fmul(uint64_t *output, uint64_t *a, uint64_t *b)
+{
+  Hacl_Bignum_Fmul_fmul(output, a, b);
+}
+
+inline static void Hacl_Bignum_crecip(uint64_t *output, uint64_t *input)
+{
+  Hacl_Bignum_Crecip_crecip(output, input);
+}
+
+static void
+Hacl_EC_Point_swap_conditional_step(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr)
+{
+  uint32_t i = ctr - (uint32_t)1U;
+  uint64_t ai = a[i];
+  uint64_t bi = b[i];
+  uint64_t x = swap1 & (ai ^ bi);
+  uint64_t ai1 = ai ^ x;
+  uint64_t bi1 = bi ^ x;
+  a[i] = ai1;
+  b[i] = bi1;
+}
+
+static void
+Hacl_EC_Point_swap_conditional_(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr)
+{
+  if (!(ctr == (uint32_t)0U))
+  {
+    uint32_t i;
+    Hacl_EC_Point_swap_conditional_step(a, b, swap1, ctr);
+    i = ctr - (uint32_t)1U;
+    Hacl_EC_Point_swap_conditional_(a, b, swap1, i);
+  }
+}
+
+static void Hacl_EC_Point_swap_conditional(uint64_t *a, uint64_t *b, uint64_t iswap)
+{
+  uint64_t swap1 = (uint64_t)0U - iswap;
+  Hacl_EC_Point_swap_conditional_(a, b, swap1, (uint32_t)5U);
+  Hacl_EC_Point_swap_conditional_(a + (uint32_t)5U, b + (uint32_t)5U, swap1, (uint32_t)5U);
+}
+
+static void Hacl_EC_Point_copy(uint64_t *output, uint64_t *input)
+{
+  memcpy(output, input, (uint32_t)5U * sizeof input[0U]);
+  memcpy(output + (uint32_t)5U,
+    input + (uint32_t)5U,
+    (uint32_t)5U * sizeof (input + (uint32_t)5U)[0U]);
+}
+
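+/* Unpack a 32-byte little-endian field element into five 51-bit limbs
+ * (the radix-2^51 representation used for arithmetic mod 2^255 - 19). */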
+static void Hacl_EC_Format_fexpand(uint64_t *output, uint8_t *input)
+{
+  uint64_t i0 = load64_le(input);
+  uint8_t *x00 = input + (uint32_t)6U;
+  uint64_t i1 = load64_le(x00);
+  uint8_t *x01 = input + (uint32_t)12U;
+  uint64_t i2 = load64_le(x01);
+  uint8_t *x02 = input + (uint32_t)19U;
+  uint64_t i3 = load64_le(x02);
+  uint8_t *x0 = input + (uint32_t)24U;
+  uint64_t i4 = load64_le(x0);
+  uint64_t output0 = i0 & (uint64_t)0x7ffffffffffffU;
+  uint64_t output1 = i1 >> (uint32_t)3U & (uint64_t)0x7ffffffffffffU;
+  uint64_t output2 = i2 >> (uint32_t)6U & (uint64_t)0x7ffffffffffffU;
+  uint64_t output3 = i3 >> (uint32_t)1U & (uint64_t)0x7ffffffffffffU;
+  uint64_t output4 = i4 >> (uint32_t)12U & (uint64_t)0x7ffffffffffffU;
+  output[0U] = output0;
+  output[1U] = output1;
+  output[2U] = output2;
+  output[3U] = output3;
+  output[4U] = output4;
+}
+
+static void Hacl_EC_Format_fcontract_first_carry_pass(uint64_t *input)
+{
+  uint64_t t0 = input[0U];
+  uint64_t t1 = input[1U];
+  uint64_t t2 = input[2U];
+  uint64_t t3 = input[3U];
+  uint64_t t4 = input[4U];
+  uint64_t t1_ = t1 + (t0 >> (uint32_t)51U);
+  uint64_t t0_ = t0 & (uint64_t)0x7ffffffffffffU;
+  uint64_t t2_ = t2 + (t1_ >> (uint32_t)51U);
+  uint64_t t1__ = t1_ & (uint64_t)0x7ffffffffffffU;
+  uint64_t t3_ = t3 + (t2_ >> (uint32_t)51U);
+  uint64_t t2__ = t2_ & (uint64_t)0x7ffffffffffffU;
+  uint64_t t4_ = t4 + (t3_ >> (uint32_t)51U);
+  uint64_t t3__ = t3_ & (uint64_t)0x7ffffffffffffU;
+  input[0U] = t0_;
+  input[1U] = t1__;
+  input[2U] = t2__;
+  input[3U] = t3__;
+  input[4U] = t4_;
+}
+
+static void Hacl_EC_Format_fcontract_first_carry_full(uint64_t *input)
+{
+  Hacl_EC_Format_fcontract_first_carry_pass(input);
+  Hacl_Bignum_Modulo_carry_top(input);
+}
+
+static void Hacl_EC_Format_fcontract_second_carry_pass(uint64_t *input)
+{
+  uint64_t t0 = input[0U];
+  uint64_t t1 = input[1U];
+  uint64_t t2 = input[2U];
+  uint64_t t3 = input[3U];
+  uint64_t t4 = input[4U];
+  uint64_t t1_ = t1 + (t0 >> (uint32_t)51U);
+  uint64_t t0_ = t0 & (uint64_t)0x7ffffffffffffU;
+  uint64_t t2_ = t2 + (t1_ >> (uint32_t)51U);
+  uint64_t t1__ = t1_ & (uint64_t)0x7ffffffffffffU;
+  uint64_t t3_ = t3 + (t2_ >> (uint32_t)51U);
+  uint64_t t2__ = t2_ & (uint64_t)0x7ffffffffffffU;
+  uint64_t t4_ = t4 + (t3_ >> (uint32_t)51U);
+  uint64_t t3__ = t3_ & (uint64_t)0x7ffffffffffffU;
+  input[0U] = t0_;
+  input[1U] = t1__;
+  input[2U] = t2__;
+  input[3U] = t3__;
+  input[4U] = t4_;
+}
+
+static void Hacl_EC_Format_fcontract_second_carry_full(uint64_t *input)
+{
+  uint64_t i0;
+  uint64_t i1;
+  uint64_t i0_;
+  uint64_t i1_;
+  Hacl_EC_Format_fcontract_second_carry_pass(input);
+  Hacl_Bignum_Modulo_carry_top(input);
+  i0 = input[0U];
+  i1 = input[1U];
+  i0_ = i0 & (uint64_t)0x7ffffffffffffU;
+  i1_ = i1 + (i0 >> (uint32_t)51U);
+  input[0U] = i0_;
+  input[1U] = i1_;
+}
+
+static void Hacl_EC_Format_fcontract_trim(uint64_t *input)
+{
+  uint64_t a0 = input[0U];
+  uint64_t a1 = input[1U];
+  uint64_t a2 = input[2U];
+  uint64_t a3 = input[3U];
+  uint64_t a4 = input[4U];
+  uint64_t mask0 = FStar_UInt64_gte_mask(a0, (uint64_t)0x7ffffffffffedU);
+  uint64_t mask1 = FStar_UInt64_eq_mask(a1, (uint64_t)0x7ffffffffffffU);
+  uint64_t mask2 = FStar_UInt64_eq_mask(a2, (uint64_t)0x7ffffffffffffU);
+  uint64_t mask3 = FStar_UInt64_eq_mask(a3, (uint64_t)0x7ffffffffffffU);
+  uint64_t mask4 = FStar_UInt64_eq_mask(a4, (uint64_t)0x7ffffffffffffU);
+  uint64_t mask = (((mask0 & mask1) & mask2) & mask3) & mask4;
+  uint64_t a0_ = a0 - ((uint64_t)0x7ffffffffffedU & mask);
+  uint64_t a1_ = a1 - ((uint64_t)0x7ffffffffffffU & mask);
+  uint64_t a2_ = a2 - ((uint64_t)0x7ffffffffffffU & mask);
+  uint64_t a3_ = a3 - ((uint64_t)0x7ffffffffffffU & mask);
+  uint64_t a4_ = a4 - ((uint64_t)0x7ffffffffffffU & mask);
+  input[0U] = a0_;
+  input[1U] = a1_;
+  input[2U] = a2_;
+  input[3U] = a3_;
+  input[4U] = a4_;
+}
+
+static void Hacl_EC_Format_fcontract_store(uint8_t *output, uint64_t *input)
+{
+  uint64_t t0 = input[0U];
+  uint64_t t1 = input[1U];
+  uint64_t t2 = input[2U];
+  uint64_t t3 = input[3U];
+  uint64_t t4 = input[4U];
+  uint64_t o0 = t1 << (uint32_t)51U | t0;
+  uint64_t o1 = t2 << (uint32_t)38U | t1 >> (uint32_t)13U;
+  uint64_t o2 = t3 << (uint32_t)25U | t2 >> (uint32_t)26U;
+  uint64_t o3 = t4 << (uint32_t)12U | t3 >> (uint32_t)39U;
+  uint8_t *b0 = output;
+  uint8_t *b1 = output + (uint32_t)8U;
+  uint8_t *b2 = output + (uint32_t)16U;
+  uint8_t *b3 = output + (uint32_t)24U;
+  store64_le(b0, o0);
+  store64_le(b1, o1);
+  store64_le(b2, o2);
+  store64_le(b3, o3);
+}
+
+static void Hacl_EC_Format_fcontract(uint8_t *output, uint64_t *input)
+{
+  Hacl_EC_Format_fcontract_first_carry_full(input);
+  Hacl_EC_Format_fcontract_second_carry_full(input);
+  Hacl_EC_Format_fcontract_trim(input);
+  Hacl_EC_Format_fcontract_store(output, input);
+}
+
+static void Hacl_EC_Format_scalar_of_point(uint8_t *scalar, uint64_t *point)
+{
+  uint64_t *x = point;
+  uint64_t *z = point + (uint32_t)5U;
+  uint64_t buf[10U] = { 0U };
+  uint64_t *zmone = buf;
+  uint64_t *sc = buf + (uint32_t)5U;
+  Hacl_Bignum_crecip(zmone, z);
+  Hacl_Bignum_fmul(sc, x, zmone);
+  Hacl_EC_Format_fcontract(scalar, sc);
+}
+
+static void
+Hacl_EC_AddAndDouble_fmonty(
+  uint64_t *pp,
+  uint64_t *ppq,
+  uint64_t *p,
+  uint64_t *pq,
+  uint64_t *qmqp
+)
+{
+  uint64_t *qx = qmqp;
+  uint64_t *x2 = pp;
+  uint64_t *z2 = pp + (uint32_t)5U;
+  uint64_t *x3 = ppq;
+  uint64_t *z3 = ppq + (uint32_t)5U;
+  uint64_t *x = p;
+  uint64_t *z = p + (uint32_t)5U;
+  uint64_t *xprime = pq;
+  uint64_t *zprime = pq + (uint32_t)5U;
+  uint64_t buf[40U] = { 0U };
+  uint64_t *origx = buf;
+  uint64_t *origxprime0 = buf + (uint32_t)5U;
+  uint64_t *xxprime0 = buf + (uint32_t)25U;
+  uint64_t *zzprime0 = buf + (uint32_t)30U;
+  uint64_t *origxprime;
+  uint64_t *xx0;
+  uint64_t *zz0;
+  uint64_t *xxprime;
+  uint64_t *zzprime;
+  uint64_t *zzzprime;
+  uint64_t *zzz;
+  uint64_t *xx;
+  uint64_t *zz;
+  uint64_t scalar;
+  memcpy(origx, x, (uint32_t)5U * sizeof x[0U]);
+  Hacl_Bignum_fsum(x, z);
+  Hacl_Bignum_fdifference(z, origx);
+  memcpy(origxprime0, xprime, (uint32_t)5U * sizeof xprime[0U]);
+  Hacl_Bignum_fsum(xprime, zprime);
+  Hacl_Bignum_fdifference(zprime, origxprime0);
+  Hacl_Bignum_fmul(xxprime0, xprime, z);
+  Hacl_Bignum_fmul(zzprime0, x, zprime);
+  origxprime = buf + (uint32_t)5U;
+  xx0 = buf + (uint32_t)15U;
+  zz0 = buf + (uint32_t)20U;
+  xxprime = buf + (uint32_t)25U;
+  zzprime = buf + (uint32_t)30U;
+  zzzprime = buf + (uint32_t)35U;
+  memcpy(origxprime, xxprime, (uint32_t)5U * sizeof xxprime[0U]);
+  Hacl_Bignum_fsum(xxprime, zzprime);
+  Hacl_Bignum_fdifference(zzprime, origxprime);
+  Hacl_Bignum_Fsquare_fsquare_times(x3, xxprime, (uint32_t)1U);
+  Hacl_Bignum_Fsquare_fsquare_times(zzzprime, zzprime, (uint32_t)1U);
+  Hacl_Bignum_fmul(z3, zzzprime, qx);
+  Hacl_Bignum_Fsquare_fsquare_times(xx0, x, (uint32_t)1U);
+  Hacl_Bignum_Fsquare_fsquare_times(zz0, z, (uint32_t)1U);
+  zzz = buf + (uint32_t)10U;
+  xx = buf + (uint32_t)15U;
+  zz = buf + (uint32_t)20U;
+  Hacl_Bignum_fmul(x2, xx, zz);
+  Hacl_Bignum_fdifference(zz, xx);
+  scalar = (uint64_t)121665U;
+  Hacl_Bignum_fscalar(zzz, zz, scalar);
+  Hacl_Bignum_fsum(zzz, xx);
+  Hacl_Bignum_fmul(z2, zzz, zz);
+}
+
+static void
+Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(
+  uint64_t *nq,
+  uint64_t *nqpq,
+  uint64_t *nq2,
+  uint64_t *nqpq2,
+  uint64_t *q,
+  uint8_t byt
+)
+{
+  uint64_t bit0 = (uint64_t)(byt >> (uint32_t)7U);
+  uint64_t bit;
+  Hacl_EC_Point_swap_conditional(nq, nqpq, bit0);
+  Hacl_EC_AddAndDouble_fmonty(nq2, nqpq2, nq, nqpq, q);
+  bit = (uint64_t)(byt >> (uint32_t)7U);
+  Hacl_EC_Point_swap_conditional(nq2, nqpq2, bit);
+}
+
+static void
+Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(
+  uint64_t *nq,
+  uint64_t *nqpq,
+  uint64_t *nq2,
+  uint64_t *nqpq2,
+  uint64_t *q,
+  uint8_t byt
+)
+{
+  uint8_t byt1;
+  Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt);
+  byt1 = byt << (uint32_t)1U;
+  Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1);
+}
+
+static void
+Hacl_EC_Ladder_SmallLoop_cmult_small_loop(
+  uint64_t *nq,
+  uint64_t *nqpq,
+  uint64_t *nq2,
+  uint64_t *nqpq2,
+  uint64_t *q,
+  uint8_t byt,
+  uint32_t i
+)
+{
+  if (!(i == (uint32_t)0U))
+  {
+    uint32_t i_ = i - (uint32_t)1U;
+    uint8_t byt_;
+    Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(nq, nqpq, nq2, nqpq2, q, byt);
+    byt_ = byt << (uint32_t)2U;
+    Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byt_, i_);
+  }
+}
+
+static void
+Hacl_EC_Ladder_BigLoop_cmult_big_loop(
+  uint8_t *n1,
+  uint64_t *nq,
+  uint64_t *nqpq,
+  uint64_t *nq2,
+  uint64_t *nqpq2,
+  uint64_t *q,
+  uint32_t i
+)
+{
+  if (!(i == (uint32_t)0U))
+  {
+    uint32_t i1 = i - (uint32_t)1U;
+    uint8_t byte = n1[i1];
+    Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byte, (uint32_t)4U);
+    Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, i1);
+  }
+}
+
+static void Hacl_EC_Ladder_cmult(uint64_t *result, uint8_t *n1, uint64_t *q)
+{
+  uint64_t point_buf[40U] = { 0U };
+  uint64_t *nq = point_buf;
+  uint64_t *nqpq = point_buf + (uint32_t)10U;
+  uint64_t *nq2 = point_buf + (uint32_t)20U;
+  uint64_t *nqpq2 = point_buf + (uint32_t)30U;
+  Hacl_EC_Point_copy(nqpq, q);
+  nq[0U] = (uint64_t)1U;
+  Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, (uint32_t)32U);
+  Hacl_EC_Point_copy(result, nq);
+}
+
+void Hacl_Curve25519_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint)
+{
+  uint64_t buf0[10U] = { 0U };
+  uint64_t *x0 = buf0;
+  uint64_t *z = buf0 + (uint32_t)5U;
+  uint64_t *q;
+  Hacl_EC_Format_fexpand(x0, basepoint);
+  z[0U] = (uint64_t)1U;
+  q = buf0;
+  {
+    uint8_t e[32U] = { 0U };
+    uint8_t e0;
+    uint8_t e31;
+    uint8_t e01;
+    uint8_t e311;
+    uint8_t e312;
+    uint8_t *scalar;
+    memcpy(e, secret, (uint32_t)32U * sizeof secret[0U]);
+    e0 = e[0U];
+    e31 = e[31U];
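+    /* Clamp the scalar as specified by RFC 7748: clear the three low bits
+     * and the top bit, then set bit 254. */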
+    e01 = e0 & (uint8_t)248U;
+    e311 = e31 & (uint8_t)127U;
+    e312 = e311 | (uint8_t)64U;
+    e[0U] = e01;
+    e[31U] = e312;
+    scalar = e;
+    {
+      uint64_t buf[15U] = { 0U };
+      uint64_t *nq = buf;
+      uint64_t *x = nq;
+      x[0U] = (uint64_t)1U;
+      Hacl_EC_Ladder_cmult(nq, scalar, q);
+      Hacl_EC_Format_scalar_of_point(mypublic, nq);
+    }
+  }
+}
+
diff --git a/3rdparty/everest/library/x25519.c b/3rdparty/everest/library/x25519.c
new file mode 100644
index 0000000..83064dc
--- /dev/null
+++ b/3rdparty/everest/library/x25519.c
@@ -0,0 +1,186 @@
+/*
+ *  ECDH with curve-optimized implementation multiplexing
+ *
+ *  Copyright 2016-2018 INRIA and Microsoft Corporation
+ *  SPDX-License-Identifier: Apache-2.0
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License"); you may
+ *  not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ *  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ *  This file is part of Mbed TLS (https://tls.mbed.org)
+ */
+
+#include "common.h"
+
+#if defined(MBEDTLS_ECDH_C) && defined(MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED)
+
+#include <mbedtls/ecdh.h>
+
+#if !(defined(__SIZEOF_INT128__) && (__SIZEOF_INT128__ == 16))
+#define KRML_VERIFIED_UINT128
+#endif
+
+#include <Hacl_Curve25519.h>
+#include <mbedtls/platform_util.h>
+
+#include "x25519.h"
+
+#include <string.h>
+
+/*
+ * Initialize context
+ */
+void mbedtls_x25519_init( mbedtls_x25519_context *ctx )
+{
+    mbedtls_platform_zeroize( ctx, sizeof( mbedtls_x25519_context ) );
+}
+
+/*
+ * Free context
+ */
+void mbedtls_x25519_free( mbedtls_x25519_context *ctx )
+{
+    if( ctx == NULL )
+        return;
+
+    mbedtls_platform_zeroize( ctx->our_secret, MBEDTLS_X25519_KEY_SIZE_BYTES );
+    mbedtls_platform_zeroize( ctx->peer_point, MBEDTLS_X25519_KEY_SIZE_BYTES );
+}
+
+int mbedtls_x25519_make_params( mbedtls_x25519_context *ctx, size_t *olen,
+                        unsigned char *buf, size_t blen,
+                        int( *f_rng )(void *, unsigned char *, size_t),
+                        void *p_rng )
+{
+    int ret = 0;
+
+    uint8_t base[MBEDTLS_X25519_KEY_SIZE_BYTES] = {0};
+
+    if( ( ret = f_rng( p_rng, ctx->our_secret, MBEDTLS_X25519_KEY_SIZE_BYTES ) ) != 0 )
+        return ret;
+
+    *olen = MBEDTLS_X25519_KEY_SIZE_BYTES + 4;
+    if( blen < *olen )
+        return( MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL );
+
+    *buf++ = MBEDTLS_ECP_TLS_NAMED_CURVE;
+    *buf++ = MBEDTLS_ECP_TLS_CURVE25519 >> 8;
+    *buf++ = MBEDTLS_ECP_TLS_CURVE25519 & 0xFF;
+    *buf++ = MBEDTLS_X25519_KEY_SIZE_BYTES;
+
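+    /* The X25519 base point has u-coordinate 9 (RFC 7748); base is zeroed
+       again below so the memcmp() can reject an all-zero public value. */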
+    base[0] = 9;
+    Hacl_Curve25519_crypto_scalarmult( buf, ctx->our_secret, base );
+
+    base[0] = 0;
+    if( memcmp( buf, base, MBEDTLS_X25519_KEY_SIZE_BYTES) == 0 )
+        return MBEDTLS_ERR_ECP_RANDOM_FAILED;
+
+    return( 0 );
+}
+
+int mbedtls_x25519_read_params( mbedtls_x25519_context *ctx,
+                        const unsigned char **buf, const unsigned char *end )
+{
+    if( end - *buf < MBEDTLS_X25519_KEY_SIZE_BYTES + 1 )
+        return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
+
+    if( ( *(*buf)++ != MBEDTLS_X25519_KEY_SIZE_BYTES ) )
+        return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
+
+    memcpy( ctx->peer_point, *buf, MBEDTLS_X25519_KEY_SIZE_BYTES );
+    *buf += MBEDTLS_X25519_KEY_SIZE_BYTES;
+    return( 0 );
+}
+
+int mbedtls_x25519_get_params( mbedtls_x25519_context *ctx, const mbedtls_ecp_keypair *key,
+                               mbedtls_x25519_ecdh_side side )
+{
+    size_t olen = 0;
+
+    switch( side ) {
+    case MBEDTLS_X25519_ECDH_THEIRS:
+        return mbedtls_ecp_point_write_binary( &key->grp, &key->Q, MBEDTLS_ECP_PF_COMPRESSED, &olen, ctx->peer_point, MBEDTLS_X25519_KEY_SIZE_BYTES );
+    case MBEDTLS_X25519_ECDH_OURS:
+        return mbedtls_mpi_write_binary_le( &key->d, ctx->our_secret, MBEDTLS_X25519_KEY_SIZE_BYTES );
+    default:
+        return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
+    }
+}
+
+int mbedtls_x25519_calc_secret( mbedtls_x25519_context *ctx, size_t *olen,
+                        unsigned char *buf, size_t blen,
+                        int( *f_rng )(void *, unsigned char *, size_t),
+                        void *p_rng )
+{
+    /* f_rng and p_rng are not used here because this implementation does not
+       need blinding since it has constant trace. */
+    (( void )f_rng);
+    (( void )p_rng);
+
+    *olen = MBEDTLS_X25519_KEY_SIZE_BYTES;
+
+    if( blen < *olen )
+        return( MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL );
+
+    Hacl_Curve25519_crypto_scalarmult( buf, ctx->our_secret, ctx->peer_point);
+
+    /* Wipe the DH secret and don't let the peer choose a small subgroup point */
+    mbedtls_platform_zeroize( ctx->our_secret, MBEDTLS_X25519_KEY_SIZE_BYTES );
+
+    if( memcmp( buf, ctx->our_secret, MBEDTLS_X25519_KEY_SIZE_BYTES) == 0 )
+        return MBEDTLS_ERR_ECP_RANDOM_FAILED;
+
+    return( 0 );
+}
+
+int mbedtls_x25519_make_public( mbedtls_x25519_context *ctx, size_t *olen,
+                        unsigned char *buf, size_t blen,
+                        int( *f_rng )(void *, unsigned char *, size_t),
+                        void *p_rng )
+{
+    int ret = 0;
+    unsigned char base[MBEDTLS_X25519_KEY_SIZE_BYTES] = { 0 };
+
+    if( ctx == NULL )
+        return( MBEDTLS_ERR_ECP_BAD_INPUT_DATA );
+
+    if( ( ret = f_rng( p_rng, ctx->our_secret, MBEDTLS_X25519_KEY_SIZE_BYTES ) ) != 0 )
+        return ret;
+
+    *olen = MBEDTLS_X25519_KEY_SIZE_BYTES + 1;
+    if( blen < *olen )
+        return(MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL);
+    *buf++ = MBEDTLS_X25519_KEY_SIZE_BYTES;
+
+    base[0] = 9;
+    Hacl_Curve25519_crypto_scalarmult( buf, ctx->our_secret, base );
+
+    base[0] = 0;
+    if( memcmp( buf, base, MBEDTLS_X25519_KEY_SIZE_BYTES ) == 0 )
+        return MBEDTLS_ERR_ECP_RANDOM_FAILED;
+
+    return( ret );
+}
+
+int mbedtls_x25519_read_public( mbedtls_x25519_context *ctx,
+                        const unsigned char *buf, size_t blen )
+{
+    if( blen < MBEDTLS_X25519_KEY_SIZE_BYTES + 1 )
+        return(MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL);
+    if( (*buf++ != MBEDTLS_X25519_KEY_SIZE_BYTES) )
+        return(MBEDTLS_ERR_ECP_BAD_INPUT_DATA);
+    memcpy( ctx->peer_point, buf, MBEDTLS_X25519_KEY_SIZE_BYTES );
+    return( 0 );
+}
+
+
+#endif /* MBEDTLS_ECDH_C && MBEDTLS_ECDH_VARIANT_EVEREST_ENABLED */
diff --git a/3rdparty/p256-m/CMakeLists.txt b/3rdparty/p256-m/CMakeLists.txt
new file mode 100644
index 0000000..2ef0d48
--- /dev/null
+++ b/3rdparty/p256-m/CMakeLists.txt
@@ -0,0 +1,40 @@
+set(p256m_target ${MBEDTLS_TARGET_PREFIX}p256m)
+
+add_library(${p256m_target}
+    p256-m_driver_entrypoints.c
+    p256-m/p256-m.c)
+
+target_include_directories(${p256m_target}
+  PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+         $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/p256-m>
+         $<BUILD_INTERFACE:${MBEDTLS_DIR}/include>
+         $<INSTALL_INTERFACE:include>
+  PRIVATE ${MBEDTLS_DIR}/library/)
+
+# Pass-through MBEDTLS_CONFIG_FILE and MBEDTLS_USER_CONFIG_FILE
+# This must be duplicated from library/CMakeLists.txt because
+# p256m is not directly linked against any mbedtls targets
+# so does not inherit the compile definitions.
+if(MBEDTLS_CONFIG_FILE)
+    target_compile_definitions(${p256m_target}
+        PUBLIC MBEDTLS_CONFIG_FILE="${MBEDTLS_CONFIG_FILE}")
+endif()
+if(MBEDTLS_USER_CONFIG_FILE)
+    target_compile_definitions(${p256m_target}
+        PUBLIC MBEDTLS_USER_CONFIG_FILE="${MBEDTLS_USER_CONFIG_FILE}")
+endif()
+
+if(INSTALL_MBEDTLS_HEADERS)
+
+  install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    DESTINATION include
+    FILE_PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ
+    DIRECTORY_PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
+    FILES_MATCHING PATTERN "*.h")
+
+endif(INSTALL_MBEDTLS_HEADERS)
+
+install(TARGETS ${p256m_target}
+  EXPORT MbedTLSTargets
+  DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  PERMISSIONS OWNER_READ OWNER_WRITE GROUP_READ WORLD_READ)
diff --git a/3rdparty/p256-m/Makefile.inc b/3rdparty/p256-m/Makefile.inc
new file mode 100644
index 0000000..53bb55b
--- /dev/null
+++ b/3rdparty/p256-m/Makefile.inc
@@ -0,0 +1,5 @@
+THIRDPARTY_INCLUDES+=-I$(THIRDPARTY_DIR)/p256-m/p256-m/include -I$(THIRDPARTY_DIR)/p256-m/p256-m/include/p256-m -I$(THIRDPARTY_DIR)/p256-m/p256-m_driver_interface
+
+THIRDPARTY_CRYPTO_OBJECTS+= \
+	$(THIRDPARTY_DIR)/p256-m//p256-m_driver_entrypoints.o \
+	$(THIRDPARTY_DIR)/p256-m//p256-m/p256-m.o
diff --git a/3rdparty/p256-m/README.md b/3rdparty/p256-m/README.md
new file mode 100644
index 0000000..ec90f34
--- /dev/null
+++ b/3rdparty/p256-m/README.md
@@ -0,0 +1,4 @@
+The files within the `p256-m/` subdirectory originate from the [p256-m GitHub repository](https://github.com/mpg/p256-m). They are distributed here under a dual Apache-2.0 OR GPL-2.0-or-later license. They are authored by Manuel Pégourié-Gonnard. p256-m is a minimalistic implementation of ECDH and ECDSA on NIST P-256, especially suited to constrained 32-bit environments. Mbed TLS documentation for integrating drivers uses p256-m as an example of a software accelerator, and describes how it can be integrated alongside Mbed TLS. It should be noted that p256-m files in the Mbed TLS repo will not be updated regularly, so they may not have fixes and improvements present in the upstream project.
+
+The files `p256-m.c`, `p256-m.h` and `README.md` have been taken from the `p256-m` repository.
+It should be noted that p256-m deliberately does not supply its own cryptographically secure RNG function. As a result, the PSA RNG is used, with `p256_generate_random()` wrapping `psa_generate_random()`.
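+
+As an illustration, such a wrapper might look roughly like the sketch below.
+This is not the exact in-tree glue code (only the `p256_generate_random()`
+and `psa_generate_random()` prototypes are taken from their respective
+headers; the error value returned on failure is an arbitrary choice here):
+
+```c
+#include <stdint.h>
+#include "psa/crypto.h"
+#include "p256-m.h"
+
+/* p256-m's RNG hook: must return 0 on success, non-zero on failure. */
+int p256_generate_random(uint8_t *output, unsigned output_size)
+{
+    if (psa_generate_random(output, output_size) != PSA_SUCCESS) {
+        return -1; /* any non-zero value signals failure to p256-m */
+    }
+    return 0;
+}
+```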
diff --git a/3rdparty/p256-m/p256-m/README.md b/3rdparty/p256-m/p256-m/README.md
new file mode 100644
index 0000000..5e88f71
--- /dev/null
+++ b/3rdparty/p256-m/p256-m/README.md
@@ -0,0 +1,544 @@
+*This is the original README for the p256-m repository. Please note that as
+only a subset of p256-m's files are present in Mbed TLS, this README may refer
+to files that are not present/relevant here.*
+
+p256-m is a minimalistic implementation of ECDH and ECDSA on NIST P-256,
+especially suited to constrained 32-bit environments. It's written in standard
+C, with optional bits of assembly for Arm Cortex-M and Cortex-A CPUs.
+
+Its design is guided by the following goals in this order:
+
+1. correctness & security;
+2. low code size & RAM usage;
+3. runtime performance.
+
+Most cryptographic implementations care more about speed than footprint, and
+some might even risk weakening security for more speed. p256-m was written
+because I wanted to see what happened when reversing the usual emphasis.
+
+The result is a full implementation of ECDH and ECDSA in **less than 3KiB of
+code**, using **less than 768 bytes of RAM**, with comparable performance
+to existing implementations (see below) - in less than 700 LOC.
+
+_Contents of this Readme:_
+
+- [Correctness](#correctness)
+- [Security](#security)
+- [Code size](#code-size)
+- [RAM usage](#ram-usage)
+- [Runtime performance](#runtime-performance)
+- [Comparison with other implementations](#comparison-with-other-implementations)
+- [Design overview](#design-overview)
+- [Notes about other curves](#notes-about-other-curves)
+- [Notes about other platforms](#notes-about-other-platforms)
+
+## Correctness
+
+**API design:**
+
+- The API is minimal: only 4 public functions; a sketch of their prototypes is
+  shown after this list.
+- Each public function fully validates its inputs and returns specific errors.
+- The API uses arrays of octets for all input and output.
+
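+For reference, the prototypes look roughly as follows (paraphrased from
+`p256-m.h`, which remains the authoritative source for the exact types,
+return codes and documentation):
+
+```c
+#include <stddef.h>
+#include <stdint.h>
+
+int p256_gen_keypair(uint8_t priv[32], uint8_t pub[64]);
+int p256_ecdh_shared_secret(uint8_t secret[32],
+                            const uint8_t priv[32], const uint8_t peer[64]);
+int p256_ecdsa_sign(uint8_t sig[64], const uint8_t priv[32],
+                    const uint8_t *hash, size_t hlen);
+int p256_ecdsa_verify(const uint8_t sig[64], const uint8_t pub[64],
+                      const uint8_t *hash, size_t hlen);
+```
+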
+**Testing:**
+
+- p256-m is validated against multiple test vectors from various RFCs and
+  NIST.
+- In addition, crafted inputs are used for negative testing and to reach
+  corner cases.
+- Two test suites are provided: one for closed-box testing (using only the
+  public API), one for open-box testing (for unit-testing internal functions,
+and reaching more error cases by exploiting knowledge of how the RNG is used).
+- The resulting branch coverage is maximal: closed-box testing reaches all
+  branches except four; three of them are reached by open-box testing using a
+rigged RNG; the last branch could only be reached by computing a discrete log
+on P-256... See `coverage.sh`.
+- Testing also uses dynamic analysis: valgrind, ASan, MemSan, UBSan.
+
+**Code quality:**
+
+- The code is standard C99; it builds without warnings with `clang
+  -Weverything` and `gcc -Wall -Wextra -pedantic`.
+- The code is small and well documented, including internal APIs: with the
+  header file, it's less than 700 lines of code, and more lines of comments
+than of code.
+- However, it _has not been reviewed_ independently so far, as this is a
+  personal project.
+
+**Short Weierstrass pitfalls:**
+
+It has been [pointed out](https://safecurves.cr.yp.to/) that the NIST curves,
+and indeed all Short Weierstrass curves, have a number of pitfalls, including
+the risk that an implementation may:
+
+- "produce incorrect results for some rare curve points" - this is avoided by
+  carefully checking the validity domain of formulas used throughout the code;
+- "leak secret data when the input isn't a curve point" - this is avoided by
+  validating that points lie on the curve every time a point is deserialized.
+
+## Security
+
+In addition to the above correctness claims, p256-m has the following
+properties:
+
+- it has no branch depending (even indirectly) on secret data;
+- it has no memory access depending (even indirectly) on secret data.
+
+These properties are checked using valgrind and MemSan with the ideas
+behind [ctgrind](https://github.com/agl/ctgrind), see `consttime.sh`.
+
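+As an illustration of the coding style this implies, a conditional assignment
+can be written without any secret-dependent branch or memory access. The
+sketch below is written in the spirit of p256-m but is not its actual code
+(the function name is illustrative):
+
+```c
+#include <stdint.h>
+
+/* If c is 1, copy x into z; if c is 0, leave z unchanged.  The mask is
+ * all-ones or all-zeros, so the same instructions and memory accesses
+ * are executed whatever the value of c. */
+static void u256_cond_assign(uint32_t z[8], const uint32_t x[8], uint32_t c)
+{
+    uint32_t mask = 0u - c; /* c = 1 -> 0xffffffff, c = 0 -> 0 */
+    uint32_t i;
+    for (i = 0; i < 8; i++) {
+        z[i] = (z[i] & ~mask) | (x[i] & mask);
+    }
+}
+```
+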
+In addition to avoiding branches and memory accesses depending on secret data,
+p256-m also avoids instructions (or library functions) whose execution time
+depends on the value of operands on cores of interest. Namely, it never uses
+integer division, and for multiplication by default it only uses 16x16->32 bit
+unsigned multiplication. On cores which have a constant-time 32x32->64 bit
+unsigned multiplication instruction, the symbol `MUL64_IS_CONSTANT_TIME` can
+be defined by the user at compile-time to take advantage of it in order to
+improve performance and code size. (On Cortex-M and Cortex-A cores with GCC or
+Clang this is not necessary, since inline assembly is used instead.)
+
+As a result, p256-m should be secure against the following classes of attackers:
+
+1. attackers who can only manipulate the input and observe the output;
+2. attackers who can also measure the total computation time of the operation;
+3. attackers who can also observe and manipulate micro-architectural features
+   such as the cache or branch predictor with arbitrary precision.
+
+However, p256-m makes no attempt to protect against:
+
+4. passive physical attackers who can record traces of physical emissions
+   (power, EM, sound) of the CPU while it manipulates secrets;
+5. active physical attackers who can also inject faults in the computation.
+
+(Note: p256-m should actually be secure against SPA, by virtue of being fully
+constant-flow, but is not expected to resist any other physical attack.)
+
+**Warning:** p256-m requires an externally-provided RNG function. If that
+function is not cryptographically secure, then neither is p256-m's key
+generation or ECDSA signature generation.
+
+_Note:_ p256-m also follows best practices such as securely erasing secret
+data on the stack before returning.
+
+## Code size
+
+Compiled with
+[ARM-GCC 9](https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-rm/downloads),
+with `-mthumb -Os`, here are samples of code sizes reached on selected cores:
+
+- Cortex-M0: 2988 bytes
+- Cortex-M4: 2900 bytes
+- Cortex-A7: 2924 bytes
+
+Clang was also tried but tends to generate larger code (by about 10%). For
+details, see `sizes.sh`.
+
+**What's included:**
+
+- Full input validation and (de)serialisation of inputs/outputs to/from bytes.
+- Cleaning up secret values from the stack before returning from a function.
+- The code has no dependency on libc functions or the toolchain's runtime
+  library (such as helpers for long multiply); this can be checked for the
+Arm-GCC toolchain with the `deps.sh` script.
+
+**What's excluded:**
+
+- A secure RNG function needs to be provided externally, see
+  `p256_generate_random()` in `p256-m.h`.
+
+## RAM usage
+
+p256-m doesn't use any dynamic memory (on the heap), only the stack. Here's
+how much stack is used by each of its 4 public functions on selected cores:
+
+| Function                  | Cortex-M0 | Cortex-M4 | Cortex-A7 |
+| ------------------------- | --------: | --------: | --------: |
+| `p256_gen_keypair`        |       608 |       564 |       564 |
+| `p256_ecdh_shared_secret` |       640 |       596 |       596 |
+| `p256_ecdsa_sign`         |       664 |       604 |       604 |
+| `p256_ecdsa_verify`       |       752 |       700 |       700 |
+
+For details, see `stack.sh`, `wcs.py` and `libc.msu` (the above figures assume
+that the externally-provided RNG function uses at most 384 bytes of stack).
+
+## Runtime performance
+
+Here are the timings of each public function in milliseconds measured on
+platforms based on a selection of cores:
+
+- Cortex-M0 at  48 MHz: STM32F091 board running Mbed OS 6
+- Cortex-M4 at 100 MHz: STM32F411 board running Mbed OS 6
+- Cortex-A7 at 900 MHz: Raspberry Pi 2B running Raspbian Buster
+
+| Function                  | Cortex-M0 | Cortex-M4 | Cortex-A7 |
+| ------------------------- | --------: | --------: | --------: |
+| `p256_gen_keypair`        |       921 |       145 |        11 |
+| `p256_ecdh_shared_secret` |       922 |       144 |        11 |
+| `p256_ecdsa_sign`         |       990 |       155 |        12 |
+| `p256_ecdsa_verify`       |      1976 |       309 |        24 |
+| Sum of the above          |      4809 |       753 |        59 |
+
+The sum of these operations corresponds to a TLS handshake using ECDHE-ECDSA
+with mutual authentication based on raw public keys or directly-trusted
+certificates (otherwise, add one 'verify' for each link in the peer's
+certificate chain).
+
+_Note_: the above figures were obtained by compiling with GCC, which is able
+to use inline assembly. Without that inline assembly (22 lines for Cortex-M0,
+1 line for Cortex-M4), the code would be roughly 2 times slower on those
+platforms. (The effect is much less important on the Cortex-A7 core.)
+
+For details, see `bench.sh`, `benchmark.c` and `on-target-benchmark/`.
+
+## Comparison with other implementations
+
+The most relevant/convenient implementation for comparisons is
+[TinyCrypt](https://github.com/intel/tinycrypt), as it's also a standalone
+implementation of ECDH and ECDSA on P-256 only, that also targets constrained
+devices. Other implementations tend to implement many curves and build on a
+shared bignum/MPI module (possibly also supporting RSA), which makes fair
+comparisons less convenient.
+
+The scripts used for TinyCrypt measurements are available in [this
+branch](https://github.com/mpg/tinycrypt/tree/measurements), based on version
+0.2.8.
+
+**Code size**
+
+| Core      | p256-m | TinyCrypt |
+| --------- | -----: | --------: |
+| Cortex-M0 |   2988 |      6134 |
+| Cortex-M4 |   2900 |      5934 |
+| Cortex-A7 |   2924 |      5934 |
+
+**RAM usage**
+
+TinyCrypt also uses no heap, only the stack. Here's the RAM used by each
+operation on a Cortex-M0 core:
+
+| operation          | p256-m | TinyCrypt |
+| ------------------ | -----: | --------: |
+| key generation     |    608 |       824 |
+| ECDH shared secret |    640 |       728 |
+| ECDSA sign         |    664 |       880 |
+| ECDSA verify       |    752 |       824 |
+
+On a Cortex-M4 or Cortex-A7 core (identical numbers):
+
+| operation          | p256-m | TinyCrypt |
+| ------------------ | -----: | --------: |
+| key generation     |    564 |       796 |
+| ECDH shared secret |    596 |       700 |
+| ECDSA sign         |    604 |       844 |
+| ECDSA verify       |    700 |       808 |
+
+**Runtime performance**
+
+Here are the timings of each operation in milliseconds measured on
+platforms based on a selection of cores:
+
+_Cortex-M0_ at  48 MHz: STM32F091 board running Mbed OS 6
+
+| Operation          | p256-m | TinyCrypt |
+| ------------------ | -----: | --------: |
+| Key generation     |    921 |       979 |
+| ECDH shared secret |    922 |       975 |
+| ECDSA sign         |    990 |      1009 |
+| ECDSA verify       |   1976 |      1130 |
+| Sum of those 4     |   4809 |      4093 |
+
+_Cortex-M4_ at 100 MHz: STM32F411 board running Mbed OS 6
+
+| Operation          | p256-m | TinyCrypt |
+| ------------------ | -----: | --------: |
+| Key generation     |    145 |       178 |
+| ECDH shared secret |    144 |       177 |
+| ECDSA sign         |    155 |       188 |
+| ECDSA verify       |    309 |       210 |
+| Sum of those 4     |    753 |       753 |
+
+_Cortex-A7_ at 900 MHz: Raspberry Pi 2B running Raspbian Buster
+
+| Operation          | p256-m | TinyCrypt |
+| ------------------ | -----: | --------: |
+| Key generation     |     11 |        13 |
+| ECDH shared secret |     11 |        13 |
+| ECDSA sign         |     12 |        14 |
+| ECDSA verify       |     24 |        15 |
+| Sum of those 4     |     59 |        55 |
+
+_64-bit Intel_ (i7-6500U at 2.50GHz) laptop running Ubuntu 20.04
+
+Note: results in microseconds (previous benchmarks in milliseconds)
+
+| Operation          | p256-m | TinyCrypt |
+| ------------------ | -----: | --------: |
+| Key generation     |   1060 |      1627 |
+| ECDH shared secret |   1060 |      1611 |
+| ECDSA sign         |   1136 |      1712 |
+| ECDSA verify       |   2279 |      1888 |
+| Sum of those 4     |   5535 |      6838 |
+
+**Other differences**
+
+- While p256-m fully validates all inputs, TinyCrypt's ECDH shared secret
+  function doesn't include validation of the peer's public key, which should be
+done separately by the user for static ECDH (there are attacks [when users
+forget](https://link.springer.com/chapter/10.1007/978-3-319-24174-6_21)).
+- The two implementations have slightly different security characteristics:
+  p256-m is fully constant-time from the ground up so should be more robust
+than TinyCrypt against powerful local attackers (such as an untrusted OS
+attacking a secure enclave); on the other hand TinyCrypt includes coordinate
+randomisation which protects against some passive physical attacks (such as
+DPA, see Table 3, column C9 of [this
+paper](https://www.esat.kuleuven.be/cosic/publications/article-2293.pdf#page=12)),
+which p256-m completely ignores.
+- TinyCrypt's code looks like it could easily be expanded to support other
+  curves, while p256-m has much more hard-coded to minimize code size (see
+"Notes about other curves" below).
+- TinyCrypt uses a specialised routine for reduction modulo the curve prime,
+  exploiting its structure as a Solinas prime, which should be faster than the
+generic Montgomery reduction used by p256-m, but other factors appear to
+compensate for that.
+- TinyCrypt uses Co-Z Jacobian formulas for point operations, which should be
+  faster (though a bit larger) than the mixed affine-Jacobian formulas
+used by p256-m, but again other factors appear to compensate for that.
+- p256-m uses bits of inline assembly for 64-bit multiplication on the
+  platforms used for benchmarking, while TinyCrypt uses only C (and the
+compiler's runtime library).
+- TinyCrypt uses a specialised routine based on Shamir's trick for
+  ECDSA verification, which gives much better performance than the generic
+code that p256-m uses in order to minimize code size.
+
+## Design overview
+
+The implementation is contained in a single file to keep most functions static
+and allow for more optimisations. It is organized in multiple layers:
+
+- Fixed-width multi-precision arithmetic
+- Fixed-width modular arithmetic
+- Operations on curve points
+- Operations with scalars
+- The public API
+
+**Multi-precision arithmetic.**
+
+Large integers are represented as arrays of `uint32_t` limbs. When carries may
+occur, casts to `uint64_t` are used to nudge the compiler towards using the
+CPU's carry flag. When overflow may occur, functions return a carry flag.
+
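+As an illustration (a sketch only, not p256-m's actual code), an addition
+with carry propagation in this style might look like:
+
+```c
+#include <stdint.h>
+
+/* z = x + y over 8 x 32-bit limbs; the return value is the final carry. */
+static uint32_t u256_add(uint32_t z[8],
+                         const uint32_t x[8], const uint32_t y[8])
+{
+    uint32_t carry = 0;
+    uint32_t i;
+    for (i = 0; i < 8; i++) {
+        uint64_t sum = (uint64_t) x[i] + y[i] + carry; /* 64-bit accumulator */
+        z[i] = (uint32_t) sum;
+        carry = (uint32_t) (sum >> 32);
+    }
+    return carry;
+}
+```
+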
+This layer contains optional assembly for Cortex-M and Cortex-A cores, for the
+internal `u32_muladd64()` function, as well as two pure C versions of this
+function, depending on whether `MUL64_IS_CONSTANT_TIME` is defined.
+
+This layer's API consists of:
+
+- addition, subtraction;
+- multiply-and-add, shift by one limb (for Montgomery multiplication);
+- conditional assignment, assignment of a small value;
+- comparison of two values for equality, comparison to 0 for equality;
+- (de)serialization as big-endian arrays of bytes.
+
+**Modular arithmetic.**
+
+All modular operations are done in the Montgomery domain, that is, x is
+represented by `x * 2^256 mod m`; integers need to be converted to that domain
+before computations, and back from it afterwards. Montgomery constants
+associated to the curve's p and n are pre-computed and stored in static
+structures.
+
+Modular inversion is computed using Fermat's little theorem to get
+constant-time behaviour with respect to the value being inverted.
+
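+To make the inversion step concrete, here is a toy sketch of Fermat inversion
+(`x^-1 = x^(p-2) mod p`) using plain 64-bit arithmetic on a small prime;
+p256-m performs the same square-and-multiply walk with 256-bit Montgomery
+arithmetic, and the branches below depend only on the bits of the public
+constant `p - 2`, never on the secret value being inverted:
+
+```c
+#include <stdint.h>
+
+/* Requires a compiler with unsigned __int128 (e.g. GCC/Clang on 64-bit). */
+static uint64_t mulmod(uint64_t a, uint64_t b, uint64_t p)
+{
+    return (uint64_t) ((unsigned __int128) a * b % p);
+}
+
+/* x^-1 mod p for prime p and x not a multiple of p, via x^(p-2) mod p. */
+static uint64_t invmod(uint64_t x, uint64_t p)
+{
+    uint64_t result = 1;
+    uint64_t e = p - 2;
+    while (e != 0) {                       /* square-and-multiply */
+        if (e & 1) {
+            result = mulmod(result, x, p);
+        }
+        x = mulmod(x, x, p);
+        e >>= 1;
+    }
+    return result;
+}
+```
+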
+This layer's API consists of:
+
+- the curve's constants p and n (and associated Montgomery constants);
+- modular addition, subtraction, multiplication, and inversion;
+- assignment of a small value;
+- conversion to/from Montgomery domain;
+- (de)serialization to/from bytes with integrated range checking and
+  Montgomery domain conversion.
+
+**Operations on curve points.**
+
+Curve points are represented using either affine or Jacobian coordinates;
+affine coordinates are extended to represent 0 as (0,0). Individual
+coordinates are always in the Montgomery domain.
+
+Not all formulas associated with affine or Jacobian coordinates are complete;
+great care is taken to document and satisfy each function's pre-conditions.
+
+This layer's API consists of:
+
+- curve constants: b from the equation, the base point's coordinates;
+- point validity check (on the curve and not 0);
+- Jacobian to affine coordinate conversion;
+- point doubling in Jacobian coordinates (complete formulas);
+- point addition in mixed affine-Jacobian coordinates (P not in {0, Q, -Q});
+- point addition-or-doubling in affine coordinates (leaky version, only used
+  for ECDSA verify where all data is public);
+- (de)serialization to/from bytes with integrated validity checking
+
+**Scalar operations.**
+
+The crucial function here is scalar multiplication. It uses a signed binary
+ladder, which is a variant of the good old double-and-add algorithm where an
+addition/subtraction is performed at each step. Again, care is taken to make
+sure the pre-conditions for the addition formulas are always satisfied. The
+signed binary ladder only works if the scalar is odd; this is ensured by
+negating both the scalar (mod n) and the input point if necessary.
+
+This layer's API consists of:
+
+- scalar multiplication
+- de-serialization from bytes with integrated range checking
+- generation of a scalar and its associated public key
+
+**Public API.**
+
+This layer builds on the others, but unlike them, all inputs and outputs are
+byte arrays. Key generation and ECDH shared secret computation are thin
+wrappers around internal functions, just taking care of format conversions and
+errors. The ECDSA functions have more non-trivial logic.
+
+This layer's API consists of:
+
+- key-pair generation
+- ECDH shared secret computation
+- ECDSA signature creation
+- ECDSA signature verification
+
+**Testing.**
+
+A self-contained, straightforward, pure-Python implementation was first
+produced as a warm-up and to help check intermediate values. Test vectors from
+various sources are embedded and used to validate the implementation.
+
+This implementation, `p256.py`, is used by a second Python script,
+`gen-test-data.py`, to generate additional data for both positive and negative
+testing, available from a C header file, that is then used by the closed-box
+and open-box test programs.
+
+p256-m can be compiled with extra instrumentation to mark secret data and
+allow either valgrind or MemSan to check that no branch or memory access
+depends on it (even indirectly). Macros are defined for this purpose near the
+top of the file.
+
+**Tested platforms.**
+
+There are 4 versions of the internal function `u32_muladd64`: two assembly
+versions, for Cortex-M/A cores with or without the DSP extension, and two
+pure-C versions, depending on whether `MUL64_IS_CONSTANT_TIME` is defined.
+
+Tests are run on the following platforms:
+
+- `make` on x64 tests the pure-C version without `MUL64_IS_CONSTANT_TIME`
+  (with Clang).
+- `./consttime.sh` on x64 tests both pure-C versions (with Clang).
+- `make` on Arm v7-A (Raspberry Pi 2) tests the Arm-DSP assembly version (with
+  Clang).
+- `on-target-*box` on boards based on Cortex-M0 and M4 cores test both
+  assembly versions (with GCC).
+
+In addition:
+
+- `sizes.sh` builds the code for three Arm cores with GCC and Clang.
+- `deps.sh` checks for external dependencies with GCC.
+
+## Notes about other curves
+
+It should be clear that minimal code size can only be reached by specializing
+the implementation to the curve at hand. Here's a list of things in the
+implementation that are specific to the NIST P-256 curve, and how the
+implementation could be changed to expand to other curves, layer by layer (see
+"Design Overview" above).
+
+**Fixed-width multi-precision arithmetic:**
+
+- The number of limbs is hard-coded to 8. For other 256-bit curves, nothing to
+  change. For a curve of another size, hard-code to another value. For multiple
+curves of various sizes, add a parameter to each function specifying the
+number of limbs; when declaring arrays, always use the maximum number of
+limbs.
+
+**Fixed-width modular arithmetic:**
+
+- The values of the curve's constants p and n, and their associated Montgomery
+  constants, are hard-coded. For another curve, just hard-code the new constants.
+For multiple other curves, define all the constants, and from this layer's API
+only keep the functions that already accept a `mod` parameter (that is, remove
+convenience functions `m256_xxx_p()`).
+- The number of limbs is again hard-coded to 8. See above, but in order to
+  support multiple sizes there is no need to add a new parameter to functions
+in this layer: the existing `mod` parameter can include the number of limbs as
+well.
+
+**Operations on curve points:**
+
+- The values of the curve's constants b (constant term from the equation) and
+  gx, gy (coordinates of the base point) are hard-coded. For another curve,
+  hard-code the other values. For multiple curves, define each curve's value and
+add a "curve id" parameter to all functions in this layer.
+- The value of the curve's constant a is implicitly hard-coded to `-3` by using
+  a standard optimisation to save one multiplication in the first step of
+`point_double()`. For curves that don't have a == -3, replace that with the
+normal computation.
+- The fact that b != 0 in the curve equation is used indirectly: it ensures
+  that (0, 0) is not a point on the curve, so that value can be re-used to
+represent the point 0. As far as I know, all Short Weierstrass curves standardized so
+far have b != 0.
+- The shape of the curve is assumed to be Short Weierstrass. For other curve
+  shapes (Montgomery, (twisted) Edwards), this layer would probably look very
+different (both implementation and API).
+
+**Scalar operations:**
+
+- If multiple curves are to be supported, all functions in this layer need to
+  gain a new "curve id" parameter.
+- This layer assumes that the bit size of the curve's order n is the same as
+  that of the modulus p. This is true of most curves standardized so far, the
+only exception being secp224k1. If that curve were to be supported, the
+representation of `n` and scalars would need adapting to allow for an extra
+limb.
+- The bit size of the curve's order is hard-coded in `scalar_mult()`. For
+  multiple curves, this should be deduced from the "curve id" parameter.
+- The `scalar_mult()` function exploits the fact that the second least
+  significant bit of the curve's order n is set in order to avoid a special
+case. For curve orders that don't meet this criterion, we can just handle that
+special case (multiplication by +-2) separately (always compute that and
+conditionally assign it to the result).
+- The shape of the curve is again assumed to be Short Weierstrass. For other curve
+  shapes (Montgomery, (twisted) Edwards), this layer would probably have a
+very different implementation.
+
+**Public API:**
+
+- For multiple curves, all functions in this layer would need to gain a "curve
+  id" parameter and handle variable-sized input/output.
+- The shape of the curve is again assumed to be Short Weierstrass. For other curve
+  shapes (Montgomery, (twisted) Edwards), the ECDH API would probably look
+quite similar (with differences in the size of public keys), but the ECDSA API
+wouldn't apply and an EdDSA API would look pretty different.
+
+## Notes about other platforms
+
+While p256-m is standard C99, it is written with constrained 32-bit platforms
+in mind and makes a few assumptions about the platform:
+
+- The types `uint8_t`, `uint16_t`, `uint32_t` and `uint64_t` exist.
+- 32-bit unsigned addition and subtraction with carry are constant time.
+- 16x16->32-bit unsigned multiplication is available and constant time.
+
+Also, on platforms on which 64-bit addition and subtraction with carry, or
+even 64x64->128-bit multiplication, are available, p256-m makes no use of
+them, though they could significantly improve performance.
+
+This could be improved by replacing uses of arrays of `uint32_t` with a
+defined type throughout the internal APIs, then defining that type as an array
+of `uint64_t` on 64-bit platforms, and making the obvious adaptations in the
+multi-precision arithmetic layer.
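+
+A hypothetical sketch of that abstraction (these names are not part of
+p256-m):
+
+```c
+#include <stdint.h>
+
+#if defined(P256_USE_64BIT_LIMBS)   /* hypothetical build option */
+typedef uint64_t p256_limb;
+#define P256_LIMBS 4                /* 256 bits = 4 x 64-bit limbs */
+#else
+typedef uint32_t p256_limb;
+#define P256_LIMBS 8                /* 256 bits = 8 x 32-bit limbs */
+#endif
+
+/* Internal APIs would then take p256_limb x[P256_LIMBS] instead of
+ * uint32_t x[8], with the multi-precision layer adapted to the limb size. */
+```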
+
+Finally, the optional assembly code (which boosts performance by a factor of 2 on
+tested Cortex-M CPUs, while slightly reducing code size and stack usage) is
+currently only available with compilers that support GCC's extended asm
+syntax (which includes GCC and Clang).
diff --git a/3rdparty/p256-m/p256-m/p256-m.c b/3rdparty/p256-m/p256-m/p256-m.c
new file mode 100644
index 0000000..42c35b5
--- /dev/null
+++ b/3rdparty/p256-m/p256-m/p256-m.c
@@ -0,0 +1,1514 @@
+/*
+ * Implementation of curve P-256 (ECDH and ECDSA)
+ *
+ * Copyright The Mbed TLS Contributors
+ * Author: Manuel Pégourié-Gonnard.
+ * SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+ */
+
+#include "p256-m.h"
+#include "mbedtls/platform_util.h"
+#include "psa/crypto.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined (MBEDTLS_PSA_P256M_DRIVER_ENABLED)
+
+/*
+ * Zeroize memory - this should not be optimized away
+ */
+#define zeroize mbedtls_platform_zeroize
+
+/*
+ * Helpers to test constant-time behaviour with valgrind or MemSan.
+ *
+ * CT_POISON() is used for secret data. It marks the memory area as
+ * uninitialised, so that any branch or pointer dereference that depends on it
+ * (even indirectly) triggers a warning.
+ * CT_UNPOISON() is used for public data; it marks the area as initialised.
+ *
+ * These are macros in order to avoid interfering with origin tracking.
+ */
+#if defined(CT_MEMSAN)
+
+#include <sanitizer/msan_interface.h>
+#define CT_POISON   __msan_allocated_memory
+// void __msan_allocated_memory(const volatile void* data, size_t size);
+#define CT_UNPOISON __msan_unpoison
+// void __msan_unpoison(const volatile void *a, size_t size);
+
+#elif defined(CT_VALGRIND)
+
+#include <valgrind/memcheck.h>
+#define CT_POISON   VALGRIND_MAKE_MEM_UNDEFINED
+// VALGRIND_MAKE_MEM_UNDEFINED(_qzz_addr,_qzz_len)
+#define CT_UNPOISON VALGRIND_MAKE_MEM_DEFINED
+// VALGRIND_MAKE_MEM_DEFINED(_qzz_addr,_qzz_len)
+
+#else
+#define CT_POISON(p, sz)
+#define CT_UNPOISON(p, sz)
+#endif
+
+/**********************************************************************
+ *
+ * Operations on fixed-width unsigned integers
+ *
+ * Represented using 32-bit limbs, least significant limb first.
+ * That is: x = x[0] + 2^32 x[1] + ... + 2^224 x[7] for 256-bit.
+ *
+ **********************************************************************/
+
+/*
+ * 256-bit set to 32-bit value
+ *
+ * in: x in [0, 2^32)
+ * out: z = x
+ */
+static void u256_set32(uint32_t z[8], uint32_t x)
+{
+    z[0] = x;
+    for (unsigned i = 1; i < 8; i++) {
+        z[i] = 0;
+    }
+}
+
+/*
+ * 256-bit addition
+ *
+ * in: x, y in [0, 2^256)
+ * out: z = (x + y) mod 2^256
+ *      c = (x + y) div 2^256
+ * That is, z + c * 2^256 = x + y
+ *
+ * Note: as a memory area, z must be either equal to x or y, or not overlap.
+ */
+static uint32_t u256_add(uint32_t z[8],
+                         const uint32_t x[8], const uint32_t y[8])
+{
+    uint32_t carry = 0;
+
+    for (unsigned i = 0; i < 8; i++) {
+        uint64_t sum = (uint64_t) carry + x[i] + y[i];
+        z[i] = (uint32_t) sum;
+        carry = (uint32_t) (sum >> 32);
+    }
+
+    return carry;
+}
+
+/*
+ * 256-bit subtraction
+ *
+ * in: x, y in [0, 2^256)
+ * out: z = (x - y) mod 2^256
+ *      c = 0 if x >= y, 1 otherwise
+ * That is, z = c * 2^256 + x - y
+ *
+ * Note: as a memory area, z must be either equal to x or y, or not overlap.
+ */
+static uint32_t u256_sub(uint32_t z[8],
+                         const uint32_t x[8], const uint32_t y[8])
+{
+    uint32_t carry = 0;
+
+    for (unsigned i = 0; i < 8; i++) {
+        uint64_t diff = (uint64_t) x[i] - y[i] - carry;
+        z[i] = (uint32_t) diff;
+        carry = -(uint32_t) (diff >> 32);
+    }
+
+    return carry;
+}
+
+/*
+ * 256-bit conditional assignment
+ *
+ * in: x in [0, 2^256)
+ *     c in [0, 1]
+ * out: z = x if c == 1, z unchanged otherwise
+ *
+ * Note: as a memory area, z must be either equal to x, or not overlap.
+ */
+static void u256_cmov(uint32_t z[8], const uint32_t x[8], uint32_t c)
+{
+    const uint32_t x_mask = -c;
+    for (unsigned i = 0; i < 8; i++) {
+        z[i] = (z[i] & ~x_mask) | (x[i] & x_mask);
+    }
+}
+
+/*
+ * 256-bit compare for equality
+ *
+ * in: x in [0, 2^256)
+ *     y in [0, 2^256)
+ * out: 0 if x == y, unspecified non-zero otherwise
+ */
+static uint32_t u256_diff(const uint32_t x[8], const uint32_t y[8])
+{
+    uint32_t diff = 0;
+    for (unsigned i = 0; i < 8; i++) {
+        diff |= x[i] ^ y[i];
+    }
+    return diff;
+}
+
+/*
+ * 256-bit compare to zero
+ *
+ * in: x in [0, 2^256)
+ * out: 0 if x == 0, unspecified non-zero otherwise
+ */
+static uint32_t u256_diff0(const uint32_t x[8])
+{
+    uint32_t diff = 0;
+    for (unsigned i = 0; i < 8; i++) {
+        diff |= x[i];
+    }
+    return diff;
+}
+
+/*
+ * 32 x 32 -> 64-bit multiply-and-accumulate
+ *
+ * in: x, y, z, t in [0, 2^32)
+ * out: x * y + z + t in [0, 2^64)
+ *
+ * Note: this computation cannot overflow.
+ *
+ * Note: this function has two pure-C implementations (depending on whether
+ * MUL64_IS_CONSTANT_TIME), and possibly optimised asm implementations.
+ * Start with the potential asm definitions, and use the C definition only if
+ * we have no asm for the current toolchain & CPU.
+ */
+static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t);
+
+/* This macro is used to mark whether an asm implementation is found */
+#undef MULADD64_ASM
+/* This macro is used to mark whether the implementation has a small
+ * code size (ie, it can be inlined even in an unrolled loop) */
+#undef MULADD64_SMALL
+
+/*
+ * Currently assembly optimisations are only supported with GCC/Clang for
+ * Arm's Cortex-A and Cortex-M lines of CPUs, which start with the v6-M and
+ * v7-M architectures. __ARM_ARCH_PROFILE is not defined for v6 and earlier.
+ * Thumb and 32-bit assembly are supported; aarch64 is not supported.
+ */
+#if defined(__GNUC__) &&\
+    defined(__ARM_ARCH) && __ARM_ARCH >= 6 && defined(__ARM_ARCH_PROFILE) && \
+    ( __ARM_ARCH_PROFILE == 77 || __ARM_ARCH_PROFILE == 65 ) /* 'M' or 'A' */ && \
+    !defined(__aarch64__)
+
+/*
+ * This set of CPUs is conveniently partitioned as follows:
+ *
+ * 1. Cores that have the DSP extension, which includes a 1-cycle UMAAL
+ *    instruction: M4, M7, M33, all A-class cores.
+ * 2. Cores that don't have the DSP extension, and also lack a constant-time
+ *    64-bit multiplication instruction:
+ *    - M0, M0+, M23: 32-bit multiplication only;
+ *    - M3: 64-bit multiplication is not constant-time.
+ */
+#if defined(__ARM_FEATURE_DSP)
+
+static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t)
+{
+    __asm__(
+        /* UMAAL <RdLo>, <RdHi>, <Rn>, <Rm> */
+        "umaal   %[z], %[t], %[x], %[y]"
+        : [z] "+l" (z), [t] "+l" (t)
+        : [x] "l" (x), [y] "l" (y)
+    );
+    return ((uint64_t) t << 32) | z;
+}
+#define MULADD64_ASM
+#define MULADD64_SMALL
+
+#else /* __ARM_FEATURE_DSP */
+
+/*
+ * This implementation only uses 16x16->32 bit multiplication.
+ *
+ * It decomposes the multiplicands as:
+ *      x = xh:xl = 2^16 * xh + xl
+ *      y = yh:yl = 2^16 * yh + yl
+ * and computes their product as:
+ *      x*y = xl*yl + 2**16 (xh*yl + xl*yh) + 2**32 xh*yh
+ * then adds z and t to the result.
+ */
+static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t)
+{
+    /* First compute x*y, using 3 temporary registers */
+    uint32_t tmp1, tmp2, tmp3;
+    __asm__(
+        ".syntax unified\n\t"
+        /* start by splitting the inputs into halves */
+        "lsrs    %[u], %[x], #16\n\t"
+        "lsrs    %[v], %[y], #16\n\t"
+        "uxth    %[x], %[x]\n\t"
+        "uxth    %[y], %[y]\n\t"
+        /* now we have %[x], %[y], %[u], %[v] = xl, yl, xh, yh */
+        /* let's compute the 4 products we can form with those */
+        "movs    %[w], %[v]\n\t"
+        "muls    %[w], %[u]\n\t"
+        "muls    %[v], %[x]\n\t"
+        "muls    %[x], %[y]\n\t"
+        "muls    %[y], %[u]\n\t"
+        /* now we have %[x], %[y], %[v], %[w] = xl*yl, xh*yl, xl*yh, xh*yh */
+        /* let's split and add the first middle product */
+        "lsls    %[u], %[y], #16\n\t"
+        "lsrs    %[y], %[y], #16\n\t"
+        "adds    %[x], %[u]\n\t"
+        "adcs    %[y], %[w]\n\t"
+        /* let's finish with the second middle product */
+        "lsls    %[u], %[v], #16\n\t"
+        "lsrs    %[v], %[v], #16\n\t"
+        "adds    %[x], %[u]\n\t"
+        "adcs    %[y], %[v]\n\t"
+        : [x] "+l" (x), [y] "+l" (y),
+          [u] "=&l" (tmp1), [v] "=&l" (tmp2), [w] "=&l" (tmp3)
+        : /* no read-only inputs */
+        : "cc"
+    );
+    (void) tmp1;
+    (void) tmp2;
+    (void) tmp3;
+
+    /* Add z and t, using one temporary register */
+    __asm__(
+        ".syntax unified\n\t"
+        "movs    %[u], #0\n\t"
+        "adds    %[x], %[z]\n\t"
+        "adcs    %[y], %[u]\n\t"
+        "adds    %[x], %[t]\n\t"
+        "adcs    %[y], %[u]\n\t"
+        : [x] "+l" (x), [y] "+l" (y), [u] "=&l" (tmp1)
+        : [z] "l" (z), [t] "l" (t)
+        : "cc"
+    );
+    (void) tmp1;
+
+    return ((uint64_t) y << 32) | x;
+}
+#define MULADD64_ASM
+
+#endif /* __ARM_FEATURE_DSP */
+
+#endif /* GCC/Clang with Cortex-M/A CPU */
+
+#if !defined(MULADD64_ASM)
+#if defined(MUL64_IS_CONSTANT_TIME)
+static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t)
+{
+    return (uint64_t) x * y + z + t;
+}
+#define MULADD64_SMALL
+#else
+static uint64_t u32_muladd64(uint32_t x, uint32_t y, uint32_t z, uint32_t t)
+{
+    /* x = xl + 2**16 xh, y = yl + 2**16 yh */
+    const uint16_t xl = (uint16_t) x;
+    const uint16_t yl = (uint16_t) y;
+    const uint16_t xh = x >> 16;
+    const uint16_t yh = y >> 16;
+
+    /* x*y = xl*yl + 2**16 (xh*yl + yl*yh) + 2**32 xh*yh
+     *     = lo    + 2**16 (m1    + m2   ) + 2**32 hi    */
+    const uint32_t lo = (uint32_t) xl * yl;
+    const uint32_t m1 = (uint32_t) xh * yl;
+    const uint32_t m2 = (uint32_t) xl * yh;
+    const uint32_t hi = (uint32_t) xh * yh;
+
+    uint64_t acc = lo + ((uint64_t) (hi + (m1 >> 16) + (m2 >> 16)) << 32);
+    acc += m1 << 16;
+    acc += m2 << 16;
+    acc += z;
+    acc += t;
+
+    return acc;
+}
+#endif /* MUL64_IS_CONSTANT_TIME */
+#endif /* MULADD64_ASM */
+
+/*
+ * 288 + 32 x 256 -> 288-bit multiply and add
+ *
+ * in: x in [0, 2^32)
+ *     y in [0, 2^256)
+ *     z in [0, 2^288)
+ * out: z_out = z_in + x * y mod 2^288
+ *      c     = z_in + x * y div 2^288
+ * That is, z_out + c * 2^288 = z_in + x * y
+ *
+ * Note: as a memory area, z must be either equal to y, or not overlap.
+ *
+ * This is a helper for Montgomery multiplication.
+ */
+static uint32_t u288_muladd(uint32_t z[9], uint32_t x, const uint32_t y[8])
+{
+    uint32_t carry = 0;
+
+#define U288_MULADD_STEP(i) \
+    do { \
+        uint64_t prod = u32_muladd64(x, y[i], z[i], carry); \
+        z[i] = (uint32_t) prod; \
+        carry = (uint32_t) (prod >> 32); \
+    } while( 0 )
+
+#if defined(MULADD64_SMALL)
+    U288_MULADD_STEP(0);
+    U288_MULADD_STEP(1);
+    U288_MULADD_STEP(2);
+    U288_MULADD_STEP(3);
+    U288_MULADD_STEP(4);
+    U288_MULADD_STEP(5);
+    U288_MULADD_STEP(6);
+    U288_MULADD_STEP(7);
+#else
+    for (unsigned i = 0; i < 8; i++) {
+        U288_MULADD_STEP(i);
+    }
+#endif
+
+    uint64_t sum = (uint64_t) z[8] + carry;
+    z[8] = (uint32_t) sum;
+    carry = (uint32_t) (sum >> 32);
+
+    return carry;
+}
+
+/*
+ * 288-bit in-place right shift by 32 bits
+ *
+ * in: z in [0, 2^288)
+ *     c in [0, 2^32)
+ * out: z_out = z_in div 2^32 + c * 2^256
+ *            = (z_in + c * 2^288) div 2^32
+ *
+ * This is a helper for Montgomery multiplication.
+ */
+static void u288_rshift32(uint32_t z[9], uint32_t c)
+{
+    for (unsigned i = 0; i < 8; i++) {
+        z[i] = z[i + 1];
+    }
+    z[8] = c;
+}
+
+/*
+ * 256-bit import from big-endian bytes
+ *
+ * in: p = p0, ..., p31
+ * out: z = p0 * 2^248 + p1 * 2^240 + ... + p30 * 2^8 + p31
+ */
+static void u256_from_bytes(uint32_t z[8], const uint8_t p[32])
+{
+    for (unsigned i = 0; i < 8; i++) {
+        unsigned j = 4 * (7 - i);
+        z[i] = ((uint32_t) p[j + 0] << 24) |
+               ((uint32_t) p[j + 1] << 16) |
+               ((uint32_t) p[j + 2] <<  8) |
+               ((uint32_t) p[j + 3] <<  0);
+    }
+}
+
+/*
+ * 256-bit export to big-endian bytes
+ *
+ * in: z in [0, 2^256)
+ * out: p = p0, ..., p31 such that
+ *      z = p0 * 2^248 + p1 * 2^240 + ... + p30 * 2^8 + p31
+ */
+static void u256_to_bytes(uint8_t p[32], const uint32_t z[8])
+{
+    for (unsigned i = 0; i < 8; i++) {
+        unsigned j = 4 * (7 - i);
+        p[j + 0] = (uint8_t) (z[i] >> 24);
+        p[j + 1] = (uint8_t) (z[i] >> 16);
+        p[j + 2] = (uint8_t) (z[i] >>  8);
+        p[j + 3] = (uint8_t) (z[i] >>  0);
+    }
+}
+
+/**********************************************************************
+ *
+ * Operations modulo a 256-bit prime m
+ *
+ * These are done in the Montgomery domain, that is x is represented by
+ *  x * 2^256 mod m
+ * Numbers need to be converted to that domain before computations,
+ * and back from it afterwards.
+ *
+ * Inversion is computed using Fermat's little theorem.
+ *
+ * Assumptions on m:
+ * - Montgomery operations require that m is odd.
+ * - Fermat's little theorem requires it to be a prime.
+ * - m256_inv() further requires that m % 2^32 >= 2.
+ * - m256_inv() also assumes that the value of m is not a secret.
+ *
+ * In practice operations are done modulo the curve's p and n,
+ * both of which satisfy those assumptions.
+ *
+ **********************************************************************/
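+
+/*
+ * Illustration of why multiplications can be chained inside the domain
+ * (informative only): if X = x * 2^256 mod m and Y = y * 2^256 mod m are the
+ * Montgomery images of x and y, then m256_mul() returns
+ * X * Y / 2^256 = x * y * 2^256 mod m, that is, the Montgomery image of
+ * x * y. m256_prep() and m256_done() convert to and from this representation.
+ */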
+
+/*
+ * Data associated to a modulus for Montgomery operations.
+ *
+ * m in [0, 2^256) - the modulus itself, must be odd
+ * R2 = 2^512 mod m
+ * ni = -m^-1 mod 2^32
+ */
+typedef struct {
+    uint32_t m[8];
+    uint32_t R2[8];
+    uint32_t ni;
+}
+m256_mod;
+
+/*
+ * Data for Montgomery operations modulo the curve's p
+ */
+static const m256_mod p256_p = {
+    {   /* the curve's p */
+        0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000,
+        0x00000000, 0x00000000, 0x00000001, 0xFFFFFFFF,
+    },
+    {   /* 2^512 mod p */
+        0x00000003, 0x00000000, 0xffffffff, 0xfffffffb,
+        0xfffffffe, 0xffffffff, 0xfffffffd, 0x00000004,
+    },
+    0x00000001, /* -p^-1 mod 2^32 */
+};
+
+/*
+ * Data for Montgomery operations modulo the curve's n
+ */
+static const m256_mod p256_n = {
+    {   /* the curve's n */
+        0xFC632551, 0xF3B9CAC2, 0xA7179E84, 0xBCE6FAAD,
+        0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF,
+    },
+    {   /* 2^512 mod n */
+        0xbe79eea2, 0x83244c95, 0x49bd6fa6, 0x4699799c,
+        0x2b6bec59, 0x2845b239, 0xf3d95620, 0x66e12d94,
+    },
+    0xee00bc4f, /* -n^-1 mod 2^32 */
+};
+
+/*
+ * Modular addition
+ *
+ * in: x, y in [0, m)
+ *     mod must point to a valid m256_mod structure
+ * out: z = (x + y) mod m, in [0, m)
+ *
+ * Note: as a memory area, z must be either equal to x or y, or not overlap.
+ */
+static void m256_add(uint32_t z[8],
+                     const uint32_t x[8], const uint32_t y[8],
+                     const m256_mod *mod)
+{
+    uint32_t r[8];
+    uint32_t carry_add = u256_add(z, x, y);
+    uint32_t carry_sub = u256_sub(r, z, mod->m);
+    /* Need to subtract m if:
+     *      x+y >= 2^256 > m (that is, carry_add == 1)
+     *   OR z >= m (that is, carry_sub == 0) */
+    uint32_t use_sub = carry_add | (1 - carry_sub);
+    u256_cmov(z, r, use_sub);
+}
+
+/*
+ * Modular addition mod p
+ *
+ * in: x, y in [0, p)
+ * out: z = (x + y) mod p, in [0, p)
+ *
+ * Note: as a memory area, z must be either equal to x or y, or not overlap.
+ */
+static void m256_add_p(uint32_t z[8],
+                       const uint32_t x[8], const uint32_t y[8])
+{
+    m256_add(z, x, y, &p256_p);
+}
+
+/*
+ * Modular subtraction
+ *
+ * in: x, y in [0, m)
+ *     mod must point to a valid m256_mod structure
+ * out: z = (x - y) mod m, in [0, m)
+ *
+ * Note: as a memory area, z must be either equal to x or y, or not overlap.
+ */
+static void m256_sub(uint32_t z[8],
+                     const uint32_t x[8], const uint32_t y[8],
+                     const m256_mod *mod)
+{
+    uint32_t r[8];
+    uint32_t carry = u256_sub(z, x, y);
+    (void) u256_add(r, z, mod->m);
+    /* Need to add m if and only if x < y, that is carry == 1.
+     * In that case z is in [2^256 - m + 1, 2^256 - 1], so the
+     * addition will have a carry as well, which cancels out. */
+    u256_cmov(z, r, carry);
+}
+
+/*
+ * Modular subtraction mod p
+ *
+ * in: x, y in [0, p)
+ * out: z = (x - y) mod p, in [0, p)
+ *
+ * Note: as a memory area, z must be either equal to x or y, or not overlap.
+ */
+static void m256_sub_p(uint32_t z[8],
+                       const uint32_t x[8], const uint32_t y[8])
+{
+    m256_sub(z, x, y, &p256_p);
+}
+
+/*
+ * Montgomery modular multiplication
+ *
+ * in: x, y in [0, m)
+ *     mod must point to a valid m256_mod structure
+ * out: z = (x * y) / 2^256 mod m, in [0, m)
+ *
+ * Note: as a memory area, z may overlap with x or y.
+ */
+static void m256_mul(uint32_t z[8],
+                     const uint32_t x[8], const uint32_t y[8],
+                     const m256_mod *mod)
+{
+    /*
+     * Algorithm 14.36 in Handbook of Applied Cryptography with:
+     * b = 2^32, n = 8, R = 2^256
+     */
+    uint32_t m_prime = mod->ni;
+    uint32_t a[9];
+
+    for (unsigned i = 0; i < 9; i++) {
+        a[i] = 0;
+    }
+
+    for (unsigned i = 0; i < 8; i++) {
+        /* the "mod 2^32" is implicit from the type */
+        uint32_t u = (a[0] + x[i] * y[0]) * m_prime;
+
+        /* a = (a + x[i] * y + u * m) div b */
+        uint32_t c = u288_muladd(a, x[i], y);
+        c += u288_muladd(a, u, mod->m);
+        u288_rshift32(a, c);
+    }
+
+    /* a = a > m ? a - m : a */
+    uint32_t carry_add = a[8];  // 0 or 1 since a < 2m, see HAC Note 14.37
+    uint32_t carry_sub = u256_sub(z, a, mod->m);
+    uint32_t use_sub = carry_add | (1 - carry_sub);     // see m256_add()
+    u256_cmov(z, a, 1 - use_sub);
+}
+
+/*
+ * Montgomery modular multiplication modulo p.
+ *
+ * in: x, y in [0, p)
+ * out: z = (x * y) / 2^256 mod p, in [0, p)
+ *
+ * Note: as a memory area, z may overlap with x or y.
+ */
+static void m256_mul_p(uint32_t z[8],
+                       const uint32_t x[8], const uint32_t y[8])
+{
+    m256_mul(z, x, y, &p256_p);
+}
+
+/*
+ * In-place conversion to Montgomery form
+ *
+ * in: z in [0, m)
+ *     mod must point to a valid m256_mod structure
+ * out: z_out = z_in * 2^256 mod m, in [0, m)
+ */
+static void m256_prep(uint32_t z[8], const m256_mod *mod)
+{
+    m256_mul(z, z, mod->R2, mod);
+}
+
+/*
+ * In-place conversion from Montgomery form
+ *
+ * in: z in [0, m)
+ *     mod must point to a valid m256_mod structure
+ * out: z_out = z_in / 2^256 mod m, in [0, m)
+ * That is, z_in was z_actual * 2^256 mod m, and z_out is z_actual
+ */
+static void m256_done(uint32_t z[8], const m256_mod *mod)
+{
+    uint32_t one[8];
+    u256_set32(one, 1);
+    m256_mul(z, z, one, mod);
+}
+
+/*
+ * Set to 32-bit value
+ *
+ * in: x in [0, 2^32)
+ *     mod must point to a valid m256_mod structure
+ * out: z = x * 2^256 mod m, in [0, m)
+ * That is, z is set to the image of x in the Montgomery domain.
+ */
+static void m256_set32(uint32_t z[8], uint32_t x, const m256_mod *mod)
+{
+    u256_set32(z, x);
+    m256_prep(z, mod);
+}
+
+/*
+ * Modular inversion in Montgomery form
+ *
+ * in: x in [0, m)
+ *     mod must point to a valid m256_mod structure
+ *     such that mod->m % 2^32 >= 2, assumed to be public.
+ * out: z = x^-1 * 2^512 mod m if x != 0,
+ *      z = 0 if x == 0
+ * That is, if x = x_actual    * 2^256 mod m, then
+ *             z = x_actual^-1 * 2^256 mod m
+ *
+ * Note: as a memory area, z may overlap with x.
+ */
+static void m256_inv(uint32_t z[8], const uint32_t x[8],
+                     const m256_mod *mod)
+{
+    /*
+     * Use Fermat's little theorem to compute x^-1 as x^(m-2).
+     *
+     * Take advantage of the fact that both p's and n's least significant limb
+     * is at least 2 to perform the subtraction on the fly (no carry).
+     *
+     * Use plain right-to-left binary exponentiation;
+     * branches are OK as the exponent is not a secret.
+     */
+    uint32_t bitval[8];
+    u256_cmov(bitval, x, 1);    /* copy x before writing to z */
+
+    m256_set32(z, 1, mod);
+
+    unsigned i = 0;
+    uint32_t limb = mod->m[i] - 2;
+    while (1) {
+        for (unsigned j = 0; j < 32; j++) {
+            if ((limb & 1) != 0) {
+                m256_mul(z, z, bitval, mod);
+            }
+            m256_mul(bitval, bitval, bitval, mod);
+            limb >>= 1;
+        }
+
+        if (i == 7)
+            break;
+
+        i++;
+        limb = mod->m[i];
+    }
+}
+
+/*
+ * Import modular integer from bytes to Montgomery domain
+ *
+ * in: p = p0, ..., p31
+ *     mod must point to a valid m256_mod structure
+ * out: z = (p0 * 2^248 + ... + p31) * 2^256 mod m, in [0, m)
+ *      return 0 if the number was already in [0, m), or -1.
+ *      z may be incorrect and must be discarded when -1 is returned.
+ */
+static int m256_from_bytes(uint32_t z[8],
+                           const uint8_t p[32], const m256_mod *mod)
+{
+    u256_from_bytes(z, p);
+
+    uint32_t t[8];
+    uint32_t lt_m = u256_sub(t, z, mod->m);
+    if (lt_m != 1)
+        return -1;
+
+    m256_prep(z, mod);
+    return 0;
+}
+
+/*
+ * Export modular integer from Montgomery domain to bytes
+ *
+ * in: z in [0, 2^256)
+ *     mod must point to a valid m256_mod structure
+ * out: p = p0, ..., p31 such that
+ *      z = (p0 * 2^248 + ... + p31) * 2^256 mod m
+ */
+static void m256_to_bytes(uint8_t p[32],
+                          const uint32_t z[8], const m256_mod *mod)
+{
+    uint32_t zi[8];
+    u256_cmov(zi, z, 1);
+    m256_done(zi, mod);
+
+    u256_to_bytes(p, zi);
+}
+
+/**********************************************************************
+ *
+ * Operations on curve points
+ *
+ * Points are represented in two coordinate systems:
+ *  - affine (x, y) - extended to represent 0 (see below)
+ *  - jacobian (x:y:z)
+ * In either case, coordinates are integers modulo p256_p and
+ * are always represented in the Montgomery domain.
+ *
+ * For background on jacobian coordinates, see for example [GECC] 3.2.2:
+ * - conversions go (x, y) -> (x:y:1) and (x:y:z) -> (x/z^2, y/z^3)
+ * - the curve equation becomes y^2 = x^3 - 3 x z^4 + b z^6
+ * - 0 (aka the origin aka point at infinity) is (x:y:0) with y^2 = x^3.
+ * - point negation goes -(x:y:z) = (x:-y:z)
+ *
+ * Normally 0 (the point at infinity) can't be represented in affine
+ * coordinates. However we extend affine coordinates with the convention that
+ * (0, 0) (which is normally not a point on the curve) is interpreted as 0.
+ *
+ * References:
+ * - [GECC]: Guide to Elliptic Curve Cryptography; Hankerson, Menezes,
+ *   Vanstone; Springer, 2004.
+ * - [CMO98]: Efficient Elliptic Curve Exponentiation Using Mixed Coordinates;
+ *   Cohen, Miyaji, Ono; Springer, ASIACRYPT 1998.
+ *   https://link.springer.com/content/pdf/10.1007/3-540-49649-1_6.pdf
+ * - [RCB15]: Complete addition formulas for prime order elliptic curves;
+ *   Renes, Costello, Batina; IACR e-print 2015-1060.
+ *   https://eprint.iacr.org/2015/1060.pdf
+ *
+ **********************************************************************/
+
+/*
+ * The curve's b parameter in the Short Weierstrass equation
+ *  y^2 = x^3 - 3*x + b
+ * Compared to the standard, this is converted to the Montgomery domain.
+ */
+static const uint32_t p256_b[8] = { /* b * 2^256 mod p */
+    0x29c4bddf, 0xd89cdf62, 0x78843090, 0xacf005cd,
+    0xf7212ed6, 0xe5a220ab, 0x04874834, 0xdc30061d,
+};
+
+/*
+ * The curve's conventional base point G.
+ * Compared to the standard, coordinates converted to the Montgomery domain.
+ */
+static const uint32_t p256_gx[8] = { /* G_x * 2^256 mod p */
+    0x18a9143c, 0x79e730d4, 0x5fedb601, 0x75ba95fc,
+    0x77622510, 0x79fb732b, 0xa53755c6, 0x18905f76,
+};
+static const uint32_t p256_gy[8] = { /* G_y * 2^256 mod p */
+    0xce95560a, 0xddf25357, 0xba19e45c, 0x8b4ab8e4,
+    0xdd21f325, 0xd2e88688, 0x25885d85, 0x8571ff18,
+};
+
+/*
+ * Point-on-curve check - do the coordinates satisfy the curve's equation?
+ *
+ * in: x, y in [0, p)   (Montgomery domain)
+ * out: 0 if the point lies on the curve and is not 0,
+ *      unspecified non-zero otherwise
+ */
+static uint32_t point_check(const uint32_t x[8], const uint32_t y[8])
+{
+    uint32_t lhs[8], rhs[8];
+
+    /* lhs = y^2 */
+    m256_mul_p(lhs, y, y);
+
+    /* rhs = x^3 - 3x + b */
+    m256_mul_p(rhs, x,   x);      /* x^2 */
+    m256_mul_p(rhs, rhs, x);      /* x^3 */
+    for (unsigned i = 0; i < 3; i++)
+        m256_sub_p(rhs, rhs, x);  /* x^3 - 3x */
+    m256_add_p(rhs, rhs, p256_b); /* x^3 - 3x + b */
+
+    return u256_diff(lhs, rhs);
+}
+
+/*
+ * In-place jacobian to affine coordinate conversion
+ *
+ * in: (x:y:z) must be on the curve (coordinates in Montgomery domain)
+ * out: x_out = x_in / z_in^2   (Montgomery domain)
+ *      y_out = y_in / z_in^3   (Montgomery domain)
+ *      z_out unspecified, must be disregarded
+ *
+ * Note: if z is 0 (that is, the input point is 0), x_out = y_out = 0.
+ */
+static void point_to_affine(uint32_t x[8], uint32_t y[8], uint32_t z[8])
+{
+    uint32_t t[8];
+
+    m256_inv(z, z, &p256_p);    /* z = z^-1 */
+
+    m256_mul_p(t, z, z);        /* t = z^-2 */
+    m256_mul_p(x, x, t);        /* x = x * z^-2 */
+
+    m256_mul_p(t, t, z);        /* t = z^-3 */
+    m256_mul_p(y, y, t);        /* y = y * z^-3 */
+}
+
+/*
+ * In-place point doubling in jacobian coordinates (Montgomery domain)
+ *
+ * in: P_in = (x:y:z), must be on the curve
+ * out: (x:y:z) = P_out = 2 * P_in
+ */
+static void point_double(uint32_t x[8], uint32_t y[8], uint32_t z[8])
+{
+    /*
+     * This is formula 6 from [CMO98], cited as complete in [RCB15] (table 1).
+     * Notations as in the paper, except u added and t omitted (it's x3).
+     */
+    uint32_t m[8], s[8], u[8];
+
+    /* m = 3 * x^2 + a * z^4 = 3 * (x + z^2) * (x - z^2) */
+    m256_mul_p(s, z, z);
+    m256_add_p(m, x, s);
+    m256_sub_p(u, x, s);
+    m256_mul_p(s, m, u);
+    m256_add_p(m, s, s);
+    m256_add_p(m, m, s);
+
+    /* s = 4 * x * y^2 */
+    m256_mul_p(u, y, y);
+    m256_add_p(u, u, u); /* u = 2 * y^2 (used below) */
+    m256_mul_p(s, x, u);
+    m256_add_p(s, s, s);
+
+    /* u = 8 * y^4 (not named in the paper, first term of y3) */
+    m256_mul_p(u, u, u);
+    m256_add_p(u, u, u);
+
+    /* x3 = t = m^2 - 2 * s */
+    m256_mul_p(x, m, m);
+    m256_sub_p(x, x, s);
+    m256_sub_p(x, x, s);
+
+    /* z3 = 2 * y * z */
+    m256_mul_p(z, y, z);
+    m256_add_p(z, z, z);
+
+    /* y3 = -u + m * (s - t) */
+    m256_sub_p(y, s, x);
+    m256_mul_p(y, y, m);
+    m256_sub_p(y, y, u);
+}
+
+/*
+ * In-place point addition in jacobian-affine coordinates (Montgomery domain)
+ *
+ * in: P_in = (x1:y1:z1), must be on the curve and not 0
+ *     Q = (x2, y2), must be on the curve and not P_in or -P_in or 0
+ * out: P_out = (x1:y1:z1) = P_in + Q
+ */
+static void point_add(uint32_t x1[8], uint32_t y1[8], uint32_t z1[8],
+                      const uint32_t x2[8], const uint32_t y2[8])
+{
+    /*
+     * This is formula 5 from [CMO98], with z2 == 1 substituted. We use
+     * intermediates with neutral names, and names from the paper in comments.
+     */
+    uint32_t t1[8], t2[8], t3[8];
+
+    /* u1 = x1 and s1 = y1 (no computations) */
+
+    /* t1 = u2 = x2 z1^2 */
+    m256_mul_p(t1, z1, z1);
+    m256_mul_p(t2, t1, z1);
+    m256_mul_p(t1, t1, x2);
+
+    /* t2 = s2 = y2 z1^3 */
+    m256_mul_p(t2, t2, y2);
+
+    /* t1 = h = u2 - u1 */
+    m256_sub_p(t1, t1, x1); /* t1 = x2 * z1^2 - x1 */
+
+    /* t2 = r = s2 - s1 */
+    m256_sub_p(t2, t2, y1);
+
+    /* z3 = z1 * h */
+    m256_mul_p(z1, z1, t1);
+
+    /* t1 = h^3 */
+    m256_mul_p(t3, t1, t1);
+    m256_mul_p(t1, t3, t1);
+
+    /* t3 = x1 * h^2 */
+    m256_mul_p(t3, t3, x1);
+
+    /* x3 = r^2 - 2 * x1 * h^2 - h^3 */
+    m256_mul_p(x1, t2, t2);
+    m256_sub_p(x1, x1, t3);
+    m256_sub_p(x1, x1, t3);
+    m256_sub_p(x1, x1, t1);
+
+    /* y3 = r * (x1 * h^2 - x3) - y1 h^3 */
+    m256_sub_p(t3, t3, x1);
+    m256_mul_p(t3, t3, t2);
+    m256_mul_p(t1, t1, y1);
+    m256_sub_p(y1, t3, t1);
+}
+
+/*
+ * Point addition or doubling (affine to jacobian, Montgomery domain)
+ *
+ * in: P = (x1, y1) - must be on the curve and not 0
+ *     Q = (x2, y2) - must be on the curve and not 0
+ * out: (x3, y3) = R = P + Q
+ *
+ * Note: unlike point_add(), this function works if P = +- Q;
+ * however it leaks information on its input through timing,
+ * branches taken and memory access patterns (if observable).
+ */
+static void point_add_or_double_leaky(
+                        uint32_t x3[8], uint32_t y3[8],
+                        const uint32_t x1[8], const uint32_t y1[8],
+                        const uint32_t x2[8], const uint32_t y2[8])
+{
+
+    uint32_t z3[8];
+    u256_cmov(x3, x1, 1);
+    u256_cmov(y3, y1, 1);
+    m256_set32(z3, 1, &p256_p);
+
+    if (u256_diff(x1, x2) != 0) {
+        // P != +- Q -> generic addition
+        point_add(x3, y3, z3, x2, y2);
+        point_to_affine(x3, y3, z3);
+    }
+    else if (u256_diff(y1, y2) == 0) {
+        // P == Q -> double
+        point_double(x3, y3, z3);
+        point_to_affine(x3, y3, z3);
+    } else {
+        // P == -Q -> zero
+        m256_set32(x3, 0, &p256_p);
+        m256_set32(y3, 0, &p256_p);
+    }
+}
+
+/*
+ * Import curve point from bytes
+ *
+ * in: p = (x, y) concatenated, fixed-width 256-bit big-endian integers
+ * out: x, y in Montgomery domain
+ *      return 0 if x and y are both in [0, p)
+ *                  and (x, y) is on the curve and not 0
+ *             unspecified non-zero otherwise.
+ *      x and y are unspecified and must be discarded if returning non-zero.
+ */
+static int point_from_bytes(uint32_t x[8], uint32_t y[8], const uint8_t p[64])
+{
+    int ret;
+
+    ret = m256_from_bytes(x, p, &p256_p);
+    if (ret != 0)
+        return ret;
+
+    ret = m256_from_bytes(y, p + 32, &p256_p);
+    if (ret != 0)
+        return ret;
+
+    return (int) point_check(x, y);
+}
+
+/*
+ * Export curve point to bytes
+ *
+ * in: x, y affine coordinates of a point (Montgomery domain)
+ *     must be on the curve and not 0
+ * out: p = (x, y) concatenated, fixed-width 256-bit big-endian integers
+ */
+static void point_to_bytes(uint8_t p[64],
+                           const uint32_t x[8], const uint32_t y[8])
+{
+    m256_to_bytes(p,        x, &p256_p);
+    m256_to_bytes(p + 32,   y, &p256_p);
+}
+
+/**********************************************************************
+ *
+ * Scalar multiplication and other scalar-related operations
+ *
+ **********************************************************************/
+
+/*
+ * Scalar multiplication
+ *
+ * in: P = (px, py), affine (Montgomery), must be on the curve and not 0
+ *     s in [1, n-1]
+ * out: R = s * P = (rx, ry), affine coordinates (Montgomery).
+ *
+ * Note: as memory areas, none of the parameters may overlap.
+ */
+static void scalar_mult(uint32_t rx[8], uint32_t ry[8],
+                        const uint32_t px[8], const uint32_t py[8],
+                        const uint32_t s[8])
+{
+    /*
+     * We use a signed binary ladder, see for example slides 10-14 of
+     * http://ecc2015.math.u-bordeaux1.fr/documents/hamburg.pdf but with
+     * implicit recoding, and a different loop initialisation to avoid feeding
+     * 0 to our addition formulas, as they don't support it.
+     */
+    uint32_t s_odd[8], py_neg[8], py_use[8], rz[8];
+
+    /*
+     * Make s odd by replacing it with n - s if necessary.
+     *
+     * If s was odd, we'll have s_odd = s, and define P' = P.
+     * Otherwise, we'll have s_odd = n - s and define P' = -P.
+     *
+     * Either way, we can compute s * P as s_odd * P'.
+     */
+    u256_sub(s_odd, p256_n.m, s); /* no carry, result still in [1, n-1] */
+    uint32_t negate = ~s[0] & 1;
+    u256_cmov(s_odd, s, 1 - negate);
+
+    /* Compute py_neg = - py mod p (that's the y coordinate of -P) */
+    u256_set32(py_use, 0);
+    m256_sub_p(py_neg, py_use, py);
+
+    /* Initialize R = P' = (x:(-1)^negate * y:1) */
+    u256_cmov(rx, px, 1);
+    u256_cmov(ry, py, 1);
+    m256_set32(rz, 1, &p256_p);
+    u256_cmov(ry, py_neg, negate);
+
+    /*
+     * For any odd number s_odd = b255 ... b1 1, we have
+     *      s_odd = 2^255 + 2^254 sbit(b255) + ... + 2 sbit(b2) + sbit(b1)
+     * writing
+     *      sbit(b) = 2 * b - 1 = b ? 1 : -1
+     *
+     * Use that to compute s_odd * P' by repeating R = 2 * R +- P':
+     *      s_odd * P' = 2 * ( ... (2 * P' + sbit(b255) P') ... ) + sbit(b1) P'
+     *
+     * The loop invariant is that when beginning an iteration we have
+     *      R = s_i P'
+     * with
+     *      s_i = 2^(255-i) + 2^(254-i) sbit(b_255) + ...
+     * where the sum has 256 - i terms.
+     *
+     * When updating R we need to make sure the input to point_add() is
+     * neither 0 nor +-P'. Since that input is 2 s_i P', it is sufficient to
+     * see that 1 < 2 s_i < n-1. The lower bound is obvious since s_i is a
+     * positive integer, and for the upper bound we distinguish three cases.
+     *
+     * If i > 1, then s_i < 2^254, so 2 s_i < 2^255 < n-1.
+     * Otherwise, i == 1 and we have 2 s_i = s_odd - sbit(b1).
+     *      If s_odd <= n-4, then 2 s_1 <= n-3.
+     *      Otherwise, s_odd = n-2, and for this curve's value of n,
+     *      we have b1 == 1, so sbit(b1) = 1 and 2 s_1 <= n-3.
+     */
+    for (unsigned i = 255; i > 0; i--) {
+        uint32_t bit = (s_odd[i / 32] >> i % 32) & 1;
+
+        /* set (px, py_use) = sbit(bit) P' = sbit(bit) * (-1)^negate P */
+        u256_cmov(py_use, py, bit ^ negate);
+        u256_cmov(py_use, py_neg, (1 - bit) ^ negate);
+
+        /* Update R = 2 * R +- P' */
+        point_double(rx, ry, rz);
+        point_add(rx, ry, rz, px, py_use);
+    }
+
+    point_to_affine(rx, ry, rz);
+}
+
+/*
+ * Scalar import from big-endian bytes
+ *
+ * in: p = p0, ..., p31
+ * out: s = p0 * 2^248 + p1 * 2^240 + ... + p30 * 2^8 + p31
+ *      return 0 if s in [1, n-1],
+ *            -1 otherwise.
+ */
+static int scalar_from_bytes(uint32_t s[8], const uint8_t p[32])
+{
+    u256_from_bytes(s, p);
+
+    uint32_t r[8];
+    uint32_t lt_n = u256_sub(r, s, p256_n.m);
+
+    u256_set32(r, 1);
+    uint32_t lt_1 = u256_sub(r, s, r);
+
+    if (lt_n && !lt_1)
+        return 0;
+
+    return -1;
+}
+
+/* Using RNG functions from Mbed TLS as p256-m does not come with a
+ * cryptographically secure RNG function.
+ */
+int p256_generate_random(uint8_t *output, unsigned output_size)
+{
+    int ret;
+    ret = psa_generate_random(output, output_size);
+
+    if (ret != 0){
+        return P256_RANDOM_FAILED;
+    }
+    return P256_SUCCESS;
+}
+
+/*
+ * Scalar generation, with public key
+ *
+ * out: sbytes the big-endian byte representation of the scalar
+ *      s its u256 representation
+ *      x, y the affine coordinates of s * G (Montgomery domain)
+ *      return 0 if OK, -1 on failure
+ *      sbytes, s, x, y must be discarded when returning non-zero.
+ */
+static int scalar_gen_with_pub(uint8_t sbytes[32], uint32_t s[8],
+                               uint32_t x[8], uint32_t y[8])
+{
+    /* generate a random valid scalar */
+    int ret;
+    unsigned nb_tried = 0;
+    do {
+        if (nb_tried++ >= 4)
+            return -1;
+
+        ret = p256_generate_random(sbytes, 32);
+        CT_POISON(sbytes, 32);
+        if (ret != 0)
+            return -1;
+
+        ret = scalar_from_bytes(s, sbytes);
+        CT_UNPOISON(&ret, sizeof ret);
+    }
+    while (ret != 0);
+
+    /* compute and output the associated public key */
+    scalar_mult(x, y, p256_gx, p256_gy, s);
+
+    /* the associated public key is not a secret */
+    CT_UNPOISON(x, 32);
+    CT_UNPOISON(y, 32);
+
+    return 0;
+}
+
+/*
+ * ECDH/ECDSA generate pair
+ */
+int p256_gen_keypair(uint8_t priv[32], uint8_t pub[64])
+{
+    uint32_t s[8], x[8], y[8];
+    int ret = scalar_gen_with_pub(priv, s, x, y);
+    zeroize(s, sizeof s);
+    if (ret != 0)
+        return P256_RANDOM_FAILED;
+
+    point_to_bytes(pub, x, y);
+    return 0;
+}
+
+/**********************************************************************
+ *
+ * ECDH
+ *
+ **********************************************************************/
+
+/*
+ * ECDH compute shared secret
+ */
+int p256_ecdh_shared_secret(uint8_t secret[32],
+                            const uint8_t priv[32], const uint8_t peer[64])
+{
+    CT_POISON(priv, 32);
+
+    uint32_t s[8], px[8], py[8], x[8], y[8];
+    int ret;
+
+    ret = scalar_from_bytes(s, priv);
+    CT_UNPOISON(&ret, sizeof ret);
+    if (ret != 0) {
+        ret = P256_INVALID_PRIVKEY;
+        goto cleanup;
+    }
+
+    ret = point_from_bytes(px, py, peer);
+    if (ret != 0) {
+        ret = P256_INVALID_PUBKEY;
+        goto cleanup;
+    }
+
+    scalar_mult(x, y, px, py, s);
+
+    m256_to_bytes(secret, x, &p256_p);
+    CT_UNPOISON(secret, 32);
+
+cleanup:
+    zeroize(s, sizeof s);
+    return ret;
+}
+
+/**********************************************************************
+ *
+ * ECDSA
+ *
+ * Reference:
+ * [SEC1] SEC 1: Elliptic Curve Cryptography, Certicom research, 2009.
+ *        http://www.secg.org/sec1-v2.pdf
+ **********************************************************************/
+
+/*
+ * Reduction mod n of a small number
+ *
+ * in: x in [0, 2^256)
+ * out: x_out = x_in mod n in [0, n)
+ */
+static void ecdsa_m256_mod_n(uint32_t x[8])
+{
+    uint32_t t[8];
+    uint32_t c = u256_sub(t, x, p256_n.m);
+    u256_cmov(x, t, 1 - c);
+}
+
+/*
+ * Import integer mod n (Montgomery domain) from hash
+ *
+ * in: h = h0, ..., h_(hlen-1)
+ *     hlen the length of h in bytes
+ * out: z = (h0 * 2^(8*(l-1)) + ... + h_(l-1)) * 2^256 mod n
+ *      with l = min(32, hlen)
+ *
+ * Note: in [SEC1] this is step 5 of 4.1.3 (sign) or step 3 of 4.1.4 (verify),
+ * with obvious simplifications since n's bit-length is a multiple of 8.
+ */
+static void ecdsa_m256_from_hash(uint32_t z[8],
+                                 const uint8_t *h, size_t hlen)
+{
+    /* convert from h (big-endian) */
+    /* hlen is public data so it's OK to branch on it */
+    if (hlen < 32) {
+        uint8_t p[32];
+        for (unsigned i = 0; i < 32; i++)
+            p[i] = 0;
+        for (unsigned i = 0; i < hlen; i++)
+            p[32 - hlen + i] = h[i];
+        u256_from_bytes(z, p);
+    } else {
+        u256_from_bytes(z, h);
+    }
+
+    /* ensure the result is in [0, n) */
+    ecdsa_m256_mod_n(z);
+
+    /* map to Montgomery domain */
+    m256_prep(z, &p256_n);
+}
+
+/*
+ * ECDSA sign
+ */
+int p256_ecdsa_sign(uint8_t sig[64], const uint8_t priv[32],
+                    const uint8_t *hash, size_t hlen)
+{
+    CT_POISON(priv, 32);
+
+    /*
+     * Steps and notations from [SEC1] 4.1.3
+     *
+     * Instead of retrying on r == 0 or s == 0, just abort,
+     * as those events have negligible probability.
+     */
+    int ret;
+
+    /* Temporary buffers - the first two are mostly stable, so have names */
+    uint32_t xr[8], k[8], t3[8], t4[8];
+
+    /* 1. Set ephemeral keypair */
+    uint8_t *kb = (uint8_t *) t4;
+    /* kb will be erased by re-using t4 for dU - if we exit before that, we
+     * haven't read the private key yet, so kb isn't sensitive yet */
+    ret = scalar_gen_with_pub(kb, k, xr, t3);   /* xr = x_coord(k * G) */
+    if (ret != 0)
+        return P256_RANDOM_FAILED;
+    m256_prep(k, &p256_n);
+
+    /* 2. Convert xr to an integer */
+    m256_done(xr, &p256_p);
+
+    /* 3. Reduce xr mod n (extra: output it while at it) */
+    ecdsa_m256_mod_n(xr);    /* xr = int(xr) mod n */
+
+    /* xr is public data so it's OK to use a branch */
+    if (u256_diff0(xr) == 0)
+        return P256_RANDOM_FAILED;
+
+    u256_to_bytes(sig, xr);
+
+    m256_prep(xr, &p256_n);
+
+    /* 4. Skipped - we take the hash as an input, not the message */
+
+    /* 5. Derive an integer from the hash */
+    ecdsa_m256_from_hash(t3, hash, hlen);   /* t3 = e */
+
+    /* 6. Compute s = k^-1 * (e + r * dU) */
+
+    /* Note: dU will be erased by re-using t4 for the value of s (public) */
+    ret = scalar_from_bytes(t4, priv);   /* t4 = dU (integer domain) */
+    CT_UNPOISON(&ret, sizeof ret); /* Result of input validation */
+    if (ret != 0)
+        return P256_INVALID_PRIVKEY;
+    m256_prep(t4, &p256_n);         /* t4 = dU (Montgomery domain) */
+
+    m256_inv(k, k, &p256_n);        /* k^-1 */
+    m256_mul(t4, xr, t4, &p256_n);  /* t4 = r * dU */
+    m256_add(t4, t3, t4, &p256_n);  /* t4 = e + r * dU */
+    m256_mul(t4, k, t4, &p256_n);   /* t4 = s = k^-1 * (e + r * dU) */
+    zeroize(k, sizeof k);
+
+    /* 7. Output s (r was already output at step 3) */
+    CT_UNPOISON(t4, 32);
+    if (u256_diff0(t4) == 0) {
+        /* undo early output of r */
+        u256_to_bytes(sig, t4);
+        return P256_RANDOM_FAILED;
+    }
+    m256_to_bytes(sig + 32, t4, &p256_n);
+
+    return P256_SUCCESS;
+}
+
+/*
+ * ECDSA verify
+ */
+int p256_ecdsa_verify(const uint8_t sig[64], const uint8_t pub[64],
+                      const uint8_t *hash, size_t hlen)
+{
+    /*
+     * Steps and notations from [SEC1] 4.1.4
+     *
+     * Note: we're using public data only, so branches are OK
+     */
+    int ret;
+
+    /* 1. Validate range of r and s : [1, n-1] */
+    uint32_t r[8], s[8];
+    ret = scalar_from_bytes(r, sig);
+    if (ret != 0)
+        return P256_INVALID_SIGNATURE;
+    ret = scalar_from_bytes(s, sig + 32);
+    if (ret != 0)
+        return P256_INVALID_SIGNATURE;
+
+    /* 2. Skipped - we take the hash as an input, not the message */
+
+    /* 3. Derive an integer from the hash */
+    uint32_t e[8];
+    ecdsa_m256_from_hash(e, hash, hlen);
+
+    /* 4. Compute u1 = e * s^-1 and u2 = r * s^-1 */
+    uint32_t u1[8], u2[8];
+    m256_prep(s, &p256_n);           /* s in Montgomery domain */
+    m256_inv(s, s, &p256_n);         /* s = s^-1 mod n */
+    m256_mul(u1, e, s, &p256_n);     /* u1 = e * s^-1 mod n */
+    m256_done(u1, &p256_n);          /* u1 out of Montgomery domain */
+
+    u256_cmov(u2, r, 1);
+    m256_prep(u2, &p256_n);          /* r in Montgomery domain */
+    m256_mul(u2, u2, s, &p256_n);    /* u2 = r * s^-1 mod n */
+    m256_done(u2, &p256_n);          /* u2 out of Montgomery domain */
+
+    /* 5. Compute R (and re-use (u1, u2) to store its coordinates) */
+    uint32_t px[8], py[8];
+    ret = point_from_bytes(px, py, pub);
+    if (ret != 0)
+        return P256_INVALID_PUBKEY;
+
+    scalar_mult(e, s, px, py, u2);      /* (e, s) = R2 = u2 * Qu */
+
+    if (u256_diff0(u1) == 0) {
+        /* u1 out of range for scalar_mult() - just skip it */
+        u256_cmov(u1, e, 1);
+        /* we don't care about the y coordinate */
+    } else {
+        scalar_mult(px, py, p256_gx, p256_gy, u1); /* (px, py) = R1 = u1 * G */
+
+        /* (u1, u2) = R = R1 + R2 */
+        point_add_or_double_leaky(u1, u2, px, py, e, s);
+        /* No need to check if R == 0 here: if that's the case, it will be
+         * caught when comparing rx (which will be 0) to r (which isn't). */
+    }
+
+    /* 6. Convert xR to an integer */
+    m256_done(u1, &p256_p);
+
+    /* 7. Reduce xR mod n */
+    ecdsa_m256_mod_n(u1);
+
+    /* 8. Compare xR mod n to r */
+    uint32_t diff = u256_diff(u1, r);
+    if (diff == 0)
+        return P256_SUCCESS;
+
+    return P256_INVALID_SIGNATURE;
+}
+
+/**********************************************************************
+ *
+ * Key management utilities
+ *
+ **********************************************************************/
+
+int p256_validate_pubkey(const uint8_t pub[64])
+{
+    uint32_t x[8], y[8];
+    int ret = point_from_bytes(x, y, pub);
+
+    return ret == 0 ? P256_SUCCESS : P256_INVALID_PUBKEY;
+}
+
+int p256_validate_privkey(const uint8_t priv[32])
+{
+    uint32_t s[8];
+    int ret = scalar_from_bytes(s, priv);
+    zeroize(s, sizeof(s));
+
+    return ret == 0 ? P256_SUCCESS : P256_INVALID_PRIVKEY;
+}
+
+int p256_public_from_private(uint8_t pub[64], const uint8_t priv[32])
+{
+    int ret;
+    uint32_t s[8];
+
+    ret = scalar_from_bytes(s, priv);
+    if (ret != 0)
+        return P256_INVALID_PRIVKEY;
+
+    /* compute and output the associated public key */
+    uint32_t x[8], y[8];
+    scalar_mult(x, y, p256_gx, p256_gy, s);
+
+    /* the associated public key is not a secret, the scalar was */
+    CT_UNPOISON(x, 32);
+    CT_UNPOISON(y, 32);
+    zeroize(s, sizeof(s));
+
+    point_to_bytes(pub, x, y);
+    return P256_SUCCESS;
+}
+
+#endif
diff --git a/3rdparty/p256-m/p256-m/p256-m.h b/3rdparty/p256-m/p256-m/p256-m.h
new file mode 100644
index 0000000..c267800
--- /dev/null
+++ b/3rdparty/p256-m/p256-m/p256-m.h
@@ -0,0 +1,135 @@
+/*
+ * Interface of curve P-256 (ECDH and ECDSA)
+ *
+ * Copyright The Mbed TLS Contributors
+ * Author: Manuel Pégourié-Gonnard.
+ * SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+ */
+#ifndef P256_M_H
+#define P256_M_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* Status codes */
+#define P256_SUCCESS            0
+#define P256_RANDOM_FAILED      -1
+#define P256_INVALID_PUBKEY     -2
+#define P256_INVALID_PRIVKEY    -3
+#define P256_INVALID_SIGNATURE  -4
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * RNG function - must be provided externally and be cryptographically secure.
+ *
+ * in: output - must point to a writable buffer of at least output_size bytes.
+ *     output_size - the number of random bytes to write to output.
+ * out: output is filled with output_size random bytes.
+ *      return 0 on success, non-zero on errors.
+ */
+extern int p256_generate_random(uint8_t * output, unsigned output_size);
+
+/*
+ * ECDH/ECDSA generate key pair
+ *
+ * [in] draws from p256_generate_random()
+ * [out] priv: on success, holds the private key, as a big-endian integer
+ * [out] pub: on success, holds the public key, as two big-endian integers
+ *
+ * return:  P256_SUCCESS on success
+ *          P256_RANDOM_FAILED on failure
+ */
+int p256_gen_keypair(uint8_t priv[32], uint8_t pub[64]);
+
+/*
+ * ECDH compute shared secret
+ *
+ * [out] secret: on success, holds the shared secret, as a big-endian integer
+ * [in] priv: our private key as a big-endian integer
+ * [in] pub: the peer's public key, as two big-endian integers
+ *
+ * return:  P256_SUCCESS on success
+ *          P256_INVALID_PRIVKEY if priv is invalid
+ *          P256_INVALID_PUBKEY if pub is invalid
+ */
+int p256_ecdh_shared_secret(uint8_t secret[32],
+                            const uint8_t priv[32], const uint8_t pub[64]);
+
+/*
+ * ECDSA sign
+ *
+ * [in] draws from p256_generate_random()
+ * [out] sig: on success, holds the signature, as two big-endian integers
+ * [in] priv: our private key as a big-endian integer
+ * [in] hash: the hash of the message to be signed
+ * [in] hlen: the size of hash in bytes
+ *
+ * return:  P256_SUCCESS on success
+ *          P256_RANDOM_FAILED on failure
+ *          P256_INVALID_PRIVKEY if priv is invalid
+ */
+int p256_ecdsa_sign(uint8_t sig[64], const uint8_t priv[32],
+                    const uint8_t *hash, size_t hlen);
+
+/*
+ * ECDSA verify
+ *
+ * [in] sig: the signature to be verified, as two big-endian integers
+ * [in] pub: the associated public key, as two big-endian integers
+ * [in] hash: the hash of the message that was signed
+ * [in] hlen: the size of hash in bytes
+ *
+ * return:  P256_SUCCESS on success - the signature was verified as valid
+ *          P256_INVALID_PUBKEY if pub is invalid
+ *          P256_INVALID_SIGNATURE if the signature was found to be invalid
+ */
+int p256_ecdsa_verify(const uint8_t sig[64], const uint8_t pub[64],
+                      const uint8_t *hash, size_t hlen);
+
+/*
+ * Public key validation
+ *
+ * Note: you never need to call this function, as all other functions always
+ * validate their input; however it's available if you want to validate the key
+ * without performing an operation.
+ *
+ * [in] pub: the public key, as two big-endian integers
+ *
+ * return:  P256_SUCCESS if the key is valid
+ *          P256_INVALID_PUBKEY if pub is invalid
+ */
+int p256_validate_pubkey(const uint8_t pub[64]);
+
+/*
+ * Private key validation
+ *
+ * Note: you never need to call this function, as all other functions always
+ * validate their input; however it's available if you want to validate the key
+ * without performing an operation.
+ *
+ * [in] priv: the private key, as a big-endian integer
+ *
+ * return:  P256_SUCCESS if the key is valid
+ *          P256_INVALID_PRIVKEY if priv is invalid
+ */
+int p256_validate_privkey(const uint8_t priv[32]);
+
+/*
+ * Compute public key from private key
+ *
+ * [out] pub: the associated public key, as two big-endian integers
+ * [in] priv: the private key, as a big-endian integer
+ *
+ * return:  P256_SUCCESS on success
+ *          P256_INVALID_PRIVKEY if priv is invalid
+ */
+int p256_public_from_private(uint8_t pub[64], const uint8_t priv[32]);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* P256_M_H */
diff --git a/3rdparty/p256-m/p256-m_driver_entrypoints.c b/3rdparty/p256-m/p256-m_driver_entrypoints.c
new file mode 100644
index 0000000..d272dcb
--- /dev/null
+++ b/3rdparty/p256-m/p256-m_driver_entrypoints.c
@@ -0,0 +1,312 @@
+/*
+ *  Driver entry points for p256-m
+ */
+/*
+ *  Copyright The Mbed TLS Contributors
+ *  SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+ */
+
+#include "mbedtls/platform.h"
+#include "p256-m_driver_entrypoints.h"
+#include "p256-m/p256-m.h"
+#include "psa/crypto.h"
+#include <stddef.h>
+#include <string.h>
+#include "psa_crypto_driver_wrappers_no_static.h"
+
+#if defined(MBEDTLS_PSA_P256M_DRIVER_ENABLED)
+
+/* INFORMATION ON PSA KEY EXPORT FORMATS:
+ *
+ * PSA exports SECP256R1 keys in two formats:
+ * 1. Keypair format: 32 byte string which is just the private key (public key
+ *                    can be calculated from the private key)
+ * 2. Public Key format: A leading byte 0x04 (indicating uncompressed format),
+ *                       followed by the 64 byte public key. This results in a
+ *                       total of 65 bytes.
+ *
+ * p256-m's internal format for private keys matches PSA. Its format for public
+ * keys is only 64 bytes: the same as PSA but without the leading byte (0x04).
+ * Hence, when passing public keys from PSA to p256-m, the leading byte is
+ * removed.
+ *
+ * Shared secret and signature have the same format between PSA and p256-m.
+ */
+#define PSA_PUBKEY_SIZE         65
+#define PSA_PUBKEY_HEADER_BYTE  0x04
+#define P256_PUBKEY_SIZE        64
+#define PRIVKEY_SIZE            32
+#define SHARED_SECRET_SIZE      32
+#define SIGNATURE_SIZE          64
+
+#define CURVE_BITS              256
+
+/* Convert between p256-m and PSA error codes */
+static psa_status_t p256_to_psa_error(int ret)
+{
+    switch (ret) {
+        case P256_SUCCESS:
+            return PSA_SUCCESS;
+        case P256_INVALID_PUBKEY:
+        case P256_INVALID_PRIVKEY:
+            return PSA_ERROR_INVALID_ARGUMENT;
+        case P256_INVALID_SIGNATURE:
+            return PSA_ERROR_INVALID_SIGNATURE;
+        case P256_RANDOM_FAILED:
+        default:
+            return PSA_ERROR_GENERIC_ERROR;
+    }
+}
+
+psa_status_t p256_transparent_import_key(const psa_key_attributes_t *attributes,
+                             const uint8_t *data,
+                             size_t data_length,
+                             uint8_t *key_buffer,
+                             size_t key_buffer_size,
+                             size_t *key_buffer_length,
+                             size_t *bits)
+{
+    /* Check the key size */
+    if (*bits != 0 && *bits != CURVE_BITS) {
+        return PSA_ERROR_NOT_SUPPORTED;
+    }
+
+    /* Validate the key (and its type and size) */
+    psa_key_type_t type = psa_get_key_type(attributes);
+    if (type == PSA_KEY_TYPE_ECC_PUBLIC_KEY(PSA_ECC_FAMILY_SECP_R1)) {
+        if (data_length != PSA_PUBKEY_SIZE) {
+            return *bits == 0 ? PSA_ERROR_NOT_SUPPORTED : PSA_ERROR_INVALID_ARGUMENT;
+        }
+        /* See INFORMATION ON PSA KEY EXPORT FORMATS near top of file */
+        if (p256_validate_pubkey(data + 1) != P256_SUCCESS) {
+            return PSA_ERROR_INVALID_ARGUMENT;
+        }
+    } else if (type == PSA_KEY_TYPE_ECC_KEY_PAIR(PSA_ECC_FAMILY_SECP_R1)) {
+        if (data_length != PRIVKEY_SIZE) {
+            return *bits == 0 ? PSA_ERROR_NOT_SUPPORTED : PSA_ERROR_INVALID_ARGUMENT;
+        }
+        if (p256_validate_privkey(data) != P256_SUCCESS) {
+            return PSA_ERROR_INVALID_ARGUMENT;
+        }
+    } else {
+        return PSA_ERROR_NOT_SUPPORTED;
+    }
+    *bits = CURVE_BITS;
+
+    /* We only support the export format for input, so just copy. */
+    if (key_buffer_size < data_length) {
+        return PSA_ERROR_BUFFER_TOO_SMALL;
+    }
+    memcpy(key_buffer, data, data_length);
+    *key_buffer_length = data_length;
+
+    return PSA_SUCCESS;
+}
+
+psa_status_t p256_transparent_export_public_key(const psa_key_attributes_t *attributes,
+                                    const uint8_t *key_buffer,
+                                    size_t key_buffer_size,
+                                    uint8_t *data,
+                                    size_t data_size,
+                                    size_t *data_length)
+{
+    /* Is this the right curve? */
+    size_t bits = psa_get_key_bits(attributes);
+    psa_key_type_t type = psa_get_key_type(attributes);
+    if (bits != CURVE_BITS || type != PSA_KEY_TYPE_ECC_KEY_PAIR(PSA_ECC_FAMILY_SECP_R1)) {
+        return PSA_ERROR_NOT_SUPPORTED;
+    }
+
+    /* Validate sizes, as p256-m expects fixed-size buffers */
+    if (key_buffer_size != PRIVKEY_SIZE) {
+        return PSA_ERROR_INVALID_ARGUMENT;
+    }
+    if (data_size < PSA_PUBKEY_SIZE) {
+        return PSA_ERROR_BUFFER_TOO_SMALL;
+    }
+
+    /* See INFORMATION ON PSA KEY EXPORT FORMATS near top of file */
+    data[0] = PSA_PUBKEY_HEADER_BYTE;
+    int ret = p256_public_from_private(data + 1, key_buffer);
+    if (ret == P256_SUCCESS) {
+        *data_length = PSA_PUBKEY_SIZE;
+    }
+
+    return p256_to_psa_error(ret);
+}
+
+psa_status_t p256_transparent_generate_key(
+    const psa_key_attributes_t *attributes,
+    uint8_t *key_buffer,
+    size_t key_buffer_size,
+    size_t *key_buffer_length)
+{
+    /* We don't use this argument, but the specification mandates the signature
+     * of driver entry-points. (void) used to avoid compiler warning. */
+    (void) attributes;
+
+    /* Validate sizes, as p256-m expects fixed-size buffers */
+    if (key_buffer_size != PRIVKEY_SIZE) {
+        return PSA_ERROR_BUFFER_TOO_SMALL;
+    }
+
+    /*
+     *  p256-m's keypair generation function outputs both public and private
+     *  keys. Allocate a buffer to which the public key will be written. The
+     *  private key will be written to key_buffer, which is passed to this
+     *  function as an argument. */
+    uint8_t public_key_buffer[P256_PUBKEY_SIZE];
+
+    int ret = p256_gen_keypair(key_buffer, public_key_buffer);
+    if (ret == P256_SUCCESS) {
+        *key_buffer_length = PRIVKEY_SIZE;
+    }
+
+    return p256_to_psa_error(ret);
+}
+
+psa_status_t p256_transparent_key_agreement(
+    const psa_key_attributes_t *attributes,
+    const uint8_t *key_buffer,
+    size_t key_buffer_size,
+    psa_algorithm_t alg,
+    const uint8_t *peer_key,
+    size_t peer_key_length,
+    uint8_t *shared_secret,
+    size_t shared_secret_size,
+    size_t *shared_secret_length)
+{
+    /* We don't use these arguments, but the specification mandates the
+     * signature of driver entry-points. (void) used to avoid compiler
+     * warning. */
+    (void) attributes;
+    (void) alg;
+
+    /* Validate sizes, as p256-m expects fixed-size buffers */
+    if (key_buffer_size != PRIVKEY_SIZE || peer_key_length != PSA_PUBKEY_SIZE) {
+        return PSA_ERROR_INVALID_ARGUMENT;
+    }
+    if (shared_secret_size < SHARED_SECRET_SIZE) {
+        return PSA_ERROR_BUFFER_TOO_SMALL;
+    }
+
+    /* See INFORMATION ON PSA KEY EXPORT FORMATS near top of file */
+    const uint8_t *peer_key_p256m = peer_key + 1;
+    int ret = p256_ecdh_shared_secret(shared_secret, key_buffer, peer_key_p256m);
+    if (ret == P256_SUCCESS) {
+        *shared_secret_length = SHARED_SECRET_SIZE;
+    }
+
+    return p256_to_psa_error(ret);
+}
+
+psa_status_t p256_transparent_sign_hash(
+    const psa_key_attributes_t *attributes,
+    const uint8_t *key_buffer,
+    size_t key_buffer_size,
+    psa_algorithm_t alg,
+    const uint8_t *hash,
+    size_t hash_length,
+    uint8_t *signature,
+    size_t signature_size,
+    size_t *signature_length)
+{
+    /* We don't use these arguments, but the specification mandates the
+     * signature of driver entry-points. (void) used to avoid compiler
+     * warning. */
+    (void) attributes;
+    (void) alg;
+
+    /* Validate sizes, as p256-m expects fixed-size buffers */
+    if (key_buffer_size != PRIVKEY_SIZE) {
+        return PSA_ERROR_INVALID_ARGUMENT;
+    }
+    if (signature_size < SIGNATURE_SIZE) {
+        return PSA_ERROR_BUFFER_TOO_SMALL;
+    }
+
+    int ret = p256_ecdsa_sign(signature, key_buffer, hash, hash_length);
+    if (ret == P256_SUCCESS) {
+        *signature_length = SIGNATURE_SIZE;
+    }
+
+    return p256_to_psa_error(ret);
+}
+
+/*  This function expects the key buffer to contain a PSA public key,
+ *  as exported by psa_export_public_key() */
+static psa_status_t p256_verify_hash_with_public_key(
+    const uint8_t *key_buffer,
+    size_t key_buffer_size,
+    const uint8_t *hash,
+    size_t hash_length,
+    const uint8_t *signature,
+    size_t signature_length)
+{
+    /* Validate sizes, as p256-m expects fixed-size buffers */
+    if (key_buffer_size != PSA_PUBKEY_SIZE || *key_buffer != PSA_PUBKEY_HEADER_BYTE) {
+        return PSA_ERROR_INVALID_ARGUMENT;
+    }
+    if (signature_length != SIGNATURE_SIZE) {
+        return PSA_ERROR_INVALID_SIGNATURE;
+    }
+
+    /* See INFORMATION ON PSA KEY EXPORT FORMATS near top of file */
+    const uint8_t *public_key_p256m = key_buffer + 1;
+    int ret = p256_ecdsa_verify(signature, public_key_p256m, hash, hash_length);
+
+    return p256_to_psa_error(ret);
+}
+
+psa_status_t p256_transparent_verify_hash(
+    const psa_key_attributes_t *attributes,
+    const uint8_t *key_buffer,
+    size_t key_buffer_size,
+    psa_algorithm_t alg,
+    const uint8_t *hash,
+    size_t hash_length,
+    const uint8_t *signature,
+    size_t signature_length)
+{
+    /* We don't use this argument, but the specification mandates the signature
+     * of driver entry-points. (void) used to avoid compiler warning. */
+    (void) alg;
+
+    psa_status_t status;
+    uint8_t public_key_buffer[PSA_PUBKEY_SIZE];
+    size_t public_key_buffer_size = PSA_PUBKEY_SIZE;
+
+    size_t public_key_length = PSA_PUBKEY_SIZE;
+    /* As p256-m doesn't require dynamic allocation, we want to avoid it in
+     * the entrypoint functions as well. psa_driver_wrapper_export_public_key()
+     * requires size_t*, so we use a pointer to a stack variable. */
+    size_t *public_key_length_ptr = &public_key_length;
+
+    /* The contents of key_buffer may either be the 32 byte private key
+     * (keypair format), or 0x04 followed by the 64 byte public key (public
+     * key format). To ensure the key is in the latter format, the public key
+     * is exported. */
+    status = psa_driver_wrapper_export_public_key(
+        attributes,
+        key_buffer,
+        key_buffer_size,
+        public_key_buffer,
+        public_key_buffer_size,
+        public_key_length_ptr);
+    if (status != PSA_SUCCESS) {
+        goto exit;
+    }
+
+    status = p256_verify_hash_with_public_key(
+        public_key_buffer,
+        public_key_buffer_size,
+        hash,
+        hash_length,
+        signature,
+        signature_length);
+
+exit:
+    return status;
+}
+
+#endif /* MBEDTLS_PSA_P256M_DRIVER_ENABLED */
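When MBEDTLS_PSA_P256M_DRIVER_ENABLED is set, the entry points above are normally reached through the standard PSA Crypto API rather than called directly. A hedged sketch (the helper name and the 32-byte hash input are assumptions; psa_key_id_t is assumed for key identifiers, which is mbedtls_svc_key_id_t in some configurations):

#include "psa/crypto.h"

/* Generate a SECP256R1 key and sign a SHA-256 digest; with the p256-m driver
 * enabled these calls are typically served by p256_transparent_generate_key()
 * and p256_transparent_sign_hash(). */
static int p256m_sign_example(const uint8_t hash[32], uint8_t sig[64], size_t *sig_len)
{
    psa_key_attributes_t attributes = PSA_KEY_ATTRIBUTES_INIT;
    psa_key_id_t key_id = 0;
    psa_status_t status;

    if (psa_crypto_init() != PSA_SUCCESS) {
        return -1;
    }

    psa_set_key_type(&attributes, PSA_KEY_TYPE_ECC_KEY_PAIR(PSA_ECC_FAMILY_SECP_R1));
    psa_set_key_bits(&attributes, 256);
    psa_set_key_usage_flags(&attributes, PSA_KEY_USAGE_SIGN_HASH);
    psa_set_key_algorithm(&attributes, PSA_ALG_ECDSA(PSA_ALG_SHA_256));

    status = psa_generate_key(&attributes, &key_id);
    if (status != PSA_SUCCESS) {
        return -1;
    }

    status = psa_sign_hash(key_id, PSA_ALG_ECDSA(PSA_ALG_SHA_256),
                           hash, 32, sig, 64, sig_len);
    psa_destroy_key(key_id);
    return status == PSA_SUCCESS ? 0 : -1;
}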
diff --git a/3rdparty/p256-m/p256-m_driver_entrypoints.h b/3rdparty/p256-m/p256-m_driver_entrypoints.h
new file mode 100644
index 0000000..c740c45
--- /dev/null
+++ b/3rdparty/p256-m/p256-m_driver_entrypoints.h
@@ -0,0 +1,219 @@
+/*
+ *   Driver entry points for p256-m
+ */
+/*
+ *  Copyright The Mbed TLS Contributors
+ *  SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+ */
+
+#ifndef P256M_DRIVER_ENTRYPOINTS_H
+#define P256M_DRIVER_ENTRYPOINTS_H
+
+#if defined(MBEDTLS_PSA_P256M_DRIVER_ENABLED)
+#ifndef PSA_CRYPTO_ACCELERATOR_DRIVER_PRESENT
+#define PSA_CRYPTO_ACCELERATOR_DRIVER_PRESENT
+#endif /* PSA_CRYPTO_ACCELERATOR_DRIVER_PRESENT */
+#endif /* MBEDTLS_PSA_P256M_DRIVER_ENABLED */
+
+#include "psa/crypto_types.h"
+
+/** Import SECP256R1 key.
+ *
+ * \param[in]  attributes           The attributes of the key to use for the
+ *                                  operation.
+ * \param[in]  data                 The raw key material. For private keys
+ *                                  this must be a big-endian integer of 32
+ *                                  bytes; for public keys this must be an
+ *                                  uncompressed ECPoint (65 bytes).
+ * \param[in]  data_length          The size of the raw key material.
+ * \param[out] key_buffer           The buffer to contain the key data in
+ *                                  output format upon successful return.
+ * \param[in]  key_buffer_size      Size of the \p key_buffer buffer in bytes.
+ * \param[out] key_buffer_length    The length of the data written in \p
+ *                                  key_buffer in bytes.
+ * \param[in,out] bits              On entry, the declared bitsize of the key
+ *                                  (0 if unspecified); on success, the actual
+ *                                  bitsize of the key.
+ *
+ * \retval  #PSA_SUCCESS
+ *          Success. The key was imported and stored in \p key_buffer.
+ * \retval  #PSA_ERROR_NOT_SUPPORTED
+ *          The input is not supported by this driver (not SECP256R1).
+ * \retval  #PSA_ERROR_INVALID_ARGUMENT
+ *          The input is invalid.
+ * \retval  #PSA_ERROR_BUFFER_TOO_SMALL
+ *          \p key_buffer_size is too small.
+ */
+psa_status_t p256_transparent_import_key(const psa_key_attributes_t *attributes,
+                             const uint8_t *data,
+                             size_t data_length,
+                             uint8_t *key_buffer,
+                             size_t key_buffer_size,
+                             size_t *key_buffer_length,
+                             size_t *bits);
+
+/** Export SECP256R1 public key, from the private key.
+ *
+ * \param[in]  attributes           The attributes of the key to use for the
+ *                                  operation.
+ * \param[in]  key_buffer           The private key in the export format.
+ * \param[in]  key_buffer_size      The size of the private key in bytes.
+ * \param[out] data                 The buffer to contain the public key in
+ *                                  the export format upon successful return.
+ * \param[in]  data_size            The size of the \p data buffer in bytes.
+ * \param[out] data_length          The length written to \p data in bytes.
+ *
+ * \retval  #PSA_SUCCESS
+ *          Success. The public key was exported and written to \p data.
+ * \retval  #PSA_ERROR_NOT_SUPPORTED
+ *          The input is not supported by this driver (not SECP256R1).
+ * \retval  #PSA_ERROR_INVALID_ARGUMENT
+ *          The input is invalid.
+ * \retval  #PSA_ERROR_BUFFER_TOO_SMALL
+ *          \p data_size is too small.
+ */
+psa_status_t p256_transparent_export_public_key(const psa_key_attributes_t *attributes,
+                                    const uint8_t *key_buffer,
+                                    size_t key_buffer_size,
+                                    uint8_t *data,
+                                    size_t data_size,
+                                    size_t *data_length);
+
+/** Generate SECP256R1 ECC Key Pair.
+ *  Interface function which calls the p256-m key generation function and
+ *  places the generated key in the key buffer provided by the caller
+ *  (Mbed TLS) in the correct format. For SECP256R1 this is the 32 byte
+ *  private key.
+ *
+ * \param[in]  attributes           The attributes of the key to use for the
+ *                                  operation.
+ * \param[out]  key_buffer          The buffer to contain the key data in
+ *                                  output format upon successful return.
+ * \param[in]   key_buffer_size     Size of the \p key_buffer buffer in bytes.
+ * \param[out]  key_buffer_length   The length of the data written in \p
+ *                                  key_buffer in bytes.
+ *
+ * \retval  #PSA_SUCCESS
+ *          Success. Keypair generated and stored in buffer.
+ * \retval  #PSA_ERROR_BUFFER_TOO_SMALL
+ *          \p key_buffer_size is too small.
+ * \retval  #PSA_ERROR_GENERIC_ERROR
+ *          The internal RNG failed.
+ */
+psa_status_t p256_transparent_generate_key(
+    const psa_key_attributes_t *attributes,
+    uint8_t *key_buffer,
+    size_t key_buffer_size,
+    size_t *key_buffer_length);
+
+/** Perform raw key agreement using p256-m's ECDH implementation.
+ * \param[in]  attributes           The attributes of the key to use for the
+ *                                  operation.
+ * \param[in]  key_buffer           The buffer containing the private key
+ *                                  in the format specified by PSA.
+ * \param[in]  key_buffer_size      Size of the \p key_buffer buffer in bytes.
+ * \param[in]  alg                  A key agreement algorithm that is
+ *                                  compatible with the type of the key.
+ * \param[in]  peer_key             The buffer containing the peer's public
+ *                                  key in format specified by PSA.
+ * \param[in]  peer_key_length      Size of the \p peer_key buffer in
+ *                                  bytes.
+ * \param[out] shared_secret        The buffer to which the shared secret
+ *                                  is to be written.
+ * \param[in]  shared_secret_size   Size of the \p shared_secret buffer in
+ *                                  bytes.
+ * \param[out] shared_secret_length On success, the number of bytes that
+ *                                  make up the returned shared secret.
+ * \retval  #PSA_SUCCESS
+ *          Success. Shared secret successfully calculated.
+ * \retval  #PSA_ERROR_INVALID_ARGUMENT
+ *          The input is invalid.
+ * \retval  #PSA_ERROR_BUFFER_TOO_SMALL
+ *          \p shared_secret_size is too small.
+ */
+psa_status_t p256_transparent_key_agreement(
+    const psa_key_attributes_t *attributes,
+    const uint8_t *key_buffer,
+    size_t key_buffer_size,
+    psa_algorithm_t alg,
+    const uint8_t *peer_key,
+    size_t peer_key_length,
+    uint8_t *shared_secret,
+    size_t shared_secret_size,
+    size_t *shared_secret_length);
+
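/* Illustrative sketch (hypothetical helper, not part of this header): at the
 * PSA API level the entry point above is reached via psa_raw_key_agreement()
 * with PSA_ALG_ECDH, the identifier of an own SECP256R1 key pair and the
 * peer's 65-byte uncompressed public key (0x04 || X || Y). */
#include "psa/crypto.h"

static psa_status_t p256m_ecdh_example(psa_key_id_t own_key_id,
                                       const uint8_t peer_pubkey[65],
                                       uint8_t shared_secret[32])
{
    size_t shared_len = 0;
    /* With the p256-m driver enabled, this is typically served by
     * p256_transparent_key_agreement(). */
    return psa_raw_key_agreement(PSA_ALG_ECDH, own_key_id,
                                 peer_pubkey, 65,
                                 shared_secret, 32, &shared_len);
}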
+/** Sign an already-calculated hash with a private key using p256-m's ECDSA
+ *  implementation.
+ * \param[in]  attributes           The attributes of the key to use for the
+ *                                  operation.
+ * \param[in]  key_buffer           The buffer containing the private key
+ *                                  in the format specified by PSA.
+ * \param[in]  key_buffer_size      Size of the \p key_buffer buffer in bytes.
+ * \param[in]  alg                  A signature algorithm that is compatible
+ *                                  with the type of the key.
+ * \param[in]  hash                 The hash to sign.
+ * \param[in]  hash_length          Size of the \p hash buffer in bytes.
+ * \param[out] signature            Buffer where signature is to be written.
+ * \param[in]  signature_size       Size of the \p signature buffer in bytes.
+ * \param[out] signature_length     On success, the number of bytes
+ *                                  that make up the returned signature value.
+ *
+ * \retval  #PSA_SUCCESS
+ *          Success. Hash was signed successfully.
+ * \retval  #PSA_ERROR_INVALID_ARGUMENT
+ *          The input is invalid.
+ * \retval  #PSA_ERROR_BUFFER_TOO_SMALL
+ *          \p signature_size is too small.
+ * \retval  #PSA_ERROR_GENERIC_ERROR
+ *          The internal RNG failed.
+ */
+psa_status_t p256_transparent_sign_hash(
+    const psa_key_attributes_t *attributes,
+    const uint8_t *key_buffer,
+    size_t key_buffer_size,
+    psa_algorithm_t alg,
+    const uint8_t *hash,
+    size_t hash_length,
+    uint8_t *signature,
+    size_t signature_size,
+    size_t *signature_length);
+
+/** Verify the signature of a hash with a SECP256R1 public key, using
+ *  p256-m's ECDSA implementation.
+ *
+ * \note p256-m expects a 64 byte public key, but the contents of the key
+ *       buffer may be the 32 byte keypair representation or the 65 byte
+ *       public key representation. As a result, this function calls
+ *       psa_driver_wrapper_export_public_key() to ensure the public key
+ *       can be passed to p256-m.
+ *
+ * \param[in]  attributes       The attributes of the key to use for the
+ *                              operation.
+ * \param[in]  key_buffer       The buffer containing the key
+ *                              in the format specified by PSA.
+ * \param[in]  key_buffer_size  Size of the \p key_buffer buffer in bytes.
+ * \param[in]  alg              A signature algorithm that is compatible with
+ *                              the type of the key.
+ * \param[in]  hash             The hash whose signature is to be
+ *                              verified.
+ * \param[in]  hash_length      Size of the \p hash buffer in bytes.
+ * \param[in]  signature        Buffer containing the signature to verify.
+ * \param[in]  signature_length Size of the \p signature buffer in bytes.
+ *
+ * \retval  #PSA_SUCCESS
+ *          The signature is valid.
+ * \retval  #PSA_ERROR_INVALID_SIGNATURE
+ *          The calculation was performed successfully, but the passed
+ *          signature is not a valid signature.
+ * \retval  #PSA_ERROR_INVALID_ARGUMENT
+ *          The input is invalid.
+ */
+psa_status_t p256_transparent_verify_hash(
+    const psa_key_attributes_t *attributes,
+    const uint8_t *key_buffer,
+    size_t key_buffer_size,
+    psa_algorithm_t alg,
+    const uint8_t *hash,
+    size_t hash_length,
+    const uint8_t *signature,
+    size_t signature_length);
+
+#endif /* P256M_DRIVER_ENTRYPOINTS_H */
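For the verification path, the note above only concerns the driver internals; at the PSA API level a caller simply imports a 65-byte public key and calls psa_verify_hash(). A hedged sketch (the helper name and fixed buffer sizes are assumptions):

#include "psa/crypto.h"

/* Import an uncompressed P-256 public key (0x04 || X || Y) and verify an
 * ECDSA/SHA-256 signature; with the p256-m driver enabled this is typically
 * served by p256_transparent_verify_hash(). */
static int p256m_verify_example(const uint8_t pubkey[65],
                                const uint8_t hash[32],
                                const uint8_t sig[64])
{
    psa_key_attributes_t attributes = PSA_KEY_ATTRIBUTES_INIT;
    psa_key_id_t key_id = 0;
    psa_status_t status;

    if (psa_crypto_init() != PSA_SUCCESS) {
        return -1;
    }

    psa_set_key_type(&attributes, PSA_KEY_TYPE_ECC_PUBLIC_KEY(PSA_ECC_FAMILY_SECP_R1));
    psa_set_key_usage_flags(&attributes, PSA_KEY_USAGE_VERIFY_HASH);
    psa_set_key_algorithm(&attributes, PSA_ALG_ECDSA(PSA_ALG_SHA_256));

    status = psa_import_key(&attributes, pubkey, 65, &key_id);
    if (status != PSA_SUCCESS) {
        return -1;
    }

    status = psa_verify_hash(key_id, PSA_ALG_ECDSA(PSA_ALG_SHA_256),
                             hash, 32, sig, 64);
    psa_destroy_key(key_id);
    return status == PSA_SUCCESS ? 0 : -1;
}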