From 2c570ed53b0f637dd248c4a8c30f1df0790076fb Mon Sep 17 00:00:00 2001 From: "harshal.patil" Date: Mon, 30 Oct 2023 15:18:45 +0530 Subject: [PATCH 1/7] change(mbedtls/aes): moved esp_aes_internal.h to be a private header - Also enable AES-GCM test in the hal crypto test app for all targets --- .../hal/test_apps/crypto/main/CMakeLists.txt | 3 +- .../hal/test_apps/crypto/main/aes/test_aes.c | 13 ++--- components/mbedtls/CMakeLists.txt | 1 + components/mbedtls/port/aes/block/esp_aes.c | 2 +- components/mbedtls/port/aes/dma/esp_aes.c | 2 +- components/mbedtls/port/aes/esp_aes_common.c | 2 +- components/mbedtls/port/aes/esp_aes_gcm.c | 2 +- .../port/aes/include/esp_aes_internal.h | 37 +++++++++++++ .../port/include/aes/esp_aes_internal.h | 54 ------------------- tools/ci/check_copyright_ignore.txt | 1 - 10 files changed, 48 insertions(+), 69 deletions(-) create mode 100644 components/mbedtls/port/aes/include/esp_aes_internal.h delete mode 100644 components/mbedtls/port/include/aes/esp_aes_internal.h diff --git a/components/hal/test_apps/crypto/main/CMakeLists.txt b/components/hal/test_apps/crypto/main/CMakeLists.txt index d2e4647dfd..460bf3b120 100644 --- a/components/hal/test_apps/crypto/main/CMakeLists.txt +++ b/components/hal/test_apps/crypto/main/CMakeLists.txt @@ -25,7 +25,8 @@ if(CONFIG_SOC_AES_SUPPORTED) list(APPEND srcs "aes/test_aes.c" "$ENV{IDF_PATH}/components/mbedtls/port/aes/esp_aes_common.c" "aes/aes_block.c") - list(APPEND priv_include_dirs "$ENV{IDF_PATH}/components/mbedtls/port/include") + list(APPEND priv_include_dirs "$ENV{IDF_PATH}/components/mbedtls/port/include" + "$ENV{IDF_PATH}/components/mbedtls/port/aes/include") if(CONFIG_SOC_AES_SUPPORT_DMA) list(APPEND priv_include_dirs "$ENV{IDF_PATH}/components/mbedtls/port/aes/dma/include") diff --git a/components/hal/test_apps/crypto/main/aes/test_aes.c b/components/hal/test_apps/crypto/main/aes/test_aes.c index 7b7d185c0f..518669b5bd 100644 --- a/components/hal/test_apps/crypto/main/aes/test_aes.c +++ 
b/components/hal/test_apps/crypto/main/aes/test_aes.c @@ -266,7 +266,8 @@ static void test_cfb128_aes(size_t buffer_size, const uint8_t expected_cipher_en heap_caps_free(decryptedtext); } -#if SOC_AES_SUPPORT_GCM +#define CIPHER_ID_AES 2 + static void test_gcm_aes(size_t length, const uint8_t expected_last_block[16], const uint8_t expected_tag[16]) { uint8_t iv[16]; @@ -295,10 +296,10 @@ static void test_gcm_aes(size_t length, const uint8_t expected_last_block[16], c memcpy(iv_buf, iv, iv_length); esp_aes_gcm_init(&ctx); - esp_aes_gcm_setkey(&ctx, 0, key, 8 * sizeof(key)); + TEST_ASSERT(esp_aes_gcm_setkey(&ctx, CIPHER_ID_AES, key, 8 * sizeof(key)) == 0); /* Encrypt and authenticate */ - esp_aes_gcm_crypt_and_tag(&ctx, ESP_AES_ENCRYPT, length, iv_buf, iv_length, add, add_length, plaintext, ciphertext, tag_len, tag_buf_encrypt); + TEST_ASSERT(esp_aes_gcm_crypt_and_tag(&ctx, ESP_AES_ENCRYPT, length, iv_buf, iv_length, add, add_length, plaintext, ciphertext, tag_len, tag_buf_encrypt) == 0); size_t offset = length > 16 ? length - 16 : 0; /* Sanity check: make sure the last ciphertext block matches what we expect to see. 
*/ TEST_ASSERT_EQUAL_HEX8_ARRAY(expected_last_block, ciphertext + offset, MIN(16, length)); @@ -314,7 +315,6 @@ static void test_gcm_aes(size_t length, const uint8_t expected_last_block[16], c heap_caps_free(ciphertext); heap_caps_free(decryptedtext); } -#endif /* SOC_AES_SUPPORT_GCM */ #endif /* SOC_AES_SUPPORT_DMA */ TEST(aes, cbc_aes_256_block_test) @@ -457,8 +457,6 @@ TEST(aes, cfb128_aes_256_long_dma_test) #endif -#if SOC_AES_SUPPORT_GCM - TEST(aes, gcm_aes_dma_test) { size_t length = 16; @@ -489,7 +487,6 @@ TEST(aes, gcm_aes_long_dma_test) test_gcm_aes(length, expected_last_block, expected_tag); } #endif /* CONFIG_CRYPTO_TESTAPP_USE_AES_INTERRUPT */ -#endif /* SOC_AES_SUPPORT_GCM */ #endif /* SOC_AES_SUPPORT_DMA */ TEST_GROUP_RUNNER(aes) @@ -509,12 +506,10 @@ TEST_GROUP_RUNNER(aes) RUN_TEST_CASE(aes, cfb8_aes_256_long_dma_test); RUN_TEST_CASE(aes, cfb128_aes_256_long_dma_test); #endif /* CONFIG_CRYPTO_TESTAPP_USE_AES_INTERRUPT */ -#if SOC_AES_SUPPORT_GCM RUN_TEST_CASE(aes, gcm_aes_dma_test); #if CONFIG_CRYPTO_TESTAPP_USE_AES_INTERRUPT RUN_TEST_CASE(aes, gcm_aes_long_dma_test); #endif /* CONFIG_CRYPTO_TESTAPP_USE_AES_INTERRUPT */ -#endif /* SOC_AES_SUPPORT_GCM */ #endif /* SOC_AES_SUPPORT_DMA */ } diff --git a/components/mbedtls/CMakeLists.txt b/components/mbedtls/CMakeLists.txt index 28428f25d8..57b832fd99 100644 --- a/components/mbedtls/CMakeLists.txt +++ b/components/mbedtls/CMakeLists.txt @@ -206,6 +206,7 @@ target_sources(mbedcrypto PRIVATE "${COMPONENT_DIR}/port/esp_mem.c" ) if(CONFIG_SOC_AES_SUPPORTED) + target_include_directories(mbedcrypto PRIVATE "${COMPONENT_DIR}/port/aes/include") target_sources(mbedcrypto PRIVATE "${COMPONENT_DIR}/port/aes/esp_aes_xts.c" "${COMPONENT_DIR}/port/aes/esp_aes_common.c" "${COMPONENT_DIR}/port/aes/${AES_PERIPHERAL_TYPE}/esp_aes.c" diff --git a/components/mbedtls/port/aes/block/esp_aes.c b/components/mbedtls/port/aes/block/esp_aes.c index fcf4e2a67b..a83a89f8e9 100644 --- a/components/mbedtls/port/aes/block/esp_aes.c +++ 
b/components/mbedtls/port/aes/block/esp_aes.c @@ -33,7 +33,7 @@ #include "soc/hwcrypto_periph.h" #include #include "hal/aes_hal.h" -#include "aes/esp_aes_internal.h" +#include "esp_aes_internal.h" #include diff --git a/components/mbedtls/port/aes/dma/esp_aes.c b/components/mbedtls/port/aes/dma/esp_aes.c index 8f0e33c953..8fe7b4d522 100644 --- a/components/mbedtls/port/aes/dma/esp_aes.c +++ b/components/mbedtls/port/aes/dma/esp_aes.c @@ -42,7 +42,7 @@ #endif #include "esp_crypto_lock.h" #include "hal/aes_hal.h" -#include "aes/esp_aes_internal.h" +#include "esp_aes_internal.h" #include "esp_aes_dma_priv.h" #if CONFIG_IDF_TARGET_ESP32S2 diff --git a/components/mbedtls/port/aes/esp_aes_common.c b/components/mbedtls/port/aes/esp_aes_common.c index 4bc8b25cee..bffd568c3a 100644 --- a/components/mbedtls/port/aes/esp_aes_common.c +++ b/components/mbedtls/port/aes/esp_aes_common.c @@ -15,7 +15,7 @@ * http://csrc.nist.gov/publications/fips/fips197/fips-197.pdf */ #include "sdkconfig.h" -#include "aes/esp_aes_internal.h" +#include "esp_aes_internal.h" #include "mbedtls/aes.h" #include "hal/aes_hal.h" #include "hal/aes_types.h" diff --git a/components/mbedtls/port/aes/esp_aes_gcm.c b/components/mbedtls/port/aes/esp_aes_gcm.c index 9798fe475c..76d68544d7 100644 --- a/components/mbedtls/port/aes/esp_aes_gcm.c +++ b/components/mbedtls/port/aes/esp_aes_gcm.c @@ -18,7 +18,7 @@ #include "aes/esp_aes.h" #include "aes/esp_aes_gcm.h" -#include "aes/esp_aes_internal.h" +#include "esp_aes_internal.h" #include "hal/aes_hal.h" #include "mbedtls/aes.h" diff --git a/components/mbedtls/port/aes/include/esp_aes_internal.h b/components/mbedtls/port/aes/include/esp_aes_internal.h new file mode 100644 index 0000000000..889888e83f --- /dev/null +++ b/components/mbedtls/port/aes/include/esp_aes_internal.h @@ -0,0 +1,37 @@ +/* + * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "aes/esp_aes.h" +#include 
"aes/esp_aes_gcm.h" +#include "soc/soc_caps.h" +#include + +#ifdef __cplusplus +extern "C" { +#endif + +bool valid_key_length(const esp_aes_context *ctx); + +#if SOC_AES_SUPPORT_GCM +/** + * @brief Run a AES-GCM conversion using DMA + * + * @param ctx Aes context + * @param input Pointer to input data + * @param output Pointer to output data + * @param len Length of the input data + * @param aad_desc GCM additional data DMA descriptor + * @param aad_len GCM additional data length + * @return int -1 on error + */ +int esp_aes_process_dma_gcm(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, lldesc_t *aad_desc, size_t aad_len); +#endif + +#ifdef __cplusplus +} +#endif diff --git a/components/mbedtls/port/include/aes/esp_aes_internal.h b/components/mbedtls/port/include/aes/esp_aes_internal.h deleted file mode 100644 index 35a7d3935a..0000000000 --- a/components/mbedtls/port/include/aes/esp_aes_internal.h +++ /dev/null @@ -1,54 +0,0 @@ -/** - * \brief AES block cipher, ESP-IDF hardware accelerated version - * Based on mbedTLS FIPS-197 compliant version. - * - * Copyright (C) 2006-2015, ARM Limited, All Rights Reserved - * Additions Copyright (C) 2016, Espressif Systems (Shanghai) PTE Ltd - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * Internal API - */ - -#pragma once - - -#include "aes/esp_aes.h" -#include "aes/esp_aes_gcm.h" -#include - -#ifdef __cplusplus -extern "C" { -#endif - -bool valid_key_length(const esp_aes_context *ctx); - - -/** - * @brief Run a AES-GCM conversion using DMA - * - * @param ctx Aes context - * @param input Pointer to input data - * @param output Pointer to output data - * @param len Length of the input data - * @param aad_desc GCM additional data DMA descriptor - * @param aad_len GCM additional data length - * @return int -1 on error - */ -int esp_aes_process_dma_gcm(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, lldesc_t *aad_desc, size_t aad_len); - - -#ifdef __cplusplus -} -#endif diff --git a/tools/ci/check_copyright_ignore.txt b/tools/ci/check_copyright_ignore.txt index d98620c6e3..c1fbfb935a 100644 --- a/tools/ci/check_copyright_ignore.txt +++ b/tools/ci/check_copyright_ignore.txt @@ -507,7 +507,6 @@ components/mbedtls/port/aes/dma/esp_aes.c components/mbedtls/port/aes/dma/esp_aes_crypto_dma_impl.c components/mbedtls/port/aes/esp_aes_xts.c components/mbedtls/port/include/aes/esp_aes.h -components/mbedtls/port/include/aes/esp_aes_internal.h components/mbedtls/port/include/aes_alt.h components/mbedtls/port/include/bignum_impl.h components/mbedtls/port/include/esp32/aes.h From 83dd60307f011173f91cf5eb3536d748962ebfa7 Mon Sep 17 00:00:00 2001 From: "harshal.patil" Date: Thu, 12 Oct 2023 14:51:04 +0530 Subject: [PATCH 2/7] feat(mbedtls/esp_crypto_shared_gdma): support AXI-DMA in the crypto shared gdma layer - In case of AXI-DMA, the DMA descriptors need to be 8-byte aligned; lldesc_t does not satisfy this condition, thus we need to replace it with dma_descriptor_t (align(4) and align(8)) in esp_crypto_shared_gdma. - Added new shared gdma start API that supports the dma_descriptor_t DMA descriptor. 
- Added some generic dma descriptor macros and helper functions - replace lldesc_t with dma_descriptor_t --- components/hal/include/hal/dma_types.h | 21 ++- components/mbedtls/port/aes/dma/esp_aes.c | 177 +++++++++++------- .../port/aes/dma/esp_aes_crypto_dma_impl.c | 26 +-- .../mbedtls/port/aes/dma/esp_aes_gdma_impl.c | 9 +- .../port/aes/dma/include/esp_aes_dma_priv.h | 7 +- components/mbedtls/port/aes/esp_aes_gcm.c | 24 +-- .../port/aes/include/esp_aes_internal.h | 8 +- .../esp_crypto_shared_gdma.c | 88 ++++++++- .../mbedtls/port/include/aes/esp_aes_gcm.h | 1 - .../mbedtls/port/include/esp_crypto_dma.h | 45 +++++ .../port/include/esp_crypto_shared_gdma.h | 23 ++- .../test_apps/main/test_aes_sha_parallel.c | 2 - tools/ci/check_copyright_ignore.txt | 1 - 13 files changed, 309 insertions(+), 123 deletions(-) create mode 100644 components/mbedtls/port/include/esp_crypto_dma.h diff --git a/components/hal/include/hal/dma_types.h b/components/hal/include/hal/dma_types.h index a0cae11ea3..52e6c542c7 100644 --- a/components/hal/include/hal/dma_types.h +++ b/components/hal/include/hal/dma_types.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2020-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -7,6 +7,7 @@ #pragma once #include +#include #include "esp_assert.h" #ifdef __cplusplus @@ -56,6 +57,24 @@ ESP_STATIC_ASSERT(sizeof(dma_descriptor_align8_t) == 16, "dma_descriptor_align8_ #define DMA_DESCRIPTOR_BUFFER_OWNER_DMA (1) /*!< DMA buffer is allowed to be accessed by DMA engine */ #define DMA_DESCRIPTOR_BUFFER_MAX_SIZE (4095) /*!< Maximum size of the buffer that can be attached to descriptor */ #define DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED (4095-3) /*!< Maximum size of the buffer that can be attached to descriptor, and aligned to 4B */ +#define DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED (4095-15) /*!< Maximum size of the buffer that can be attached to 
descriptor, and aligned to 16B */ + +// the size field has 12 bits, but 0 not for 4096. +// to avoid possible problem when the size is not word-aligned, we only use 4096-4 per desc. +/** Maximum size of data in the buffer that a DMA descriptor can hold. */ +#define DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC (4096-4) + +/** + * Get the number of DMA descriptors required for a given buffer size. + * + * @param data_size Size to check DMA descriptor number. + * @param max_desc_size Maximum length of each descriptor + * @return Number of DMA descriptors required. + */ +static inline size_t dma_desc_get_required_num(size_t data_size, size_t max_desc_size) +{ + return (data_size + max_desc_size - 1) / max_desc_size; +} #ifdef __cplusplus } diff --git a/components/mbedtls/port/aes/dma/esp_aes.c b/components/mbedtls/port/aes/dma/esp_aes.c index 8fe7b4d522..9aaa327ff9 100644 --- a/components/mbedtls/port/aes/dma/esp_aes.c +++ b/components/mbedtls/port/aes/dma/esp_aes.c @@ -33,7 +33,7 @@ #include "esp_private/periph_ctrl.h" #include "esp_log.h" #include "esp_attr.h" -#include "soc/lldesc.h" +#include "esp_crypto_dma.h" #include "esp_heap_caps.h" #include "esp_memory_utils.h" #include "sys/param.h" @@ -42,8 +42,8 @@ #endif #include "esp_crypto_lock.h" #include "hal/aes_hal.h" -#include "esp_aes_internal.h" #include "esp_aes_dma_priv.h" +#include "esp_aes_internal.h" #if CONFIG_IDF_TARGET_ESP32S2 #include "esp32s2/rom/cache.h" @@ -103,25 +103,19 @@ static bool s_check_dma_capable(const void *p); * * Must be in DMA capable memory, so stack is not a safe place to put them * * To avoid having to malloc/free them for every DMA operation */ -static DRAM_ATTR lldesc_t s_stream_in_desc; -static DRAM_ATTR lldesc_t s_stream_out_desc; +static DRAM_ATTR crypto_dma_desc_t s_stream_in_desc; +static DRAM_ATTR crypto_dma_desc_t s_stream_out_desc; static DRAM_ATTR uint8_t s_stream_in[AES_BLOCK_BYTES]; static DRAM_ATTR uint8_t s_stream_out[AES_BLOCK_BYTES]; -static inline void 
esp_aes_wait_dma_done(lldesc_t *output) +/** Append a descriptor to the chain, set head if chain empty + * + * @param[out] head Pointer to the first/head node of the DMA descriptor linked list + * @param item Pointer to the DMA descriptor node that has to be appended + */ +static inline void dma_desc_append(crypto_dma_desc_t **head, crypto_dma_desc_t *item) { - /* Wait for DMA write operation to complete */ - while (1) { - if ( esp_aes_dma_done(output) ) { - break; - } - } -} - -/* Append a descriptor to the chain, set head if chain empty */ -static inline void lldesc_append(lldesc_t **head, lldesc_t *item) -{ - lldesc_t *it; + crypto_dma_desc_t *it; if (*head == NULL) { *head = item; return; @@ -129,11 +123,61 @@ static inline void lldesc_append(lldesc_t **head, lldesc_t *item) it = *head; - while (it->empty != 0) { - it = (lldesc_t *)it->empty; + while (it->next != 0) { + it = (crypto_dma_desc_t *)it->next; + } + it->dw0.suc_eof = 0; + it->next = item; +} + +/** + * Generate a linked list pointing to a (huge) buffer in an descriptor array. + * + * The caller should ensure there is enough size to hold the array, by calling + * `dma_desc_get_required_num` with the same or less than the max_desc_size argument. + * + * @param[out] dmadesc Output of a descriptor array, the head should be fed to the DMA. + * @param data Buffer for the descriptors to point to. + * @param len Size (or length for TX) of the buffer + * @param max_desc_size Maximum length of each descriptor + * @param isrx The RX DMA may require the buffer to be word-aligned, set to true for a RX link, otherwise false. 
+ */ +static inline void dma_desc_setup_link(crypto_dma_desc_t* dmadesc, const void *data, int len, int max_desc_size, bool isrx) +{ + int i = 0; + while (len) { + int dmachunklen = len; + if (dmachunklen > max_desc_size) { + dmachunklen = max_desc_size; + } + if (isrx) { + //Receive needs DMA length rounded to next 32-bit boundary + dmadesc[i].dw0.size = (dmachunklen + 3) & (~3); + dmadesc[i].dw0.length = (dmachunklen + 3) & (~3); + } else { + dmadesc[i].dw0.size = dmachunklen; + dmadesc[i].dw0.length = dmachunklen; + } + dmadesc[i].buffer = (void *)data; + dmadesc[i].dw0.suc_eof = 0; + dmadesc[i].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA; + dmadesc[i].next = &dmadesc[i + 1]; + len -= dmachunklen; + data += dmachunklen; + i++; + } + dmadesc[i - 1].dw0.suc_eof = 1; //Mark last DMA desc as end of stream. + dmadesc[i - 1].next = NULL; +} + +static inline void esp_aes_wait_dma_done(crypto_dma_desc_t *output) +{ + /* Wait for DMA write operation to complete */ + while (1) { + if ( esp_aes_dma_done(output) ) { + break; + } } - it->eof = 0; - it->empty = (uint32_t)item; } void esp_aes_acquire_hardware( void ) @@ -221,7 +265,7 @@ static esp_err_t esp_aes_isr_initialise( void ) #endif // CONFIG_MBEDTLS_AES_USE_INTERRUPT /* Wait for AES hardware block operation to complete */ -static int esp_aes_dma_wait_complete(bool use_intr, lldesc_t *output_desc) +static int esp_aes_dma_wait_complete(bool use_intr, crypto_dma_desc_t *output_desc) { #if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) if (use_intr) { @@ -335,10 +379,10 @@ cleanup: */ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out) { - lldesc_t *in_desc_head = NULL, *out_desc_head = NULL; - lldesc_t *out_desc_tail = NULL; /* pointer to the final output descriptor */ - lldesc_t *block_desc = NULL, *block_in_desc = NULL, *block_out_desc = NULL; - size_t lldesc_num = 0; + crypto_dma_desc_t *in_desc_head = NULL, *out_desc_head = NULL; + 
crypto_dma_desc_t *out_desc_tail = NULL; /* pointer to the final output descriptor */ + crypto_dma_desc_t *block_desc = NULL, *block_in_desc = NULL, *block_out_desc = NULL; + size_t crypto_dma_desc_num = 0; unsigned stream_bytes = len % AES_BLOCK_BYTES; // bytes which aren't in a full block unsigned block_bytes = len - stream_bytes; // bytes which are in a full block unsigned blocks = (block_bytes / AES_BLOCK_BYTES) + ((stream_bytes > 0) ? 1 : 0); @@ -388,10 +432,10 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, } /* Set up dma descriptors for input and output considering the 16 byte alignment requirement for EDMA */ - lldesc_num = lldesc_get_required_num_constrained(block_bytes, LLDESC_MAX_NUM_PER_DESC_16B_ALIGNED); + crypto_dma_desc_num = dma_desc_get_required_num(block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED); /* Allocate both in and out descriptors to save a malloc/free per function call */ - block_desc = heap_caps_calloc(lldesc_num * 2, sizeof(lldesc_t), MALLOC_CAP_DMA); + block_desc = heap_caps_calloc(crypto_dma_desc_num * 2, sizeof(crypto_dma_desc_t), MALLOC_CAP_DMA); if (block_desc == NULL) { mbedtls_platform_zeroize(output, len); ESP_LOGE(TAG, "Failed to allocate memory"); @@ -399,36 +443,37 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, } block_in_desc = block_desc; - block_out_desc = block_desc + lldesc_num; + block_out_desc = block_desc + crypto_dma_desc_num; + + dma_desc_setup_link(block_in_desc, input, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - lldesc_setup_link(block_in_desc, input, block_bytes, 0); //Limit max inlink descriptor length to be 16 byte aligned, require for EDMA - lldesc_setup_link_constrained(block_out_desc, output, block_bytes, LLDESC_MAX_NUM_PER_DESC_16B_ALIGNED, 0); + dma_desc_setup_link(block_out_desc, output, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED, 0); /* Setup in/out start descriptors */ - 
lldesc_append(&in_desc_head, block_in_desc); - lldesc_append(&out_desc_head, block_out_desc); + dma_desc_append(&in_desc_head, block_in_desc); + dma_desc_append(&out_desc_head, block_out_desc); - out_desc_tail = &block_out_desc[lldesc_num - 1]; + out_desc_tail = &block_out_desc[crypto_dma_desc_num - 1]; } /* Any leftover bytes which are appended as an additional DMA list */ if (stream_bytes > 0) { - memset(&s_stream_in_desc, 0, sizeof(lldesc_t)); - memset(&s_stream_out_desc, 0, sizeof(lldesc_t)); + memset(&s_stream_in_desc, 0, sizeof(crypto_dma_desc_t)); + memset(&s_stream_out_desc, 0, sizeof(crypto_dma_desc_t)); memset(s_stream_in, 0, AES_BLOCK_BYTES); memset(s_stream_out, 0, AES_BLOCK_BYTES); memcpy(s_stream_in, input + block_bytes, stream_bytes); - lldesc_setup_link(&s_stream_in_desc, s_stream_in, AES_BLOCK_BYTES, 0); - lldesc_setup_link(&s_stream_out_desc, s_stream_out, AES_BLOCK_BYTES, 0); + dma_desc_setup_link(&s_stream_in_desc, s_stream_in, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); + dma_desc_setup_link(&s_stream_out_desc, s_stream_out, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); /* Link with block descriptors */ - lldesc_append(&in_desc_head, &s_stream_in_desc); - lldesc_append(&out_desc_head, &s_stream_out_desc); + dma_desc_append(&in_desc_head, &s_stream_in_desc); + dma_desc_append(&out_desc_head, &s_stream_out_desc); out_desc_tail = &s_stream_out_desc; } @@ -494,13 +539,13 @@ cleanup: * 3. If AES interrupt is enabled and ISR initialisation fails * 4. 
Failure in any of the AES operations */ -int esp_aes_process_dma_gcm(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, lldesc_t *aad_desc, size_t aad_len) +int esp_aes_process_dma_gcm(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, crypto_dma_desc_t *aad_desc, size_t aad_len) { - lldesc_t *in_desc_head = NULL, *out_desc_head = NULL, *len_desc = NULL; - lldesc_t *out_desc_tail = NULL; /* pointer to the final output descriptor */ - lldesc_t stream_in_desc, stream_out_desc; - lldesc_t *block_desc = NULL, *block_in_desc = NULL, *block_out_desc = NULL; - size_t lldesc_num; + crypto_dma_desc_t *in_desc_head = NULL, *out_desc_head = NULL, *len_desc = NULL; + crypto_dma_desc_t *out_desc_tail = NULL; /* pointer to the final output descriptor */ + crypto_dma_desc_t stream_in_desc, stream_out_desc; + crypto_dma_desc_t *block_desc = NULL, *block_in_desc = NULL, *block_out_desc = NULL; + size_t crypto_dma_desc_num = 0; uint32_t len_buf[4] = {}; uint8_t stream_in[16] = {}; uint8_t stream_out[16] = {}; @@ -523,10 +568,10 @@ int esp_aes_process_dma_gcm(esp_aes_context *ctx, const unsigned char *input, un } /* Set up dma descriptors for input and output */ - lldesc_num = lldesc_get_required_num(block_bytes); + crypto_dma_desc_num = dma_desc_get_required_num(block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC); /* Allocate both in and out descriptors to save a malloc/free per function call, add 1 for length descriptor */ - block_desc = heap_caps_calloc( (lldesc_num * 2) + 1, sizeof(lldesc_t), MALLOC_CAP_DMA); + block_desc = heap_caps_calloc((crypto_dma_desc_num * 2) + 1, sizeof(crypto_dma_desc_t), MALLOC_CAP_DMA); if (block_desc == NULL) { mbedtls_platform_zeroize(output, len); ESP_LOGE(TAG, "Failed to allocate memory"); @@ -534,32 +579,32 @@ int esp_aes_process_dma_gcm(esp_aes_context *ctx, const unsigned char *input, un } block_in_desc = block_desc; - len_desc = block_desc + lldesc_num; - block_out_desc = 
block_desc + lldesc_num + 1; + len_desc = block_desc + crypto_dma_desc_num; + block_out_desc = block_desc + crypto_dma_desc_num + 1; if (aad_desc != NULL) { - lldesc_append(&in_desc_head, aad_desc); + dma_desc_append(&in_desc_head, aad_desc); } if (block_bytes > 0) { - lldesc_setup_link(block_in_desc, input, block_bytes, 0); - lldesc_setup_link(block_out_desc, output, block_bytes, 0); + dma_desc_setup_link(block_in_desc, input, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); + dma_desc_setup_link(block_out_desc, output, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - lldesc_append(&in_desc_head, block_in_desc); - lldesc_append(&out_desc_head, block_out_desc); + dma_desc_append(&in_desc_head, block_in_desc); + dma_desc_append(&out_desc_head, block_out_desc); - out_desc_tail = &block_out_desc[lldesc_num - 1]; + out_desc_tail = &block_out_desc[crypto_dma_desc_num - 1]; } /* Any leftover bytes which are appended as an additional DMA list */ if (stream_bytes > 0) { memcpy(stream_in, input + block_bytes, stream_bytes); - lldesc_setup_link(&stream_in_desc, stream_in, AES_BLOCK_BYTES, 0); - lldesc_setup_link(&stream_out_desc, stream_out, AES_BLOCK_BYTES, 0); + dma_desc_setup_link(&stream_in_desc, stream_in, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); + dma_desc_setup_link(&stream_out_desc, stream_out, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - lldesc_append(&in_desc_head, &stream_in_desc); - lldesc_append(&out_desc_head, &stream_out_desc); + dma_desc_append(&in_desc_head, &stream_in_desc); + dma_desc_append(&out_desc_head, &stream_out_desc); out_desc_tail = &stream_out_desc; } @@ -568,13 +613,13 @@ int esp_aes_process_dma_gcm(esp_aes_context *ctx, const unsigned char *input, un len_buf[1] = __builtin_bswap32(aad_len * 8); len_buf[3] = __builtin_bswap32(len * 8); - len_desc->length = sizeof(len_buf); - len_desc->size = sizeof(len_buf); - len_desc->owner = 1; - len_desc->eof = 1; - len_desc->buf = (uint8_t 
*)len_buf; + len_desc->dw0.length = sizeof(len_buf); + len_desc->dw0.size = sizeof(len_buf); + len_desc->dw0.owner = 1; + len_desc->dw0.suc_eof = 1; + len_desc->buffer = (uint8_t *)len_buf; - lldesc_append(&in_desc_head, len_desc); + dma_desc_append(&in_desc_head, len_desc); #if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) /* Only use interrupt for long AES operations */ diff --git a/components/mbedtls/port/aes/dma/esp_aes_crypto_dma_impl.c b/components/mbedtls/port/aes/dma/esp_aes_crypto_dma_impl.c index df8fd4bf49..8878375231 100644 --- a/components/mbedtls/port/aes/dma/esp_aes_crypto_dma_impl.c +++ b/components/mbedtls/port/aes/dma/esp_aes_crypto_dma_impl.c @@ -1,18 +1,10 @@ -// Copyright 2020 Espressif Systems (Shanghai) PTE LTD -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- +/* + * SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include "esp_crypto_dma.h" #include "esp_aes_dma_priv.h" #include "soc/soc_caps.h" @@ -20,7 +12,7 @@ #include "hal/crypto_dma_ll.h" -esp_err_t esp_aes_dma_start(const lldesc_t *input, const lldesc_t *output) +esp_err_t esp_aes_dma_start(const crypto_dma_desc_t *input, const crypto_dma_desc_t *output) { crypto_dma_ll_reset(); crypto_dma_ll_set_mode(CRYPTO_DMA_AES); @@ -36,7 +28,7 @@ esp_err_t esp_aes_dma_start(const lldesc_t *input, const lldesc_t *output) return ESP_OK; } -bool esp_aes_dma_done(const lldesc_t *output) +bool esp_aes_dma_done(const crypto_dma_desc_t *output) { - return (crypto_dma_ll_inlink_is_eof() && (output->owner == 0)); + return (crypto_dma_ll_inlink_is_eof() && (output->dw0.owner == 0)); } diff --git a/components/mbedtls/port/aes/dma/esp_aes_gdma_impl.c b/components/mbedtls/port/aes/dma/esp_aes_gdma_impl.c index 991a5600c5..c43bfd68ae 100644 --- a/components/mbedtls/port/aes/dma/esp_aes_gdma_impl.c +++ b/components/mbedtls/port/aes/dma/esp_aes_gdma_impl.c @@ -5,14 +5,15 @@ */ #include "esp_aes_dma_priv.h" +#include "esp_crypto_dma.h" #include "esp_crypto_shared_gdma.h" -esp_err_t esp_aes_dma_start(const lldesc_t *input, const lldesc_t *output) +esp_err_t esp_aes_dma_start(const crypto_dma_desc_t *input, const crypto_dma_desc_t *output) { - return esp_crypto_shared_gdma_start(input, output, GDMA_TRIG_PERIPH_AES); + return esp_crypto_shared_gdma_start_axi_ahb(input, output, GDMA_TRIG_PERIPH_AES); } -bool esp_aes_dma_done(const lldesc_t *output) +bool esp_aes_dma_done(const crypto_dma_desc_t *output) { - return (output->owner == 0); + return (output->dw0.owner == 0); } diff --git a/components/mbedtls/port/aes/dma/include/esp_aes_dma_priv.h b/components/mbedtls/port/aes/dma/include/esp_aes_dma_priv.h index c219a9a77e..72c56dc034 100644 --- a/components/mbedtls/port/aes/dma/include/esp_aes_dma_priv.h +++ 
b/components/mbedtls/port/aes/dma/include/esp_aes_dma_priv.h @@ -6,7 +6,8 @@ #pragma once -#include "soc/lldesc.h" +#include +#include "esp_crypto_dma.h" #include "soc/soc_caps.h" #include "esp_err.h" @@ -23,7 +24,7 @@ extern "C" { * - ESP_OK: Successfully started the DMA * - ESP_ERR_INVALID_STATE: No DMA channel available */ -esp_err_t esp_aes_dma_start(const lldesc_t *input, const lldesc_t *output); +esp_err_t esp_aes_dma_start(const crypto_dma_desc_t *input, const crypto_dma_desc_t *output); /** * @brief Check if the DMA engine is finished reading the result @@ -33,7 +34,7 @@ esp_err_t esp_aes_dma_start(const lldesc_t *input, const lldesc_t *output); * - true: DMA finished * - false: DMA not yet finished */ -bool esp_aes_dma_done(const lldesc_t *output); +bool esp_aes_dma_done(const crypto_dma_desc_t *output); /** * @brief Allocate AES peripheral interrupt handler diff --git a/components/mbedtls/port/aes/esp_aes_gcm.c b/components/mbedtls/port/aes/esp_aes_gcm.c index 76d68544d7..e7c34fc5b4 100644 --- a/components/mbedtls/port/aes/esp_aes_gcm.c +++ b/components/mbedtls/port/aes/esp_aes_gcm.c @@ -672,8 +672,8 @@ int esp_aes_gcm_crypt_and_tag( esp_gcm_context *ctx, #endif #if CONFIG_MBEDTLS_HARDWARE_GCM int ret; - lldesc_t aad_desc[2] = {}; - lldesc_t *aad_head_desc = NULL; + crypto_dma_desc_t aad_desc[2] = {}; + crypto_dma_desc_t *aad_head_desc = NULL; size_t remainder_bit; uint8_t stream_in[AES_BLOCK_BYTES] = {}; unsigned stream_bytes = aad_len % AES_BLOCK_BYTES; // bytes which aren't in a full block @@ -687,7 +687,7 @@ int esp_aes_gcm_crypt_and_tag( esp_gcm_context *ctx, /* Limit aad len to a single DMA descriptor to simplify DMA handling In practice, e.g. 
with mbedtls the length of aad will always be short */ - if (aad_len > LLDESC_MAX_NUM_PER_DESC) { + if (aad_len > DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC) { return MBEDTLS_ERR_GCM_BAD_INPUT; } /* IV and AD are limited to 2^32 bits, so 2^29 bytes */ @@ -723,20 +723,20 @@ int esp_aes_gcm_crypt_and_tag( esp_gcm_context *ctx, ctx->aes_ctx.key_in_hardware = aes_hal_setkey(ctx->aes_ctx.key, ctx->aes_ctx.key_bytes, mode); if (block_bytes > 0) { - aad_desc[0].length = block_bytes; - aad_desc[0].size = block_bytes; - aad_desc[0].owner = 1; - aad_desc[0].buf = aad; + aad_desc[0].dw0.length = block_bytes; + aad_desc[0].dw0.size = block_bytes; + aad_desc[0].dw0.owner = 1; + aad_desc[0].buffer = (void*)aad; } if (stream_bytes > 0) { memcpy(stream_in, aad + block_bytes, stream_bytes); - aad_desc[0].empty = (uint32_t)&aad_desc[1]; - aad_desc[1].length = AES_BLOCK_BYTES; - aad_desc[1].size = AES_BLOCK_BYTES; - aad_desc[1].owner = 1; - aad_desc[1].buf = stream_in; + aad_desc[0].next = &aad_desc[1]; + aad_desc[1].dw0.length = AES_BLOCK_BYTES; + aad_desc[1].dw0.size = AES_BLOCK_BYTES; + aad_desc[1].dw0.owner = 1; + aad_desc[1].buffer = (void*)stream_in; } if (block_bytes > 0) { diff --git a/components/mbedtls/port/aes/include/esp_aes_internal.h b/components/mbedtls/port/aes/include/esp_aes_internal.h index 889888e83f..e400e63f8a 100644 --- a/components/mbedtls/port/aes/include/esp_aes_internal.h +++ b/components/mbedtls/port/aes/include/esp_aes_internal.h @@ -6,10 +6,10 @@ #pragma once -#include "aes/esp_aes.h" -#include "aes/esp_aes_gcm.h" -#include "soc/soc_caps.h" #include +#include "aes/esp_aes.h" +#include "soc/soc_caps.h" +#include "esp_crypto_dma.h" #ifdef __cplusplus extern "C" { @@ -29,7 +29,7 @@ bool valid_key_length(const esp_aes_context *ctx); * @param aad_len GCM additional data length * @return int -1 on error */ -int esp_aes_process_dma_gcm(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, lldesc_t *aad_desc, size_t aad_len); +int 
esp_aes_process_dma_gcm(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, crypto_dma_desc_t *aad_desc, size_t aad_len); #endif #ifdef __cplusplus diff --git a/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c b/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c index 83026a6274..8df3fd2c27 100644 --- a/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c +++ b/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2021-2022 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2021-2023 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -8,12 +8,21 @@ #include "freertos/FreeRTOS.h" #include "freertos/task.h" - -#include "hal/gdma_ll.h" -#include "soc/soc_caps.h" #include "esp_log.h" #include "esp_err.h" +#include "esp_crypto_dma.h" #include "esp_crypto_lock.h" +#include "soc/soc_caps.h" + +#if CONFIG_IDF_TARGET_ESP32P4 +#include "esp32p4/rom/cache.h" +#endif + +#if SOC_AHB_GDMA_VERSION == 1 +#include "hal/gdma_ll.h" +#elif SOC_AXI_GDMA_SUPPORTED +#include "hal/axi_dma_ll.h" +#endif /* SOC_AHB_GDMA_VERSION */ #define NEW_CHANNEL_TIMEOUT_MS 1000 #define NEW_CHANNEL_DELAY_MS 100 @@ -26,12 +35,15 @@ static gdma_channel_handle_t tx_channel; /* Allocate a new GDMA channel, will keep trying until NEW_CHANNEL_TIMEOUT_MS */ static inline esp_err_t crypto_shared_gdma_new_channel(gdma_channel_alloc_config_t *channel_config, gdma_channel_handle_t *channel) { - esp_err_t ret; + esp_err_t ret = ESP_FAIL; int time_waited_ms = 0; while (1) { - ret = gdma_new_channel(channel_config, channel); - +#if SOC_AXI_GDMA_SUPPORTED + ret = gdma_new_axi_channel(channel_config, channel); +#else /* !SOC_AXI_GDMA_SUPPORTED */ + ret = gdma_new_ahb_channel(channel_config, channel); +#endif /* SOC_AXI_GDMA_SUPPORTED */ if (ret == ESP_OK) { break; } else if (time_waited_ms >= NEW_CHANNEL_TIMEOUT_MS) { @@ -92,7 +104,6 
@@ err: return ret; } - esp_err_t esp_crypto_shared_gdma_start(const lldesc_t *input, const lldesc_t *output, gdma_trigger_peripheral_t peripheral) { int rx_ch_id = 0; @@ -121,7 +132,68 @@ esp_err_t esp_crypto_shared_gdma_start(const lldesc_t *input, const lldesc_t *ou /* tx channel is reset by gdma_connect(), also reset rx to ensure a known state */ gdma_get_channel_id(rx_channel, &rx_ch_id); + +#if SOC_AHB_GDMA_VERSION == 1 gdma_ll_rx_reset_channel(&GDMA, rx_ch_id); +#endif /* SOC_AHB_GDMA_VERSION */ + + gdma_start(tx_channel, (intptr_t)input); + gdma_start(rx_channel, (intptr_t)output); + + return ESP_OK; +} + +esp_err_t esp_crypto_shared_gdma_start_axi_ahb(const crypto_dma_desc_t *input, const crypto_dma_desc_t *output, gdma_trigger_peripheral_t peripheral) +{ + int rx_ch_id = 0; + esp_err_t ret = ESP_OK; + +#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE + // TODO: replace with `esp_cache_msync` + const crypto_dma_desc_t *it = input; + while(it != NULL) { + Cache_WriteBack_Addr(CACHE_MAP_L1_DCACHE | CACHE_MAP_L2_CACHE, (uint32_t)it->buffer, it->dw0.length); // try using esp_cache_msync() + Cache_WriteBack_Addr(CACHE_MAP_L1_DCACHE | CACHE_MAP_L2_CACHE, (uint32_t)it, sizeof(crypto_dma_desc_t)); + it = (const crypto_dma_desc_t*) it->next; + } + + it = output; + while(it != NULL) { + Cache_WriteBack_Addr(CACHE_MAP_L1_DCACHE | CACHE_MAP_L2_CACHE, (uint32_t)it->buffer, it->dw0.length); + Cache_WriteBack_Addr(CACHE_MAP_L1_DCACHE | CACHE_MAP_L2_CACHE, (uint32_t)it, sizeof(crypto_dma_desc_t)); + it = (const crypto_dma_desc_t*) it->next; + }; +#endif /* SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE */ + + if (tx_channel == NULL) { + /* Allocate a pair of RX and TX for crypto, should only happen the first time we use the GDMA + or if user called esp_crypto_shared_gdma_release */ + ret = crypto_shared_gdma_init(); + } + + if (ret != ESP_OK) { + return ret; + } + + /* Tx channel is shared between AES and SHA, need to connect to peripheral every time */ + gdma_disconnect(tx_channel); + + if 
(peripheral == GDMA_TRIG_PERIPH_SHA) { + gdma_connect(tx_channel, GDMA_MAKE_TRIGGER(GDMA_TRIG_PERIPH_SHA, 0)); + } else if (peripheral == GDMA_TRIG_PERIPH_AES) { + gdma_connect(tx_channel, GDMA_MAKE_TRIGGER(GDMA_TRIG_PERIPH_AES, 0)); + } else { + return ESP_ERR_INVALID_ARG; + } + + /* tx channel is reset by gdma_connect(), also reset rx to ensure a known state */ + gdma_get_channel_id(rx_channel, &rx_ch_id); + +#if SOC_AHB_GDMA_VERSION == 1 + gdma_ll_rx_reset_channel(&GDMA, rx_ch_id); +#elif SOC_AXI_GDMA_SUPPORTED + axi_dma_ll_rx_reset_channel(&AXI_DMA, rx_ch_id); +#endif /* SOC_AHB_GDMA_VERSION */ gdma_start(tx_channel, (intptr_t)input); gdma_start(rx_channel, (intptr_t)output); diff --git a/components/mbedtls/port/include/aes/esp_aes_gcm.h b/components/mbedtls/port/include/aes/esp_aes_gcm.h index fb9cc1260c..c270c9f97b 100644 --- a/components/mbedtls/port/include/aes/esp_aes_gcm.h +++ b/components/mbedtls/port/include/aes/esp_aes_gcm.h @@ -12,7 +12,6 @@ #include "aes/esp_aes.h" #include "mbedtls/cipher.h" -#include "soc/lldesc.h" #ifdef __cplusplus extern "C" { diff --git a/components/mbedtls/port/include/esp_crypto_dma.h b/components/mbedtls/port/include/esp_crypto_dma.h new file mode 100644 index 0000000000..8eadc1aa26 --- /dev/null +++ b/components/mbedtls/port/include/esp_crypto_dma.h @@ -0,0 +1,45 @@ +/* + * SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include "hal/dma_types.h" +#include "soc/gdma_channel.h" +#include "soc/soc_caps.h" + + +#ifdef __cplusplus +extern "C" +{ +#endif + + +#if (SOC_AES_SUPPORT_DMA) && (SOC_SHA_SUPPORT_DMA) + +#if (SOC_AES_GDMA) && (SOC_SHA_GDMA) + +#if (SOC_GDMA_TRIG_PERIPH_AES0_BUS == SOC_GDMA_BUS_AHB) && (SOC_GDMA_TRIG_PERIPH_SHA0_BUS == SOC_GDMA_BUS_AHB) +#define DMA_DESC_MEM_ALIGN_SIZE 4 +typedef dma_descriptor_align4_t crypto_dma_desc_t; +#elif (SOC_GDMA_TRIG_PERIPH_AES0_BUS == SOC_GDMA_BUS_AXI) && (SOC_GDMA_TRIG_PERIPH_SHA0_BUS == 
SOC_GDMA_BUS_AXI) +#define DMA_DESC_MEM_ALIGN_SIZE 8 +typedef dma_descriptor_align8_t crypto_dma_desc_t; +#else +#error "As we support a shared crypto GDMA layer for the AES and the SHA peripheral, both the peripherals must use the same GDMA bus" +#endif /* (SOC_GDMA_TRIG_PERIPH_AES0_BUS == SOC_GDMA_BUS_AHB) && (SOC_GDMA_TRIG_PERIPH_SHA0_BUS == SOC_GDMA_BUS_AHB) */ + +#elif (SOC_AES_CRYPTO_DMA) && (SOC_SHA_CRYPTO_DMA) +#define DMA_DESC_MEM_ALIGN_SIZE 4 +typedef dma_descriptor_align4_t crypto_dma_desc_t; + +#endif /* (SOC_AES_GDMA) && (SOC_SHA_GDMA) */ + +#endif /* (SOC_AES_SUPPORT_DMA) && (SOC_SHA_SUPPORT_DMA) */ + + +#ifdef __cplusplus +} +#endif diff --git a/components/mbedtls/port/include/esp_crypto_shared_gdma.h b/components/mbedtls/port/include/esp_crypto_shared_gdma.h index 4e69a5f8ea..fd49558912 100644 --- a/components/mbedtls/port/include/esp_crypto_shared_gdma.h +++ b/components/mbedtls/port/include/esp_crypto_shared_gdma.h @@ -1,14 +1,15 @@ /* - * SPDX-FileCopyrightText: 2021-2022 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2021-2023 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ #pragma once -#include "soc/lldesc.h" +#include "esp_crypto_dma.h" #include "esp_private/gdma.h" #include "esp_err.h" +#include "soc/lldesc.h" #ifdef __cplusplus extern "C" { @@ -16,17 +17,31 @@ extern "C" { /** * @brief Start a GDMA transfer on the shared crypto DMA channel + * Only supports AHB-DMA. 
* * @note Will allocate a GDMA channel for AES & SHA if no such channel is already allocated * - * @param input Input linked list descriptor - * @param output Output linked list descriptor + * @param input Input linked list descriptor (lldesc_t *) + * @param output Output linked list descriptor (lldesc_t *) * @param peripheral Crypto peripheral to connect the DMA to, either GDMA_TRIG_PERIPH_AES or * GDMA_TRIG_PERIPH_SHA * @return esp_err_t ESP_FAIL if no GDMA channel available */ esp_err_t esp_crypto_shared_gdma_start(const lldesc_t *input, const lldesc_t *output, gdma_trigger_peripheral_t peripheral); +/** + * @brief Start a GDMA transfer on the shared crypto DMA channel + * Supports AXI-DMA and AHB-DMA. + * + * @note Will allocate a GDMA channel for AES & SHA if no such channel is already allocated + * + * @param input Input linked list descriptor (crypto_dma_desc_t *) + * @param output Output linked list descriptor (crypto_dma_desc_t *) + * @param peripheral Crypto peripheral to connect the DMA to, either GDMA_TRIG_PERIPH_AES or + * GDMA_TRIG_PERIPH_SHA + * @return esp_err_t ESP_FAIL if no GDMA channel available + */ +esp_err_t esp_crypto_shared_gdma_start_axi_ahb(const crypto_dma_desc_t *input, const crypto_dma_desc_t *output, gdma_trigger_peripheral_t peripheral); /** * @brief Frees any shared crypto DMA channel, if esp_crypto_shared_gdma_start is called after diff --git a/components/mbedtls/test_apps/main/test_aes_sha_parallel.c b/components/mbedtls/test_apps/main/test_aes_sha_parallel.c index ddbb5baffa..a018eca9ab 100644 --- a/components/mbedtls/test_apps/main/test_aes_sha_parallel.c +++ b/components/mbedtls/test_apps/main/test_aes_sha_parallel.c @@ -112,8 +112,6 @@ static void tskRunAES256Test(void *pvParameters) } -#include "esp_crypto_shared_gdma.h" - #define TASK_STACK_SIZE (20*1024) TEST_CASE("mbedtls AES/SHA multithreading", "[mbedtls]") diff --git a/tools/ci/check_copyright_ignore.txt b/tools/ci/check_copyright_ignore.txt index c1fbfb935a..4092aef14a 
100644 --- a/tools/ci/check_copyright_ignore.txt +++ b/tools/ci/check_copyright_ignore.txt @@ -504,7 +504,6 @@ components/lwip/include/apps/ping/ping.h components/mbedtls/esp_crt_bundle/test_gen_crt_bundle/test_gen_crt_bundle.py components/mbedtls/port/aes/block/esp_aes.c components/mbedtls/port/aes/dma/esp_aes.c -components/mbedtls/port/aes/dma/esp_aes_crypto_dma_impl.c components/mbedtls/port/aes/esp_aes_xts.c components/mbedtls/port/include/aes/esp_aes.h components/mbedtls/port/include/aes_alt.h From a6012c9e5ae60c55216efdc9fcc4ac77454b1ddd Mon Sep 17 00:00:00 2001 From: "harshal.patil" Date: Mon, 30 Oct 2023 17:03:02 +0530 Subject: [PATCH 3/7] feat(mbedtls/aes): add AES peripheral support for esp32p4 --- components/hal/esp32p4/include/hal/aes_ll.h | 225 ++++++++++++++++++ components/mbedtls/port/aes/dma/esp_aes.c | 29 ++- .../esp32p4/include/soc/Kconfig.soc_caps.in | 4 + components/soc/esp32p4/include/soc/soc_caps.h | 2 +- 4 files changed, 250 insertions(+), 10 deletions(-) create mode 100644 components/hal/esp32p4/include/hal/aes_ll.h diff --git a/components/hal/esp32p4/include/hal/aes_ll.h b/components/hal/esp32p4/include/hal/aes_ll.h new file mode 100644 index 0000000000..031baf2055 --- /dev/null +++ b/components/hal/esp32p4/include/hal/aes_ll.h @@ -0,0 +1,225 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#pragma once + +#include +#include +#include "soc/hwcrypto_reg.h" +#include "hal/aes_types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief State of AES accelerator, busy, idle or done + * + */ +typedef enum { + ESP_AES_STATE_IDLE = 0, /* AES accelerator is idle */ + ESP_AES_STATE_BUSY, /* Transform in progress */ + ESP_AES_STATE_DONE, /* Transform completed */ +} esp_aes_state_t; + + +/** + * @brief Write the encryption/decryption key to hardware + * + * @param key Key to be written to the AES hardware + * @param key_word_len Number of words in the key + * + * 
@return Number of bytes written to hardware, used for fault injection check + */ +static inline uint8_t aes_ll_write_key(const uint8_t *key, size_t key_word_len) +{ + /* This variable is used for fault injection checks, so marked volatile to avoid optimisation */ + volatile uint8_t key_in_hardware = 0; + /* Memcpy to avoid potential unaligned access */ + uint32_t key_word; + for (int i = 0; i < key_word_len; i++) { + memcpy(&key_word, key + 4 * i, 4); + REG_WRITE(AES_KEY_0_REG + i * 4, key_word); + key_in_hardware += 4; + } + return key_in_hardware; +} + +/** + * @brief Sets the mode + * + * @param mode ESP_AES_ENCRYPT = 1, or ESP_AES_DECRYPT = 0 + * @param key_bytes Number of bytes in the key + */ +static inline void aes_ll_set_mode(int mode, uint8_t key_bytes) +{ + const uint32_t MODE_DECRYPT_BIT = 4; + unsigned mode_reg_base = (mode == ESP_AES_ENCRYPT) ? 0 : MODE_DECRYPT_BIT; + + /* See TRM for the mapping between keylength and mode bit */ + REG_WRITE(AES_MODE_REG, mode_reg_base + ((key_bytes / 8) - 2)); +} + +/** + * @brief Writes message block to AES hardware + * + * @param input Block to be written + */ +static inline void aes_ll_write_block(const void *input) +{ + uint32_t input_word; + + for (int i = 0; i < AES_BLOCK_WORDS; i++) { + memcpy(&input_word, (uint8_t*)input + 4 * i, 4); + REG_WRITE(AES_TEXT_IN_0_REG + i * 4, input_word); + } +} + +/** + * @brief Read the AES block + * + * @param output the output of the transform, length = AES_BLOCK_BYTES + */ +static inline void aes_ll_read_block(void *output) +{ + uint32_t output_word; + const size_t REG_WIDTH = sizeof(uint32_t); + + for (size_t i = 0; i < AES_BLOCK_WORDS; i++) { + output_word = REG_READ(AES_TEXT_OUT_0_REG + (i * REG_WIDTH)); + /* Memcpy to avoid potential unaligned access */ + memcpy( (uint8_t*)output + i * 4, &output_word, sizeof(output_word)); + } +} + +/** + * @brief Starts block transform + * + */ +static inline void aes_ll_start_transform(void) +{ + REG_WRITE(AES_TRIGGER_REG, 1); +} + + 
+/** + * @brief Read state of AES accelerator + * + * @return esp_aes_state_t + */ +static inline esp_aes_state_t aes_ll_get_state(void) +{ + return (esp_aes_state_t)REG_READ(AES_STATE_REG); +} + + +/** + * @brief Set mode of operation + * + * @note Only used for DMA transforms + * + * @param mode + */ +static inline void aes_ll_set_block_mode(esp_aes_mode_t mode) +{ + REG_WRITE(AES_BLOCK_MODE_REG, mode); +} + +/** + * @brief Set AES-CTR counter to INC32 + * + * @note Only affects AES-CTR mode + * + */ +static inline void aes_ll_set_inc(void) +{ + REG_WRITE(AES_INC_SEL_REG, 0); +} + +/** + * @brief Release the DMA + * + */ +static inline void aes_ll_dma_exit(void) +{ + REG_WRITE(AES_DMA_EXIT_REG, 0); +} + +/** + * @brief Sets the number of blocks to be transformed + * + * @note Only used for DMA transforms + * + * @param num_blocks Number of blocks to transform + */ +static inline void aes_ll_set_num_blocks(size_t num_blocks) +{ + REG_WRITE(AES_BLOCK_NUM_REG, num_blocks); +} + +/* + * Write IV to hardware iv registers + */ +static inline void aes_ll_set_iv(const uint8_t *iv) +{ + uint32_t *reg_addr_buf = (uint32_t *)(AES_IV_MEM); + uint32_t iv_word; + + for (int i = 0; i < IV_WORDS; i++ ) { + /* Memcpy to avoid potential unaligned access */ + memcpy(&iv_word, iv + 4 * i, sizeof(iv_word)); + REG_WRITE(&reg_addr_buf[i], iv_word); + } +} + +/* + * Read IV from hardware iv registers + */ +static inline void aes_ll_read_iv(uint8_t *iv) +{ + uint32_t iv_word; + const size_t REG_WIDTH = sizeof(uint32_t); + + for (size_t i = 0; i < IV_WORDS; i++) { + iv_word = REG_READ(AES_IV_MEM + (i * REG_WIDTH)); + /* Memcpy to avoid potential unaligned access */ + memcpy(iv + i * 4, &iv_word, sizeof(iv_word)); + } +} + +/** + * @brief Enable or disable DMA mode + * + * @param enable true to enable, false to disable. 
+ */ +static inline void aes_ll_dma_enable(bool enable) +{ + REG_WRITE(AES_DMA_ENABLE_REG, enable); +} + +/** + * @brief Enable or disable transform completed interrupt + * + * @param enable true to enable, false to disable. + */ +static inline void aes_ll_interrupt_enable(bool enable) +{ + REG_WRITE(AES_INT_ENA_REG, enable); +} + +/** + * @brief Clears the interrupt + * + */ +static inline void aes_ll_interrupt_clear(void) +{ + REG_WRITE(AES_INT_CLEAR_REG, 1); +} + + +#ifdef __cplusplus +} +#endif diff --git a/components/mbedtls/port/aes/dma/esp_aes.c b/components/mbedtls/port/aes/dma/esp_aes.c index 9aaa327ff9..42875deaeb 100644 --- a/components/mbedtls/port/aes/dma/esp_aes.c +++ b/components/mbedtls/port/aes/dma/esp_aes.c @@ -49,6 +49,8 @@ #include "esp32s2/rom/cache.h" #elif CONFIG_IDF_TARGET_ESP32S3 #include "esp32s3/rom/cache.h" +#elif CONFIG_IDF_TARGET_ESP32P4 +#include "esp32p4/rom/cache.h" #endif #include "freertos/FreeRTOS.h" @@ -103,10 +105,10 @@ static bool s_check_dma_capable(const void *p); * * Must be in DMA capable memory, so stack is not a safe place to put them * * To avoid having to malloc/free them for every DMA operation */ -static DRAM_ATTR crypto_dma_desc_t s_stream_in_desc; -static DRAM_ATTR crypto_dma_desc_t s_stream_out_desc; -static DRAM_ATTR uint8_t s_stream_in[AES_BLOCK_BYTES]; -static DRAM_ATTR uint8_t s_stream_out[AES_BLOCK_BYTES]; +DMA_DESC_ALIGN_ATTR static DRAM_ATTR crypto_dma_desc_t s_stream_in_desc; +DMA_DESC_ALIGN_ATTR static DRAM_ATTR crypto_dma_desc_t s_stream_out_desc; +DMA_DESC_ALIGN_ATTR static DRAM_ATTR uint8_t s_stream_in[AES_BLOCK_BYTES]; +DMA_DESC_ALIGN_ATTR static DRAM_ATTR uint8_t s_stream_out[AES_BLOCK_BYTES]; /** Append a descriptor to the chain, set head if chain empty * @@ -265,7 +267,7 @@ static esp_err_t esp_aes_isr_initialise( void ) #endif // CONFIG_MBEDTLS_AES_USE_INTERRUPT /* Wait for AES hardware block operation to complete */ -static int esp_aes_dma_wait_complete(bool use_intr, crypto_dma_desc_t 
*output_desc) +static int esp_aes_dma_wait_complete(bool use_intr, crypto_dma_desc_t *output_desc_head, crypto_dma_desc_t *output_desc_tail) { #if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) if (use_intr) { @@ -285,7 +287,16 @@ static int esp_aes_dma_wait_complete(bool use_intr, crypto_dma_desc_t *output_de */ aes_hal_wait_done(); - esp_aes_wait_dma_done(output_desc); +#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE + const crypto_dma_desc_t *it = output_desc_head; + while(it != NULL) { + Cache_Invalidate_Addr(CACHE_MAP_L1_DCACHE | CACHE_MAP_L2_CACHE, (uint32_t)it->buffer, it->dw0.length); + Cache_Invalidate_Addr(CACHE_MAP_L1_DCACHE | CACHE_MAP_L2_CACHE, (uint32_t)it, sizeof(crypto_dma_desc_t)); + it = (const crypto_dma_desc_t*) it->next; + }; +#endif /* SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE */ + + esp_aes_wait_dma_done(output_desc_tail); return 0; } @@ -435,7 +446,7 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, crypto_dma_desc_num = dma_desc_get_required_num(block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED); /* Allocate both in and out descriptors to save a malloc/free per function call */ - block_desc = heap_caps_calloc(crypto_dma_desc_num * 2, sizeof(crypto_dma_desc_t), MALLOC_CAP_DMA); + block_desc = heap_caps_aligned_calloc(DMA_DESC_MEM_ALIGN_SIZE, crypto_dma_desc_num * 2, sizeof(crypto_dma_desc_t), MALLOC_CAP_DMA); if (block_desc == NULL) { mbedtls_platform_zeroize(output, len); ESP_LOGE(TAG, "Failed to allocate memory"); @@ -501,7 +512,7 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, aes_hal_transform_dma_start(blocks); - if (esp_aes_dma_wait_complete(use_intr, out_desc_tail) < 0) { + if (esp_aes_dma_wait_complete(use_intr, out_desc_head, out_desc_tail) < 0) { ESP_LOGE(TAG, "esp_aes_dma_wait_complete failed"); ret = -1; goto cleanup; @@ -645,7 +656,7 @@ int esp_aes_process_dma_gcm(esp_aes_context *ctx, const unsigned char *input, un aes_hal_transform_dma_gcm_start(blocks); - if 
(esp_aes_dma_wait_complete(use_intr, out_desc_tail) < 0) { + if (esp_aes_dma_wait_complete(use_intr, out_desc_head, out_desc_tail) < 0) { ESP_LOGE(TAG, "esp_aes_dma_wait_complete failed"); ret = -1; goto cleanup; diff --git a/components/soc/esp32p4/include/soc/Kconfig.soc_caps.in b/components/soc/esp32p4/include/soc/Kconfig.soc_caps.in index 5460a2551c..b36f3ed040 100644 --- a/components/soc/esp32p4/include/soc/Kconfig.soc_caps.in +++ b/components/soc/esp32p4/include/soc/Kconfig.soc_caps.in @@ -131,6 +131,10 @@ config SOC_SYSTIMER_SUPPORTED bool default y +config SOC_AES_SUPPORTED + bool + default y + config SOC_MPI_SUPPORTED bool default y diff --git a/components/soc/esp32p4/include/soc/soc_caps.h b/components/soc/esp32p4/include/soc/soc_caps.h index 9fd1a1855b..cf7f454936 100644 --- a/components/soc/esp32p4/include/soc/soc_caps.h +++ b/components/soc/esp32p4/include/soc/soc_caps.h @@ -55,7 +55,7 @@ #define SOC_LEDC_SUPPORTED 1 #define SOC_I2C_SUPPORTED 1 #define SOC_SYSTIMER_SUPPORTED 1 -// #define SOC_AES_SUPPORTED 1 //TODO: IDF-6519 +#define SOC_AES_SUPPORTED 1 #define SOC_MPI_SUPPORTED 1 // #define SOC_SHA_SUPPORTED 1 //TODO: IDF-7541 #define SOC_HMAC_SUPPORTED 1 From 9b151895830221c87614b1e57903c3305323ee4b Mon Sep 17 00:00:00 2001 From: "harshal.patil" Date: Tue, 31 Oct 2023 14:25:06 +0530 Subject: [PATCH 4/7] change(mbedtls): replace Cache ROM APIs usage with APIs from esp_cache.h - Only APIs used in esp_crypto_shared_gdma and aes have been replaced - Get dcache line size using cache api instead of Kconfig --- .../hal/test_apps/crypto/main/CMakeLists.txt | 2 +- components/mbedtls/CMakeLists.txt | 10 ++++-- components/mbedtls/port/aes/dma/esp_aes.c | 35 +++++++++---------- .../esp_crypto_shared_gdma.c | 13 +++---- 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/components/hal/test_apps/crypto/main/CMakeLists.txt b/components/hal/test_apps/crypto/main/CMakeLists.txt index 460bf3b120..40667b9033 100644 --- 
a/components/hal/test_apps/crypto/main/CMakeLists.txt +++ b/components/hal/test_apps/crypto/main/CMakeLists.txt @@ -56,7 +56,7 @@ if(CONFIG_SOC_SHA_SUPPORTED) endif() idf_component_register(SRCS ${srcs} - PRIV_REQUIRES efuse mbedtls + PRIV_REQUIRES efuse mbedtls esp_mm REQUIRES test_utils unity WHOLE_ARCHIVE PRIV_INCLUDE_DIRS "${priv_include_dirs}" diff --git a/components/mbedtls/CMakeLists.txt b/components/mbedtls/CMakeLists.txt index 57b832fd99..9a2a3d31dd 100644 --- a/components/mbedtls/CMakeLists.txt +++ b/components/mbedtls/CMakeLists.txt @@ -180,7 +180,6 @@ if(SHA_PERIPHERAL_TYPE STREQUAL "dma") set(SHA_DMA_SRCS "${COMPONENT_DIR}/port/sha/dma/esp_sha_crypto_dma_impl.c") else() set(SHA_DMA_SRCS "${COMPONENT_DIR}/port/sha/dma/esp_sha_gdma_impl.c") - endif() target_sources(mbedcrypto PRIVATE "${SHA_DMA_SRCS}") endif() @@ -189,14 +188,19 @@ if(AES_PERIPHERAL_TYPE STREQUAL "dma") if(NOT CONFIG_SOC_AES_GDMA) set(AES_DMA_SRCS "${COMPONENT_DIR}/port/aes/dma/esp_aes_crypto_dma_impl.c") else() - set(AES_DMA_SRCS "${COMPONENT_DIR}/port/aes/dma/esp_aes_gdma_impl.c" - "${COMPONENT_DIR}/port/crypto_shared_gdma/esp_crypto_shared_gdma.c") + set(AES_DMA_SRCS "${COMPONENT_DIR}/port/aes/dma/esp_aes_gdma_impl.c") endif() target_include_directories(mbedcrypto PRIVATE "${COMPONENT_DIR}/port/aes/dma/include") target_sources(mbedcrypto PRIVATE "${AES_DMA_SRCS}") endif() +if(SHA_PERIPHERAL_TYPE STREQUAL "dma" OR AES_PERIPHERAL_TYPE STREQUAL "dma") + target_link_libraries(mbedcrypto PRIVATE idf::esp_mm) + if(CONFIG_SOC_SHA_GDMA OR CONFIG_SOC_AES_GDMA) + target_sources(mbedcrypto PRIVATE "${COMPONENT_DIR}/port/crypto_shared_gdma/esp_crypto_shared_gdma.c") + endif() +endif() if(NOT ${IDF_TARGET} STREQUAL "linux") target_sources(mbedcrypto PRIVATE "${COMPONENT_DIR}/port/esp_hardware.c") diff --git a/components/mbedtls/port/aes/dma/esp_aes.c b/components/mbedtls/port/aes/dma/esp_aes.c index 42875deaeb..4be296b826 100644 --- a/components/mbedtls/port/aes/dma/esp_aes.c +++ 
b/components/mbedtls/port/aes/dma/esp_aes.c @@ -36,6 +36,7 @@ #include "esp_crypto_dma.h" #include "esp_heap_caps.h" #include "esp_memory_utils.h" +#include "esp_cache.h" #include "sys/param.h" #if CONFIG_PM_ENABLE #include "esp_pm.h" @@ -44,14 +45,7 @@ #include "hal/aes_hal.h" #include "esp_aes_dma_priv.h" #include "esp_aes_internal.h" - -#if CONFIG_IDF_TARGET_ESP32S2 -#include "esp32s2/rom/cache.h" -#elif CONFIG_IDF_TARGET_ESP32S3 -#include "esp32s3/rom/cache.h" -#elif CONFIG_IDF_TARGET_ESP32P4 -#include "esp32p4/rom/cache.h" -#endif +#include "esp_private/esp_cache_private.h" #include "freertos/FreeRTOS.h" #include "freertos/semphr.h" @@ -105,10 +99,10 @@ static bool s_check_dma_capable(const void *p); * * Must be in DMA capable memory, so stack is not a safe place to put them * * To avoid having to malloc/free them for every DMA operation */ -DMA_DESC_ALIGN_ATTR static DRAM_ATTR crypto_dma_desc_t s_stream_in_desc; -DMA_DESC_ALIGN_ATTR static DRAM_ATTR crypto_dma_desc_t s_stream_out_desc; -DMA_DESC_ALIGN_ATTR static DRAM_ATTR uint8_t s_stream_in[AES_BLOCK_BYTES]; -DMA_DESC_ALIGN_ATTR static DRAM_ATTR uint8_t s_stream_out[AES_BLOCK_BYTES]; +static DRAM_ATTR crypto_dma_desc_t s_stream_in_desc; +static DRAM_ATTR crypto_dma_desc_t s_stream_out_desc; +static DRAM_ATTR uint8_t s_stream_in[AES_BLOCK_BYTES]; +static DRAM_ATTR uint8_t s_stream_out[AES_BLOCK_BYTES]; /** Append a descriptor to the chain, set head if chain empty * @@ -290,8 +284,8 @@ static int esp_aes_dma_wait_complete(bool use_intr, crypto_dma_desc_t *output_de #if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE const crypto_dma_desc_t *it = output_desc_head; while(it != NULL) { - Cache_Invalidate_Addr(CACHE_MAP_L1_DCACHE | CACHE_MAP_L2_CACHE, (uint32_t)it->buffer, it->dw0.length); - Cache_Invalidate_Addr(CACHE_MAP_L1_DCACHE | CACHE_MAP_L2_CACHE, (uint32_t)it, sizeof(crypto_dma_desc_t)); + esp_cache_msync(it->buffer, it->dw0.length, ESP_CACHE_MSYNC_FLAG_DIR_M2C | ESP_CACHE_MSYNC_FLAG_UNALIGNED); + 
esp_cache_msync((void *)it, sizeof(crypto_dma_desc_t), ESP_CACHE_MSYNC_FLAG_DIR_M2C | ESP_CACHE_MSYNC_FLAG_UNALIGNED); it = (const crypto_dma_desc_t*) it->next; }; #endif /* SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE */ @@ -419,10 +413,15 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, /* Flush cache if input in external ram */ #if (CONFIG_SPIRAM && SOC_PSRAM_DMA_CAPABLE) if (esp_ptr_external_ram(input)) { - Cache_WriteBack_Addr((uint32_t)input, len); + esp_cache_msync((void *)input, len, ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); } if (esp_ptr_external_ram(output)) { - if ((((intptr_t)(output) & (DCACHE_LINE_SIZE - 1)) != 0) || (block_bytes % DCACHE_LINE_SIZE != 0)) { + uint32_t dcache_line_size; + esp_err_t ret = esp_cache_get_alignment(ESP_CACHE_MALLOC_FLAG_PSRAM, &dcache_line_size); + if (ret != ESP_OK) { + return ret; + } + if ((((intptr_t)(output) & (dcache_line_size - 1)) != 0) || (block_bytes % dcache_line_size != 0)) { // Non aligned ext-mem buffer output_needs_realloc = true; } @@ -446,7 +445,7 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, crypto_dma_desc_num = dma_desc_get_required_num(block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED); /* Allocate both in and out descriptors to save a malloc/free per function call */ - block_desc = heap_caps_aligned_calloc(DMA_DESC_MEM_ALIGN_SIZE, crypto_dma_desc_num * 2, sizeof(crypto_dma_desc_t), MALLOC_CAP_DMA); + block_desc = heap_caps_aligned_calloc(8, crypto_dma_desc_num * 2, sizeof(crypto_dma_desc_t), MALLOC_CAP_DMA); if (block_desc == NULL) { mbedtls_platform_zeroize(output, len); ESP_LOGE(TAG, "Failed to allocate memory"); @@ -521,7 +520,7 @@ static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, #if (CONFIG_SPIRAM && SOC_PSRAM_DMA_CAPABLE) if (block_bytes > 0) { if (esp_ptr_external_ram(output)) { - Cache_Invalidate_Addr((uint32_t)output, block_bytes); + esp_cache_msync((void*)output, 
block_bytes, ESP_CACHE_MSYNC_FLAG_DIR_M2C | ESP_CACHE_MSYNC_FLAG_UNALIGNED); } } #endif diff --git a/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c b/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c index 8df3fd2c27..972c816800 100644 --- a/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c +++ b/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c @@ -10,14 +10,11 @@ #include "freertos/task.h" #include "esp_log.h" #include "esp_err.h" +#include "esp_cache.h" #include "esp_crypto_dma.h" #include "esp_crypto_lock.h" #include "soc/soc_caps.h" -#if CONFIG_IDF_TARGET_ESP32P4 -#include "esp32p4/rom/cache.h" -#endif - #if SOC_AHB_GDMA_VERSION == 1 #include "hal/gdma_ll.h" #elif SOC_AXI_GDMA_SUPPORTED @@ -152,15 +149,15 @@ esp_err_t esp_crypto_shared_gdma_start_axi_ahb(const crypto_dma_desc_t *input, c // TODO: replace with `esp_cache_msync` const crypto_dma_desc_t *it = input; while(it != NULL) { - Cache_WriteBack_Addr(CACHE_MAP_L1_DCACHE | CACHE_MAP_L2_CACHE, (uint32_t)it->buffer, it->dw0.length); // try using esp_cache_msync() - Cache_WriteBack_Addr(CACHE_MAP_L1_DCACHE | CACHE_MAP_L2_CACHE, (uint32_t)it, sizeof(crypto_dma_desc_t)); + esp_cache_msync(it->buffer, it->dw0.length, ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); + esp_cache_msync((void *)it, sizeof(crypto_dma_desc_t), ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); it = (const crypto_dma_desc_t*) it->next; } it = output; while(it != NULL) { - Cache_WriteBack_Addr(CACHE_MAP_L1_DCACHE | CACHE_MAP_L2_CACHE, (uint32_t)it->buffer, it->dw0.length); - Cache_WriteBack_Addr(CACHE_MAP_L1_DCACHE | CACHE_MAP_L2_CACHE, (uint32_t)it, sizeof(crypto_dma_desc_t)); + esp_cache_msync(it->buffer, it->dw0.length, ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); + esp_cache_msync((void *)it, sizeof(crypto_dma_desc_t), ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); it = (const crypto_dma_desc_t*) 
it->next; }; #endif /* SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE */ From 2abb656ba2d14720ca7c22c636244cd9351b4643 Mon Sep 17 00:00:00 2001 From: "harshal.patil" Date: Tue, 12 Dec 2023 21:12:41 +0530 Subject: [PATCH 5/7] feat(mbedtls/aes): Support AES-DMA operations by satisfying L1 cache alignment requirements - Use DMA RX done interrupt status bit while waiting for DMA rx transfer --- components/hal/include/hal/dma_types.h | 5 - components/mbedtls/CMakeLists.txt | 2 + components/mbedtls/port/aes/dma/esp_aes.c | 618 ----------- .../mbedtls/port/aes/dma/esp_aes_dma_core.c | 992 ++++++++++++++++++ .../mbedtls/port/aes/dma/esp_aes_gdma_impl.c | 6 +- components/mbedtls/port/aes/esp_aes_gcm.c | 9 +- .../port/aes/include/esp_aes_internal.h | 16 +- .../esp_crypto_shared_gdma.c | 41 +- .../port/include/esp_crypto_shared_gdma.h | 11 +- 9 files changed, 1048 insertions(+), 652 deletions(-) create mode 100644 components/mbedtls/port/aes/dma/esp_aes_dma_core.c diff --git a/components/hal/include/hal/dma_types.h b/components/hal/include/hal/dma_types.h index 52e6c542c7..435ac5cd96 100644 --- a/components/hal/include/hal/dma_types.h +++ b/components/hal/include/hal/dma_types.h @@ -59,11 +59,6 @@ ESP_STATIC_ASSERT(sizeof(dma_descriptor_align8_t) == 16, "dma_descriptor_align8_ #define DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED (4095-3) /*!< Maximum size of the buffer that can be attached to descriptor, and aligned to 4B */ #define DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED (4095-15) /*!< Maximum size of the buffer that can be attached to descriptor, and aligned to 16B */ -// the size field has 12 bits, but 0 not for 4096. -// to avoid possible problem when the size is not word-aligned, we only use 4096-4 per desc. -/** Maximum size of data in the buffer that a DMA descriptor can hold. */ -#define DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC (4096-4) - /** * Get the number of DMA descriptors required for a given buffer size. 
* diff --git a/components/mbedtls/CMakeLists.txt b/components/mbedtls/CMakeLists.txt index 9a2a3d31dd..e92ac5d0a4 100644 --- a/components/mbedtls/CMakeLists.txt +++ b/components/mbedtls/CMakeLists.txt @@ -191,6 +191,8 @@ if(AES_PERIPHERAL_TYPE STREQUAL "dma") set(AES_DMA_SRCS "${COMPONENT_DIR}/port/aes/dma/esp_aes_gdma_impl.c") endif() + list(APPEND AES_DMA_SRCS "${COMPONENT_DIR}/port/aes/dma/esp_aes_dma_core.c") + target_include_directories(mbedcrypto PRIVATE "${COMPONENT_DIR}/port/aes/dma/include") target_sources(mbedcrypto PRIVATE "${AES_DMA_SRCS}") endif() diff --git a/components/mbedtls/port/aes/dma/esp_aes.c b/components/mbedtls/port/aes/dma/esp_aes.c index 4be296b826..d10c07c5e2 100644 --- a/components/mbedtls/port/aes/dma/esp_aes.c +++ b/components/mbedtls/port/aes/dma/esp_aes.c @@ -28,27 +28,11 @@ #include #include "mbedtls/aes.h" -#include "mbedtls/platform_util.h" -#include "esp_intr_alloc.h" #include "esp_private/periph_ctrl.h" #include "esp_log.h" -#include "esp_attr.h" -#include "esp_crypto_dma.h" -#include "esp_heap_caps.h" -#include "esp_memory_utils.h" -#include "esp_cache.h" -#include "sys/param.h" -#if CONFIG_PM_ENABLE -#include "esp_pm.h" -#endif #include "esp_crypto_lock.h" #include "hal/aes_hal.h" -#include "esp_aes_dma_priv.h" #include "esp_aes_internal.h" -#include "esp_private/esp_cache_private.h" - -#include "freertos/FreeRTOS.h" -#include "freertos/semphr.h" #if SOC_AES_GDMA #define AES_LOCK() esp_crypto_sha_aes_lock_acquire() @@ -58,123 +42,7 @@ #define AES_RELEASE() esp_crypto_dma_lock_release() #endif -/* Max size of each chunk to process when output buffer is in unaligned external ram - must be a multiple of block size -*/ -#define AES_MAX_CHUNK_WRITE_SIZE 1600 - -/* Input over this length will yield and wait for interrupt instead of - busy-waiting, 30000 bytes is approx 0.5 ms */ -#define AES_DMA_INTR_TRIG_LEN 2000 - -/* With buffers in PSRAM (worst condition) we still achieve a speed of 4 MB/s - thus a 2 second timeout value should 
be suffient for even very large buffers. - */ -#define AES_WAIT_INTR_TIMEOUT_MS 2000 - -#if defined(CONFIG_MBEDTLS_AES_USE_INTERRUPT) -static SemaphoreHandle_t op_complete_sem; -#if defined(CONFIG_PM_ENABLE) -static esp_pm_lock_handle_t s_pm_cpu_lock; -static esp_pm_lock_handle_t s_pm_sleep_lock; -#endif -#endif - -#if SOC_PSRAM_DMA_CAPABLE - -#if (CONFIG_ESP32S2_DATA_CACHE_LINE_16B || CONFIG_ESP32S3_DATA_CACHE_LINE_16B) -#define DCACHE_LINE_SIZE 16 -#elif (CONFIG_ESP32S2_DATA_CACHE_LINE_32B || CONFIG_ESP32S3_DATA_CACHE_LINE_32B) -#define DCACHE_LINE_SIZE 32 -#elif CONFIG_ESP32S3_DATA_CACHE_LINE_64B -#define DCACHE_LINE_SIZE 64 -#endif //(CONFIG_ESP32S2_DATA_CACHE_LINE_16B || CONFIG_ESP32S3_DATA_CACHE_LINE_16B) - -#endif //SOC_PSRAM_DMA_CAPABLE - static const char *TAG = "esp-aes"; -static bool s_check_dma_capable(const void *p); - -/* These are static due to: - * * Must be in DMA capable memory, so stack is not a safe place to put them - * * To avoid having to malloc/free them for every DMA operation - */ -static DRAM_ATTR crypto_dma_desc_t s_stream_in_desc; -static DRAM_ATTR crypto_dma_desc_t s_stream_out_desc; -static DRAM_ATTR uint8_t s_stream_in[AES_BLOCK_BYTES]; -static DRAM_ATTR uint8_t s_stream_out[AES_BLOCK_BYTES]; - -/** Append a descriptor to the chain, set head if chain empty - * - * @param[out] head Pointer to the first/head node of the DMA descriptor linked list - * @param item Pointer to the DMA descriptor node that has to be appended - */ -static inline void dma_desc_append(crypto_dma_desc_t **head, crypto_dma_desc_t *item) -{ - crypto_dma_desc_t *it; - if (*head == NULL) { - *head = item; - return; - } - - it = *head; - - while (it->next != 0) { - it = (crypto_dma_desc_t *)it->next; - } - it->dw0.suc_eof = 0; - it->next = item; -} - -/** - * Generate a linked list pointing to a (huge) buffer in an descriptor array. 
- * - * The caller should ensure there is enough size to hold the array, by calling - * `dma_desc_get_required_num` with the same or less than the max_desc_size argument. - * - * @param[out] dmadesc Output of a descriptor array, the head should be fed to the DMA. - * @param data Buffer for the descriptors to point to. - * @param len Size (or length for TX) of the buffer - * @param max_desc_size Maximum length of each descriptor - * @param isrx The RX DMA may require the buffer to be word-aligned, set to true for a RX link, otherwise false. - */ -static inline void dma_desc_setup_link(crypto_dma_desc_t* dmadesc, const void *data, int len, int max_desc_size, bool isrx) -{ - int i = 0; - while (len) { - int dmachunklen = len; - if (dmachunklen > max_desc_size) { - dmachunklen = max_desc_size; - } - if (isrx) { - //Receive needs DMA length rounded to next 32-bit boundary - dmadesc[i].dw0.size = (dmachunklen + 3) & (~3); - dmadesc[i].dw0.length = (dmachunklen + 3) & (~3); - } else { - dmadesc[i].dw0.size = dmachunklen; - dmadesc[i].dw0.length = dmachunklen; - } - dmadesc[i].buffer = (void *)data; - dmadesc[i].dw0.suc_eof = 0; - dmadesc[i].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA; - dmadesc[i].next = &dmadesc[i + 1]; - len -= dmachunklen; - data += dmachunklen; - i++; - } - dmadesc[i - 1].dw0.suc_eof = 1; //Mark last DMA desc as end of stream. 
- dmadesc[i - 1].next = NULL; -} - -static inline void esp_aes_wait_dma_done(crypto_dma_desc_t *output) -{ - /* Wait for DMA write operation to complete */ - while (1) { - if ( esp_aes_dma_done(output) ) { - break; - } - } -} void esp_aes_acquire_hardware( void ) { @@ -202,481 +70,6 @@ void esp_aes_release_hardware( void ) AES_RELEASE(); } - -#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) -static IRAM_ATTR void esp_aes_complete_isr(void *arg) -{ - BaseType_t higher_woken; - aes_hal_interrupt_clear(); - xSemaphoreGiveFromISR(op_complete_sem, &higher_woken); - if (higher_woken) { - portYIELD_FROM_ISR(); - } -} - -void esp_aes_intr_alloc(void) -{ - if (op_complete_sem == NULL) { - const int isr_flags = esp_intr_level_to_flags(CONFIG_MBEDTLS_AES_INTERRUPT_LEVEL); - - esp_err_t ret = esp_intr_alloc(ETS_AES_INTR_SOURCE, isr_flags, esp_aes_complete_isr, NULL, NULL); - if (ret != ESP_OK) { - ESP_LOGE(TAG, "Failed to allocate AES interrupt %d", ret); - // This should be treated as fatal error as this API would mostly - // be invoked within mbedTLS interface. There is no way for the system - // to proceed if the AES interrupt allocation fails here. 
- abort(); - } - - static StaticSemaphore_t op_sem_buf; - op_complete_sem = xSemaphoreCreateBinaryStatic(&op_sem_buf); - // Static semaphore creation is unlikley to fail but still basic sanity - assert(op_complete_sem != NULL); - } -} - -static esp_err_t esp_aes_isr_initialise( void ) -{ - aes_hal_interrupt_clear(); - aes_hal_interrupt_enable(true); - - /* AES is clocked proportionally to CPU clock, take power management lock */ -#ifdef CONFIG_PM_ENABLE - if (s_pm_cpu_lock == NULL) { - if (esp_pm_lock_create(ESP_PM_NO_LIGHT_SLEEP, 0, "aes_sleep", &s_pm_sleep_lock) != ESP_OK) { - ESP_LOGE(TAG, "Failed to create PM sleep lock"); - return ESP_FAIL; - } - if (esp_pm_lock_create(ESP_PM_CPU_FREQ_MAX, 0, "aes_cpu", &s_pm_cpu_lock) != ESP_OK) { - ESP_LOGE(TAG, "Failed to create PM CPU lock"); - return ESP_FAIL; - } - } - esp_pm_lock_acquire(s_pm_cpu_lock); - esp_pm_lock_acquire(s_pm_sleep_lock); -#endif - - return ESP_OK; -} -#endif // CONFIG_MBEDTLS_AES_USE_INTERRUPT - -/* Wait for AES hardware block operation to complete */ -static int esp_aes_dma_wait_complete(bool use_intr, crypto_dma_desc_t *output_desc_head, crypto_dma_desc_t *output_desc_tail) -{ -#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) - if (use_intr) { - if (!xSemaphoreTake(op_complete_sem, AES_WAIT_INTR_TIMEOUT_MS / portTICK_PERIOD_MS)) { - /* indicates a fundamental problem with driver */ - ESP_LOGE(TAG, "Timed out waiting for completion of AES Interrupt"); - return -1; - } -#ifdef CONFIG_PM_ENABLE - esp_pm_lock_release(s_pm_cpu_lock); - esp_pm_lock_release(s_pm_sleep_lock); -#endif // CONFIG_PM_ENABLE - } -#endif - /* Checking this if interrupt is used also, to avoid - issues with AES fault injection - */ - aes_hal_wait_done(); - -#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE - const crypto_dma_desc_t *it = output_desc_head; - while(it != NULL) { - esp_cache_msync(it->buffer, it->dw0.length, ESP_CACHE_MSYNC_FLAG_DIR_M2C | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - esp_cache_msync((void *)it, sizeof(crypto_dma_desc_t), 
ESP_CACHE_MSYNC_FLAG_DIR_M2C | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - it = (const crypto_dma_desc_t*) it->next; - }; -#endif /* SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE */ - - esp_aes_wait_dma_done(output_desc_tail); - return 0; -} - - -static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out); - - -/* Output buffers in external ram needs to be 16-byte aligned and DMA cant access input in the iCache mem range, - reallocate them into internal memory and encrypt in chunks to avoid - having to malloc too big of a buffer - - The function esp_aes_process_dma_ext_ram zeroises the output buffer in the case of memory allocation failure. -*/ - -static int esp_aes_process_dma_ext_ram(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out, bool realloc_input, bool realloc_output) -{ - size_t chunk_len; - int ret = 0; - int offset = 0; - unsigned char *input_buf = NULL; - unsigned char *output_buf = NULL; - const unsigned char *dma_input; - chunk_len = MIN(AES_MAX_CHUNK_WRITE_SIZE, len); - - if (realloc_input) { - input_buf = heap_caps_malloc(chunk_len, MALLOC_CAP_DMA); - - if (input_buf == NULL) { - mbedtls_platform_zeroize(output, len); - ESP_LOGE(TAG, "Failed to allocate memory"); - return -1; - } - } - - if (realloc_output) { - output_buf = heap_caps_malloc(chunk_len, MALLOC_CAP_DMA); - - if (output_buf == NULL) { - mbedtls_platform_zeroize(output, len); - ESP_LOGE(TAG, "Failed to allocate memory"); - return -1; - } - } else { - output_buf = output; - } - - while (len) { - chunk_len = MIN(AES_MAX_CHUNK_WRITE_SIZE, len); - - /* If input needs realloc then copy it, else use the input with offset*/ - if (realloc_input) { - memcpy(input_buf, input + offset, chunk_len); - dma_input = input_buf; - } else { - dma_input = input + offset; - } - - if (esp_aes_process_dma(ctx, dma_input, output_buf, chunk_len, stream_out) != 0) { - ret = -1; - goto cleanup; - } - - 
if (realloc_output) { - memcpy(output + offset, output_buf, chunk_len); - } else { - output_buf = output + offset + chunk_len; - } - - len -= chunk_len; - offset += chunk_len; - } - -cleanup: - - if (realloc_input) { - free(input_buf); - } - if (realloc_output) { - free(output_buf); - } - - return ret; -} - -/* Encrypt/decrypt the input using DMA - * The function esp_aes_process_dma zeroises the output buffer in the case of following conditions: - * 1. If key is not written in the hardware - * 2. Memory allocation failures - * 3. If AES interrupt is enabled and ISR initialisation fails - * 4. Failure in any of the AES operations - */ -static int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out) -{ - crypto_dma_desc_t *in_desc_head = NULL, *out_desc_head = NULL; - crypto_dma_desc_t *out_desc_tail = NULL; /* pointer to the final output descriptor */ - crypto_dma_desc_t *block_desc = NULL, *block_in_desc = NULL, *block_out_desc = NULL; - size_t crypto_dma_desc_num = 0; - unsigned stream_bytes = len % AES_BLOCK_BYTES; // bytes which aren't in a full block - unsigned block_bytes = len - stream_bytes; // bytes which are in a full block - unsigned blocks = (block_bytes / AES_BLOCK_BYTES) + ((stream_bytes > 0) ? 1 : 0); - bool use_intr = false; - bool input_needs_realloc = false; - bool output_needs_realloc = false; - int ret = 0; - - assert(len > 0); // caller shouldn't ever have len set to zero - assert(stream_bytes == 0 || stream_out != NULL); // stream_out can be NULL if we're processing full block(s) - - /* If no key is written to hardware yet, either the user hasn't called - mbedtls_aes_setkey_enc/mbedtls_aes_setkey_dec - meaning we also don't - know which mode to use - or a fault skipped the - key write to hardware. Treat this as a fatal error and zero the output block. 
- */ - if (ctx->key_in_hardware != ctx->key_bytes) { - mbedtls_platform_zeroize(output, len); - return MBEDTLS_ERR_AES_INVALID_INPUT_LENGTH; - } - - if (block_bytes > 0) { - /* Flush cache if input in external ram */ -#if (CONFIG_SPIRAM && SOC_PSRAM_DMA_CAPABLE) - if (esp_ptr_external_ram(input)) { - esp_cache_msync((void *)input, len, ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - } - if (esp_ptr_external_ram(output)) { - uint32_t dcache_line_size; - esp_err_t ret = esp_cache_get_alignment(ESP_CACHE_MALLOC_FLAG_PSRAM, &dcache_line_size); - if (ret != ESP_OK) { - return ret; - } - if ((((intptr_t)(output) & (dcache_line_size - 1)) != 0) || (block_bytes % dcache_line_size != 0)) { - // Non aligned ext-mem buffer - output_needs_realloc = true; - } - } -#endif - /* DMA cannot access memory in the iCache range, copy input to internal ram */ - if (!s_check_dma_capable(input)) { - input_needs_realloc = true; - } - - if (!s_check_dma_capable(output)) { - output_needs_realloc = true; - } - - /* If either input or output is unaccessible to the DMA then they need to be reallocated */ - if (input_needs_realloc || output_needs_realloc) { - return esp_aes_process_dma_ext_ram(ctx, input, output, len, stream_out, input_needs_realloc, output_needs_realloc); - } - - /* Set up dma descriptors for input and output considering the 16 byte alignment requirement for EDMA */ - crypto_dma_desc_num = dma_desc_get_required_num(block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED); - - /* Allocate both in and out descriptors to save a malloc/free per function call */ - block_desc = heap_caps_aligned_calloc(8, crypto_dma_desc_num * 2, sizeof(crypto_dma_desc_t), MALLOC_CAP_DMA); - if (block_desc == NULL) { - mbedtls_platform_zeroize(output, len); - ESP_LOGE(TAG, "Failed to allocate memory"); - return -1; - } - - block_in_desc = block_desc; - block_out_desc = block_desc + crypto_dma_desc_num; - - dma_desc_setup_link(block_in_desc, input, block_bytes, 
DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - - //Limit max inlink descriptor length to be 16 byte aligned, require for EDMA - dma_desc_setup_link(block_out_desc, output, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED, 0); - - /* Setup in/out start descriptors */ - dma_desc_append(&in_desc_head, block_in_desc); - dma_desc_append(&out_desc_head, block_out_desc); - - out_desc_tail = &block_out_desc[crypto_dma_desc_num - 1]; - } - - /* Any leftover bytes which are appended as an additional DMA list */ - if (stream_bytes > 0) { - - memset(&s_stream_in_desc, 0, sizeof(crypto_dma_desc_t)); - memset(&s_stream_out_desc, 0, sizeof(crypto_dma_desc_t)); - - memset(s_stream_in, 0, AES_BLOCK_BYTES); - memset(s_stream_out, 0, AES_BLOCK_BYTES); - - memcpy(s_stream_in, input + block_bytes, stream_bytes); - - dma_desc_setup_link(&s_stream_in_desc, s_stream_in, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - dma_desc_setup_link(&s_stream_out_desc, s_stream_out, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - - /* Link with block descriptors */ - dma_desc_append(&in_desc_head, &s_stream_in_desc); - dma_desc_append(&out_desc_head, &s_stream_out_desc); - - out_desc_tail = &s_stream_out_desc; - } - -#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) - /* Only use interrupt for long AES operations */ - if (len > AES_DMA_INTR_TRIG_LEN) { - use_intr = true; - if (esp_aes_isr_initialise() != ESP_OK) { - ESP_LOGE(TAG, "ESP-AES ISR initialisation failed"); - ret = -1; - goto cleanup; - } - } else -#endif - { - aes_hal_interrupt_enable(false); - } - - if (esp_aes_dma_start(in_desc_head, out_desc_head) != ESP_OK) { - ESP_LOGE(TAG, "esp_aes_dma_start failed, no DMA channel available"); - ret = -1; - goto cleanup; - } - - aes_hal_transform_dma_start(blocks); - - if (esp_aes_dma_wait_complete(use_intr, out_desc_head, out_desc_tail) < 0) { - ESP_LOGE(TAG, "esp_aes_dma_wait_complete failed"); - ret = -1; - goto cleanup; - } - -#if (CONFIG_SPIRAM && 
SOC_PSRAM_DMA_CAPABLE) - if (block_bytes > 0) { - if (esp_ptr_external_ram(output)) { - esp_cache_msync((void*)output, block_bytes, ESP_CACHE_MSYNC_FLAG_DIR_M2C | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - } - } -#endif - aes_hal_transform_dma_finish(); - - if (stream_bytes > 0) { - memcpy(output + block_bytes, s_stream_out, stream_bytes); - memcpy(stream_out, s_stream_out, AES_BLOCK_BYTES); - } - -cleanup: - if (ret != 0) { - mbedtls_platform_zeroize(output, len); - } - free(block_desc); - return ret; -} - - -#if CONFIG_MBEDTLS_HARDWARE_GCM - -/* Encrypt/decrypt with AES-GCM the input using DMA - * The function esp_aes_process_dma_gcm zeroises the output buffer in the case of following conditions: - * 1. If key is not written in the hardware - * 2. Memory allocation failures - * 3. If AES interrupt is enabled and ISR initialisation fails - * 4. Failure in any of the AES operations - */ -int esp_aes_process_dma_gcm(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, crypto_dma_desc_t *aad_desc, size_t aad_len) -{ - crypto_dma_desc_t *in_desc_head = NULL, *out_desc_head = NULL, *len_desc = NULL; - crypto_dma_desc_t *out_desc_tail = NULL; /* pointer to the final output descriptor */ - crypto_dma_desc_t stream_in_desc, stream_out_desc; - crypto_dma_desc_t *block_desc = NULL, *block_in_desc = NULL, *block_out_desc = NULL; - size_t crypto_dma_desc_num = 0; - uint32_t len_buf[4] = {}; - uint8_t stream_in[16] = {}; - uint8_t stream_out[16] = {}; - unsigned stream_bytes = len % AES_BLOCK_BYTES; // bytes which aren't in a full block - unsigned block_bytes = len - stream_bytes; // bytes which are in a full block - - unsigned blocks = (block_bytes / AES_BLOCK_BYTES) + ((stream_bytes > 0) ? 
1 : 0); - - bool use_intr = false; - int ret = 0; - - /* If no key is written to hardware yet, either the user hasn't called - mbedtls_aes_setkey_enc/mbedtls_aes_setkey_dec - meaning we also don't - know which mode to use - or a fault skipped the - key write to hardware. Treat this as a fatal error and zero the output block. - */ - if (ctx->key_in_hardware != ctx->key_bytes) { - mbedtls_platform_zeroize(output, len); - return MBEDTLS_ERR_AES_INVALID_INPUT_LENGTH; - } - - /* Set up dma descriptors for input and output */ - crypto_dma_desc_num = dma_desc_get_required_num(block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC); - - /* Allocate both in and out descriptors to save a malloc/free per function call, add 1 for length descriptor */ - block_desc = heap_caps_calloc((crypto_dma_desc_num * 2) + 1, sizeof(crypto_dma_desc_t), MALLOC_CAP_DMA); - if (block_desc == NULL) { - mbedtls_platform_zeroize(output, len); - ESP_LOGE(TAG, "Failed to allocate memory"); - return -1; - } - - block_in_desc = block_desc; - len_desc = block_desc + crypto_dma_desc_num; - block_out_desc = block_desc + crypto_dma_desc_num + 1; - - if (aad_desc != NULL) { - dma_desc_append(&in_desc_head, aad_desc); - } - - if (block_bytes > 0) { - dma_desc_setup_link(block_in_desc, input, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - dma_desc_setup_link(block_out_desc, output, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - - dma_desc_append(&in_desc_head, block_in_desc); - dma_desc_append(&out_desc_head, block_out_desc); - - out_desc_tail = &block_out_desc[crypto_dma_desc_num - 1]; - } - - /* Any leftover bytes which are appended as an additional DMA list */ - if (stream_bytes > 0) { - memcpy(stream_in, input + block_bytes, stream_bytes); - - dma_desc_setup_link(&stream_in_desc, stream_in, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - dma_desc_setup_link(&stream_out_desc, stream_out, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC, 0); - - 
dma_desc_append(&in_desc_head, &stream_in_desc); - dma_desc_append(&out_desc_head, &stream_out_desc); - - out_desc_tail = &stream_out_desc; - } - - - len_buf[1] = __builtin_bswap32(aad_len * 8); - len_buf[3] = __builtin_bswap32(len * 8); - - len_desc->dw0.length = sizeof(len_buf); - len_desc->dw0.size = sizeof(len_buf); - len_desc->dw0.owner = 1; - len_desc->dw0.suc_eof = 1; - len_desc->buffer = (uint8_t *)len_buf; - - dma_desc_append(&in_desc_head, len_desc); - -#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) - /* Only use interrupt for long AES operations */ - if (len > AES_DMA_INTR_TRIG_LEN) { - use_intr = true; - if (esp_aes_isr_initialise() != ESP_OK) { - ESP_LOGE(TAG, "ESP-AES ISR initialisation failed"); - ret = -1; - goto cleanup; - } - } else -#endif - { - aes_hal_interrupt_enable(false); - } - - /* Start AES operation */ - if (esp_aes_dma_start(in_desc_head, out_desc_head) != ESP_OK) { - ESP_LOGE(TAG, "esp_aes_dma_start failed, no DMA channel available"); - ret = -1; - goto cleanup; - } - - aes_hal_transform_dma_gcm_start(blocks); - - if (esp_aes_dma_wait_complete(use_intr, out_desc_head, out_desc_tail) < 0) { - ESP_LOGE(TAG, "esp_aes_dma_wait_complete failed"); - ret = -1; - goto cleanup; - } - - aes_hal_transform_dma_finish(); - - if (stream_bytes > 0) { - memcpy(output + block_bytes, stream_out, stream_bytes); - } - -cleanup: - if (ret != 0) { - mbedtls_platform_zeroize(output, len); - } - free(block_desc); - return ret; -} - -#endif //CONFIG_MBEDTLS_HARDWARE_GCM - static int esp_aes_validate_input(esp_aes_context *ctx, const unsigned char *input, unsigned char *output ) { @@ -1147,14 +540,3 @@ int esp_aes_crypt_ctr(esp_aes_context *ctx, return 0; } - -static bool s_check_dma_capable(const void *p) -{ - bool is_capable = false; -#if CONFIG_SPIRAM - is_capable |= esp_ptr_dma_ext_capable(p); -#endif - is_capable |= esp_ptr_dma_capable(p); - - return is_capable; -} diff --git a/components/mbedtls/port/aes/dma/esp_aes_dma_core.c 
b/components/mbedtls/port/aes/dma/esp_aes_dma_core.c new file mode 100644 index 0000000000..0aaa0737e1 --- /dev/null +++ b/components/mbedtls/port/aes/dma/esp_aes_dma_core.c @@ -0,0 +1,992 @@ +/* + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD + * + * SPDX-License-Identifier: Apache-2.0 + */ +#include +#include +#include "esp_attr.h" +#include "esp_cache.h" +#include "esp_check.h" +#include "esp_dma_utils.h" +#include "esp_err.h" +#include "esp_heap_caps.h" +#include "esp_intr_alloc.h" +#include "esp_log.h" +#include "esp_memory_utils.h" +#include "esp_private/esp_cache_private.h" +#include "esp_private/periph_ctrl.h" + +#if CONFIG_PM_ENABLE +#include "esp_pm.h" +#endif +#include "hal/aes_hal.h" + +#include "esp_aes_dma_priv.h" +#include "esp_aes_internal.h" +#include "esp_crypto_dma.h" + +#include "freertos/FreeRTOS.h" +#include "freertos/semphr.h" + +#include "mbedtls/aes.h" +#include "mbedtls/platform_util.h" + +#if SOC_AES_SUPPORT_GCM +#include "aes/esp_aes_gcm.h" +#endif + +/* Max size of each chunk to process when output buffer is in unaligned external ram + must be a multiple of block size +*/ +#define AES_MAX_CHUNK_WRITE_SIZE 1600 + +/* Input over this length will yield and wait for interrupt instead of + busy-waiting, 30000 bytes is approx 0.5 ms */ +#define AES_DMA_INTR_TRIG_LEN 2000 + +/* With buffers in PSRAM (worst condition) we still achieve a speed of 4 MB/s + thus a 2 second timeout value should be sufficient for even very large buffers.
+ */
+#define AES_WAIT_INTR_TIMEOUT_MS 2000
+
+#if defined(CONFIG_MBEDTLS_AES_USE_INTERRUPT)
+/* Given by esp_aes_complete_isr(), taken in esp_aes_dma_wait_complete() */
+static SemaphoreHandle_t op_complete_sem;
+#if defined(CONFIG_PM_ENABLE)
+/* Acquired in esp_aes_isr_initialise(), released in esp_aes_dma_wait_complete() */
+static esp_pm_lock_handle_t s_pm_cpu_lock;
+static esp_pm_lock_handle_t s_pm_sleep_lock;
+#endif
+#endif
+
+static const char *TAG = "esp-aes";
+
+/* Returns true when esp_ptr_dma_capable(p) holds or, with CONFIG_SPIRAM enabled,
+ * when esp_ptr_dma_ext_capable(p) holds.
+ */
+static bool s_check_dma_capable(const void *p)
+{
+    bool is_capable = false;
+#if CONFIG_SPIRAM
+    is_capable |= esp_ptr_dma_ext_capable(p);
+#endif
+    is_capable |= esp_ptr_dma_capable(p);
+
+    return is_capable;
+}
+
+#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT)
+/* AES interrupt handler: clears the interrupt and gives op_complete_sem */
+static IRAM_ATTR void esp_aes_complete_isr(void *arg)
+{
+    /* Must start as pdFALSE: xSemaphoreGiveFromISR() only writes pdTRUE when a
+     * task was unblocked, so an uninitialised value could request a bogus yield.
+     */
+    BaseType_t higher_woken = pdFALSE;
+    aes_hal_interrupt_clear();
+    xSemaphoreGiveFromISR(op_complete_sem, &higher_woken);
+    if (higher_woken) {
+        portYIELD_FROM_ISR();
+    }
+}
+
+/* Allocates the AES interrupt and creates the completion semaphore.
+ * Does nothing once op_complete_sem exists; aborts if the interrupt cannot be allocated.
+ */
+void esp_aes_intr_alloc(void)
+{
+    if (op_complete_sem == NULL) {
+        const int isr_flags = esp_intr_level_to_flags(CONFIG_MBEDTLS_AES_INTERRUPT_LEVEL);
+
+        esp_err_t ret = esp_intr_alloc(ETS_AES_INTR_SOURCE, isr_flags, esp_aes_complete_isr, NULL, NULL);
+        if (ret != ESP_OK) {
+            ESP_LOGE(TAG, "Failed to allocate AES interrupt %d", ret);
+            // This should be treated as fatal error as this API would mostly
+            // be invoked within mbedTLS interface. There is no way for the system
+            // to proceed if the AES interrupt allocation fails here.
+            abort();
+        }
+
+        static StaticSemaphore_t op_sem_buf;
+        op_complete_sem = xSemaphoreCreateBinaryStatic(&op_sem_buf);
+        // Static semaphore creation is unlikely to fail but still basic sanity
+        assert(op_complete_sem != NULL);
+    }
+}
+
+/* Clears and enables the AES interrupt and, with CONFIG_PM_ENABLE, creates (once)
+ * and acquires the power management locks.
+ *
+ * Returns ESP_OK, or ESP_FAIL when a power management lock cannot be created.
+ */
+static esp_err_t esp_aes_isr_initialise( void )
+{
+    aes_hal_interrupt_clear();
+    aes_hal_interrupt_enable(true);
+
+    /* AES is clocked proportionally to CPU clock, take power management lock */
+#ifdef CONFIG_PM_ENABLE
+    /* Each lock is checked on its own: if creating the CPU lock failed on an
+     * earlier call, the sleep lock that already exists must not be created again.
+     */
+    if (s_pm_sleep_lock == NULL) {
+        if (esp_pm_lock_create(ESP_PM_NO_LIGHT_SLEEP, 0, "aes_sleep", &s_pm_sleep_lock) != ESP_OK) {
+            ESP_LOGE(TAG, "Failed to create PM sleep lock");
+            return ESP_FAIL;
+        }
+    }
+    if (s_pm_cpu_lock == NULL) {
+        if (esp_pm_lock_create(ESP_PM_CPU_FREQ_MAX, 0, "aes_cpu", &s_pm_cpu_lock) != ESP_OK) {
+            ESP_LOGE(TAG, "Failed to create PM CPU lock");
+            return ESP_FAIL;
+        }
+    }
+    esp_pm_lock_acquire(s_pm_cpu_lock);
+    esp_pm_lock_acquire(s_pm_sleep_lock);
+#endif
+
+    return ESP_OK;
+}
+#endif // CONFIG_MBEDTLS_AES_USE_INTERRUPT
+
+/* Busy-waits until esp_aes_dma_done() reports true for the given output descriptor */
+static inline void esp_aes_wait_dma_done(crypto_dma_desc_t *output)
+{
+    /* Wait for DMA write operation to complete */
+    while (!esp_aes_dma_done(output)) {
+    }
+}
+
+/* Wait for AES hardware block operation to complete
+ *
+ * use_intr          wait on the completion semaphore first (only with CONFIG_MBEDTLS_AES_USE_INTERRUPT)
+ * output_desc_tail  last output descriptor, polled until the DMA is done with it
+ *
+ * Returns 0 on success, -1 when the completion semaphore is not given within AES_WAIT_INTR_TIMEOUT_MS.
+ */
+static int esp_aes_dma_wait_complete(bool use_intr, crypto_dma_desc_t *output_desc_tail)
+{
+#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT)
+    if (use_intr) {
+        const bool completed = (xSemaphoreTake(op_complete_sem, AES_WAIT_INTR_TIMEOUT_MS / portTICK_PERIOD_MS) == pdTRUE);
+#ifdef CONFIG_PM_ENABLE
+        /* Release on the timeout path as well, otherwise the locks acquired in
+         * esp_aes_isr_initialise() would stay held after a failed operation.
+         */
+        esp_pm_lock_release(s_pm_cpu_lock);
+        esp_pm_lock_release(s_pm_sleep_lock);
+#endif  // CONFIG_PM_ENABLE
+        if (!completed) {
+            /* indicates a fundamental problem with driver */
+            ESP_LOGE(TAG, "Timed out waiting for completion of AES Interrupt");
+            return -1;
+        }
+    }
+#endif
+    /* Checking this if interrupt is used also, to avoid
+       issues with AES fault injection
+    */
+    aes_hal_wait_done();
+
+    esp_aes_wait_dma_done(output_desc_tail);
+    return 0;
+}
+
+
+/* Output buffers in external ram needs to be 16-byte
aligned and DMA cannot access input in the iCache mem range,
+   reallocate them into internal memory and encrypt in chunks to avoid
+   having to malloc too big of a buffer
+
+   The function esp_aes_process_dma_ext_ram zeroises the output buffer in the case of memory allocation failure.
+
+   realloc_input / realloc_output select which side is copied through a temporary
+   MALLOC_CAP_DMA buffer of at most AES_MAX_CHUNK_WRITE_SIZE bytes.
+   Returns 0 on success, -1 when an allocation fails or esp_aes_process_dma fails for a chunk.
+*/
+
+static int esp_aes_process_dma_ext_ram(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out, bool realloc_input, bool realloc_output)
+{
+    size_t chunk_len;
+    int ret = 0;
+    size_t offset = 0; /* same width as len, so it cannot overflow before len does */
+    unsigned char *input_buf = NULL;
+    unsigned char *output_buf = NULL;
+    const unsigned char *dma_input;
+    chunk_len = MIN(AES_MAX_CHUNK_WRITE_SIZE, len);
+
+    if (realloc_input) {
+        input_buf = heap_caps_malloc(chunk_len, MALLOC_CAP_DMA);
+
+        if (input_buf == NULL) {
+            mbedtls_platform_zeroize(output, len);
+            ESP_LOGE(TAG, "Failed to allocate memory");
+            return -1;
+        }
+    }
+
+    if (realloc_output) {
+        output_buf = heap_caps_malloc(chunk_len, MALLOC_CAP_DMA);
+
+        if (output_buf == NULL) {
+            mbedtls_platform_zeroize(output, len);
+            ESP_LOGE(TAG, "Failed to allocate memory");
+            /* Leave through cleanup so that input_buf, if allocated above, is freed */
+            ret = -1;
+            goto cleanup;
+        }
+    } else {
+        output_buf = output;
+    }
+
+    while (len) {
+        chunk_len = MIN(AES_MAX_CHUNK_WRITE_SIZE, len);
+
+        /* If input needs realloc then copy it, else use the input with offset*/
+        if (realloc_input) {
+            memcpy(input_buf, input + offset, chunk_len);
+            dma_input = input_buf;
+        } else {
+            dma_input = input + offset;
+        }
+
+        if (esp_aes_process_dma(ctx, dma_input, output_buf, chunk_len, stream_out) != 0) {
+            ret = -1;
+            goto cleanup;
+        }
+
+        if (realloc_output) {
+            memcpy(output + offset, output_buf, chunk_len);
+        } else {
+            /* Writing in place: advance to where the next chunk starts */
+            output_buf = output + offset + chunk_len;
+        }
+
+        len -= chunk_len;
+        offset += chunk_len;
+    }
+
+cleanup:
+
+    if (realloc_input) {
+        free(input_buf);
+    }
+    /* Without realloc_output, output_buf points into the caller's buffer and must not be freed */
+    if (realloc_output) {
+        free(output_buf);
+    }
+
+    return ret;
+}
+
+#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE
+
+#define ALIGN_UP(num, align)    (((num) + ((align) - 1)) & ~((align) - 1))
+#define ALIGN_DOWN(num, align)  ((num) & ~((align) - 1))
+#define AES_DMA_ALLOC_CAPS      (MALLOC_CAP_DMA | MALLOC_CAP_8BIT)
+
+/* Wrapper around esp_dma_calloc() that returns the allocated pointer, or NULL when the call fails */
+static inline void *aes_dma_calloc(size_t num, size_t size, uint32_t caps, size_t *actual_size)
+{
+    void *ptr = NULL;
+    if (esp_dma_calloc(num, size, caps, &ptr, actual_size) != ESP_OK) {
+        return NULL;
+    }
+    return ptr;
+}
+
+/* Returns the alignment reported by esp_cache_get_alignment() for the memory `addr` lives in
+ * (the PSRAM flag is used for external RAM when CONFIG_SPIRAM && SOC_PSRAM_DMA_CAPABLE,
+ * the DMA flag otherwise), or 0 when that query fails.
+ */
+static inline size_t get_cache_line_size(const void *addr)
+{
+    esp_err_t ret = ESP_FAIL;
+    size_t cache_line_size = 0;
+
+#if (CONFIG_SPIRAM && SOC_PSRAM_DMA_CAPABLE)
+    if (esp_ptr_external_ram(addr)) {
+        ret = esp_cache_get_alignment(ESP_CACHE_MALLOC_FLAG_PSRAM, &cache_line_size);
+    } else
+#endif
+    {
+        ret = esp_cache_get_alignment(ESP_CACHE_MALLOC_FLAG_DMA, &cache_line_size);
+    }
+
+    if (ret != ESP_OK) {
+        return 0;
+    }
+
+    return cache_line_size;
+}
+
+/* Chains the first crypto_dma_desc_num descriptors of `dmadesc` into a linked list:
+ * each `next` points to the following array element, the last one gets next == NULL
+ * and suc_eof == 1.
+ *
+ * With SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE, every descriptor's buffer and then the
+ * descriptor array itself are synced cache-to-memory; the first failing
+ * esp_cache_msync() result is returned. Returns ESP_OK otherwise.
+ */
+static inline esp_err_t dma_desc_link(crypto_dma_desc_t *dmadesc, size_t crypto_dma_desc_num, size_t cache_line_size)
+{
+    esp_err_t ret = ESP_OK;
+    for (size_t i = 0; i < crypto_dma_desc_num; i++) {
+        const bool is_last = (i == crypto_dma_desc_num - 1);
+        dmadesc[i].dw0.suc_eof = is_last ? 1 : 0;
+        dmadesc[i].next = is_last ? NULL : &dmadesc[i + 1];
+#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE
+        /* Write back both input buffers and output buffers to clear any cache dirty bit if set */
+        ret = esp_cache_msync(dmadesc[i].buffer, dmadesc[i].dw0.length, ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED);
+        if (ret != ESP_OK) {
+            return ret;
+        }
+#endif
+    }
+#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE
+    ret = esp_cache_msync(dmadesc, ALIGN_UP(crypto_dma_desc_num * sizeof(crypto_dma_desc_t), cache_line_size), ESP_CACHE_MSYNC_FLAG_DIR_C2M);
+#endif
+    return ret;
+}
+
+/* Fills descriptors starting at dmadesc[index] so that together they cover `len` bytes
+ * of `data`, at most max_desc_size bytes per descriptor. Sets size, length, owner and
+ * buffer only; `next` and `suc_eof` are left for dma_desc_link().
+ */
+static inline void dma_desc_populate(crypto_dma_desc_t *dmadesc, const uint8_t *data, size_t len, int max_desc_size, size_t index)
+{
+    while (len) {
+        /* Chunk length is kept in size_t so a large len is not narrowed to int */
+        size_t dmachunklen = len;
+        if (dmachunklen > (size_t)max_desc_size) {
+            dmachunklen = (size_t)max_desc_size;
+        }
+        dmadesc[index].dw0.size = dmachunklen;
+        dmadesc[index].dw0.length = dmachunklen;
+        dmadesc[index].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
+        dmadesc[index].buffer = (void *)data;
+        len -= dmachunklen;
+        data += dmachunklen;
+        index++;
+    }
+}
+
+/**
+ * @brief Function that allocates and populates a given number of DMA descriptors to form a DMA descriptor linked list
+ *
+ * @param buffer Data (can be unaligned) buffer that is to be operated upon in an AES operation (ciphertext or plaintext)
+ * @param len Length of the above data buffer
+ * @param start_alignment_buffer The buffer which the first DMA descriptor points to for processing start_alignment length of bytes from the above buffer
+ * @param end_alignment_buffer The buffer which the last DMA descriptor points to for processing end_alignment length of bytes from the above buffer
+ * @param alignment_buffer_size Size of an alignment buffer
+ * @param cache_line_size Size of cache line that is needed to align the buffers and DMA descriptors before cache sync
+ * @param[out] start_alignment The least number of bytes from the start of the buffer that are unaligned to the Cache requirements (L1 Cache alignments)
+ * @param[out] end_alignment
The number of bytes at the end of the buffer aligned up to AES_BLOCK_BYTES that are unaligned to the Cache requirements (L1 Cache alignments) + * @param[out] dma_descs Pointer to the list of DMA descriptors that are needed to be populated + * @param[out] dma_desc_num Number of DMA descriptors that are needed to be allocated + */ +static esp_err_t generate_descriptor_list(const uint8_t *buffer, const size_t len, uint8_t **start_alignment_buffer, uint8_t **end_alignment_buffer, size_t alignment_buffer_size, size_t cache_line_size, size_t *start_alignment, size_t *end_alignment, crypto_dma_desc_t **dma_descs, size_t *dma_desc_num, bool is_output) +{ + size_t unaligned_start_bytes = 0; + size_t aligned_block_bytes = 0; + size_t unaligned_end_bytes = 0; + size_t dma_descs_needed = 0; + + uint8_t *start_alignment_stream_buffer = NULL; + uint8_t *end_alignment_stream_buffer = NULL; + + crypto_dma_desc_t *dma_descriptors = NULL; + + if (len == 0) { + goto ret; + } + + /* Extra bytes that were needed to be processed for supplying the AES peripheral a padded multiple of 16 bytes input */ + size_t extra_bytes = ALIGN_UP(len, AES_BLOCK_BYTES) - len; + + size_t start_offset = ((intptr_t)buffer & (cache_line_size - 1)); + + if (start_offset) { + unaligned_start_bytes = cache_line_size - start_offset; + } else { + unaligned_start_bytes = 0; + } + + if (unaligned_start_bytes < len) { + aligned_block_bytes = ALIGN_DOWN((len - unaligned_start_bytes), cache_line_size); + unaligned_end_bytes = len - unaligned_start_bytes - aligned_block_bytes + extra_bytes; + } else { + unaligned_start_bytes = len + extra_bytes; + unaligned_end_bytes = 0; + aligned_block_bytes = 0; + } + + size_t max_desc_size = (is_output) ? DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED : DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED; + + dma_descs_needed = (unaligned_start_bytes ? 1 : 0) + dma_desc_get_required_num(aligned_block_bytes, max_desc_size) + (unaligned_end_bytes ? 
1 : 0); + + /* Allocate memory for DMA descriptors of total size aligned up to a multiple of cache line size */ + dma_descriptors = (crypto_dma_desc_t *) aes_dma_calloc(dma_descs_needed, sizeof(crypto_dma_desc_t), MALLOC_CAP_DMA, NULL); + if (dma_descriptors == NULL) { + ESP_LOGE(TAG, "Failed to allocate memory for the array of DMA descriptors"); + return ESP_FAIL; + } + + size_t populated_dma_descs = 0; + + if (unaligned_start_bytes) { + start_alignment_stream_buffer = aes_dma_calloc(alignment_buffer_size, sizeof(uint8_t), AES_DMA_ALLOC_CAPS, NULL); + if (start_alignment_stream_buffer == NULL) { + ESP_LOGE(TAG, "Failed to allocate memory for start alignment buffer"); + return ESP_FAIL; + } + + memset(start_alignment_stream_buffer, 0, unaligned_start_bytes); + memcpy(start_alignment_stream_buffer, buffer, (unaligned_start_bytes > len) ? len : unaligned_start_bytes); + memset(start_alignment_stream_buffer + unaligned_start_bytes, 0, alignment_buffer_size - unaligned_start_bytes); + + // add start alignment node to the DMA linked list + dma_desc_populate(dma_descriptors, start_alignment_stream_buffer, unaligned_start_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, populated_dma_descs); + populated_dma_descs += (unaligned_start_bytes ? 
1 : 0); + } + + if (aligned_block_bytes) { + // add "aligned_dma_desc_num" nodes to DMA linked list + dma_desc_populate(dma_descriptors, buffer + unaligned_start_bytes, aligned_block_bytes, max_desc_size, populated_dma_descs); + populated_dma_descs += dma_desc_get_required_num(aligned_block_bytes, max_desc_size); + } + + if (unaligned_end_bytes) { + end_alignment_stream_buffer = aes_dma_calloc(alignment_buffer_size, sizeof(uint8_t), AES_DMA_ALLOC_CAPS, NULL); + if (end_alignment_stream_buffer == NULL) { + ESP_LOGE(TAG, "Failed to allocate memory for end alignment buffer"); + return ESP_FAIL; + } + + memset(end_alignment_stream_buffer, 0, unaligned_end_bytes); + memcpy(end_alignment_stream_buffer, buffer + unaligned_start_bytes + aligned_block_bytes, unaligned_end_bytes - extra_bytes); + memset(end_alignment_stream_buffer + unaligned_end_bytes, 0, alignment_buffer_size - unaligned_end_bytes); + + // add end alignment node to the DMA linked list + dma_desc_populate(dma_descriptors, end_alignment_stream_buffer, unaligned_end_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, populated_dma_descs); + populated_dma_descs += (unaligned_end_bytes ? 
1 : 0);
+    }
+
+    if (dma_desc_link(dma_descriptors, dma_descs_needed, cache_line_size) != ESP_OK) {
+        ESP_LOGE(TAG, "DMA descriptors cache sync C2M failed");
+        return ESP_FAIL;
+    }
+
+ret:
+    if (start_alignment != NULL) {
+        *start_alignment = unaligned_start_bytes;
+    }
+
+    if (end_alignment != NULL) {
+        *end_alignment = unaligned_end_bytes;
+    }
+
+    if (dma_desc_num != NULL) {
+        *dma_desc_num = dma_descs_needed;
+    }
+
+    *dma_descs = dma_descriptors;
+    *start_alignment_buffer = start_alignment_stream_buffer;
+    *end_alignment_buffer = end_alignment_stream_buffer;
+
+    return ESP_OK;
+}
+
+/* Encrypt/decrypt the input using DMA (variant built when SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE is set)
+ *
+ * Descriptor lists and optional start/end alignment buffers are generated for both
+ * the input and the output; after the transform the alignment buffers are copied
+ * back into `output`, and everything allocated here is released at `cleanup`.
+ *
+ * The output buffer is zeroised in the case of following conditions:
+ * 1. If key is not written in the hardware
+ * 2. Cache line size lookup or DMA descriptor generation fails
+ * 3. If AES interrupt is enabled and ISR initialisation fails
+ * 4. Failure in any of the AES/DMA/cache-sync operations
+ *
+ * @param ctx        AES context, its key must already be written to the hardware
+ * @param input      Pointer to input data
+ * @param output     Pointer to output data
+ * @param len        Length of the input data, must not be 0
+ * @param stream_out Receives the last AES_BLOCK_BYTES of output when len is not a
+ *                   multiple of AES_BLOCK_BYTES; may be NULL otherwise
+ * @return 0 on success, MBEDTLS_ERR_AES_INVALID_INPUT_LENGTH if the key is not in
+ *         hardware, -1 on any other failure, or the result of
+ *         esp_aes_process_dma_ext_ram() when a buffer is not DMA capable
+ */
+int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out)
+{
+    unsigned stream_bytes = len % AES_BLOCK_BYTES; // bytes which aren't in a full block
+    unsigned block_bytes = len - stream_bytes;     // bytes which are in a full block
+    unsigned blocks = (block_bytes / AES_BLOCK_BYTES) + ((stream_bytes > 0) ? 1 : 0);
+
+    bool use_intr = false;
+    bool input_needs_realloc = false;
+    bool output_needs_realloc = false;
+    int ret = 0;
+
+    assert(len > 0); // caller shouldn't ever have len set to zero
+    assert(stream_bytes == 0 || stream_out != NULL); // stream_out can be NULL if we're processing full block(s)
+
+    /* If no key is written to hardware yet, either the user hasn't called
+       mbedtls_aes_setkey_enc/mbedtls_aes_setkey_dec - meaning we also don't
+       know which mode to use - or a fault skipped the
+       key write to hardware. Treat this as a fatal error and zero the output block.
+     */
+    if (ctx->key_in_hardware != ctx->key_bytes) {
+        mbedtls_platform_zeroize(output, len);
+        return MBEDTLS_ERR_AES_INVALID_INPUT_LENGTH;
+    }
+
+    /* DMA cannot access memory in the iCache range, copy input to internal ram */
+    if (!s_check_dma_capable(input)) {
+        input_needs_realloc = true;
+    }
+
+    if (!s_check_dma_capable(output)) {
+        output_needs_realloc = true;
+    }
+
+    /* If either input or output is inaccessible to the DMA then they need to be reallocated */
+    if (input_needs_realloc || output_needs_realloc) {
+        return esp_aes_process_dma_ext_ram(ctx, input, output, len, stream_out, input_needs_realloc, output_needs_realloc);
+    }
+
+    size_t input_cache_line_size = get_cache_line_size(input);
+    size_t output_cache_line_size = get_cache_line_size(output);
+
+    if (input_cache_line_size == 0 || output_cache_line_size == 0) {
+        mbedtls_platform_zeroize(output, len);
+        ESP_LOGE(TAG, "Getting cache line size failed");
+        return -1;
+    }
+
+    size_t input_alignment_buffer_size = MAX(2 * input_cache_line_size, AES_BLOCK_BYTES);
+
+    crypto_dma_desc_t *input_desc = NULL;
+    uint8_t *input_start_stream_buffer = NULL;
+    uint8_t *input_end_stream_buffer = NULL;
+
+    if (generate_descriptor_list(input, len, &input_start_stream_buffer, &input_end_stream_buffer, input_alignment_buffer_size, input_cache_line_size, NULL, NULL, &input_desc, NULL, false) != ESP_OK) {
+        mbedtls_platform_zeroize(output, len);
+        ESP_LOGE(TAG, "Generating input DMA descriptors failed");
+        return -1;
+    }
+
+    size_t output_alignment_buffer_size = MAX(2 * output_cache_line_size, AES_BLOCK_BYTES);
+
+    crypto_dma_desc_t *output_desc = NULL;
+    uint8_t *output_start_stream_buffer = NULL;
+    uint8_t *output_end_stream_buffer = NULL;
+    size_t output_start_alignment = 0;
+    size_t output_end_alignment = 0;
+    size_t output_dma_desc_num = 0;
+
+    if (generate_descriptor_list(output, len, &output_start_stream_buffer, &output_end_stream_buffer, output_alignment_buffer_size, output_cache_line_size, &output_start_alignment, &output_end_alignment, &output_desc, &output_dma_desc_num, true) != ESP_OK) {
+        /* The input descriptors and alignment buffers are already allocated at this
+           point: leave through cleanup (which also zeroises the output) instead of
+           returning directly, otherwise they are leaked. */
+        ESP_LOGE(TAG, "Generating output DMA descriptors failed");
+        ret = -1;
+        goto cleanup;
+    }
+
+    crypto_dma_desc_t *out_desc_tail = &output_desc[output_dma_desc_num - 1];
+
+#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT)
+    /* Only use interrupt for long AES operations */
+    if (len > AES_DMA_INTR_TRIG_LEN) {
+        use_intr = true;
+        if (esp_aes_isr_initialise() != ESP_OK) {
+            ESP_LOGE(TAG, "ESP-AES ISR initialisation failed");
+            ret = -1;
+            goto cleanup;
+        }
+    } else
+#endif
+    {
+        aes_hal_interrupt_enable(false);
+    }
+
+    if (esp_aes_dma_start(input_desc, output_desc) != ESP_OK) {
+        ESP_LOGE(TAG, "esp_aes_dma_start failed, no DMA channel available");
+        ret = -1;
+        goto cleanup;
+    }
+
+    aes_hal_transform_dma_start(blocks);
+
+    if (esp_aes_dma_wait_complete(use_intr, out_desc_tail) < 0) {
+        ESP_LOGE(TAG, "esp_aes_dma_wait_complete failed");
+        ret = -1;
+        goto cleanup;
+    }
+
+#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE
+    if (esp_cache_msync(output_desc, ALIGN_UP(output_dma_desc_num * sizeof(crypto_dma_desc_t), output_cache_line_size), ESP_CACHE_MSYNC_FLAG_DIR_M2C) != ESP_OK) {
+        ESP_LOGE(TAG, "Output DMA descriptor cache sync M2C failed");
+        ret = -1;
+        goto cleanup;
+    }
+    /* size_t index: output_dma_desc_num is a size_t, avoid a signed/unsigned comparison */
+    for (size_t i = 0; i < output_dma_desc_num; i++) {
+        if (esp_cache_msync(output_desc[i].buffer, output_desc[i].dw0.length, ESP_CACHE_MSYNC_FLAG_DIR_M2C | ESP_CACHE_MSYNC_FLAG_UNALIGNED) != ESP_OK) {
+            ESP_LOGE(TAG, "Output DMA descriptor buffers cache sync M2C failed");
+            ret = -1;
+            goto cleanup;
+        }
+    }
+#endif
+
+    aes_hal_transform_dma_finish();
+
+    /* Extra bytes that were needed to be processed for supplying the AES peripheral a padded multiple of 16 bytes input */
+    size_t extra_bytes = ALIGN_UP(len, AES_BLOCK_BYTES) - len;
+
+    /* Copy the parts of the result that were produced into the alignment buffers back into the caller's output */
+    if (output_start_alignment) {
+        memcpy(output, output_start_stream_buffer, (output_start_alignment > len) ? len : output_start_alignment);
+    }
+
+    if (output_end_alignment) {
+        memcpy(output + len - (output_end_alignment - extra_bytes), output_end_stream_buffer, output_end_alignment - extra_bytes);
+    }
+
+    /* Save the last full output block for the caller when the data did not end on a block boundary */
+    if (stream_bytes > 0) {
+        if (output_end_alignment) {
+            if (output_end_alignment >= AES_BLOCK_BYTES) {
+                memcpy(stream_out, output_end_stream_buffer + output_end_alignment - AES_BLOCK_BYTES, AES_BLOCK_BYTES);
+            } else {
+                size_t to_copy_from_output = AES_BLOCK_BYTES - output_end_alignment;
+                memcpy(stream_out, output + len - to_copy_from_output, to_copy_from_output);
+                memcpy(stream_out + to_copy_from_output, output_end_stream_buffer, output_end_alignment);
+            }
+        }
+        else if (output_start_alignment >= len) {
+            memcpy(stream_out, output_start_stream_buffer + output_start_alignment - AES_BLOCK_BYTES, AES_BLOCK_BYTES);
+        }
+    }
+
+cleanup:
+    if (ret != 0) {
+        mbedtls_platform_zeroize(output, len);
+    }
+
+    /* free(NULL) is a no-op, so buffers that were never allocated need no guard */
+    free(input_start_stream_buffer);
+    free(input_end_stream_buffer);
+
+    free(output_start_stream_buffer);
+    free(output_end_stream_buffer);
+
+    free(input_desc);
+    free(output_desc);
+
+    return ret;
+}
+
+#else /* SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE */
+
+/* These are static due to:
+ *  * Must be in DMA capable memory, so stack is not a safe place to put them
+ *  * To avoid having to malloc/free them for every DMA operation
+ */
+static DRAM_ATTR crypto_dma_desc_t s_stream_in_desc;
+static DRAM_ATTR crypto_dma_desc_t s_stream_out_desc;
+static DRAM_ATTR uint8_t s_stream_in[AES_BLOCK_BYTES];
+static DRAM_ATTR uint8_t s_stream_out[AES_BLOCK_BYTES];
+
+/** Append a descriptor to the chain, set head if chain empty
+ *
+ * @param[out] head Pointer to the first/head node of the DMA descriptor linked list
+ * @param item Pointer to the DMA descriptor node that has to be appended
+ */
+static inline void dma_desc_append(crypto_dma_desc_t **head, crypto_dma_desc_t *item)
+{
+    crypto_dma_desc_t *it;
+    if (*head == NULL) {
+        *head = item;
+        return;
+    }
+
+    it = *head;
+
+    while 
(it->next != 0) {
+        it = (crypto_dma_desc_t *)it->next;
+    }
+    it->dw0.suc_eof = 0;
+    it->next = item;
+}
+
+/**
+ * Generate a linked list pointing to a (huge) buffer in a descriptor array.
+ *
+ * The caller should ensure there is enough size to hold the array, by calling
+ * `dma_desc_get_required_num` with the same or less than the max_desc_size argument.
+ *
+ * Nothing is written when len or max_desc_size is not positive.
+ *
+ * @param[out] dmadesc Output of a descriptor array, the head should be fed to the DMA.
+ * @param data Buffer for the descriptors to point to.
+ * @param len Size (or length for TX) of the buffer
+ * @param max_desc_size Maximum length of each descriptor
+ * @param isrx The RX DMA may require the buffer to be word-aligned, set to true for a RX link, otherwise false.
+ */
+static inline void dma_desc_setup_link(crypto_dma_desc_t* dmadesc, const uint8_t *data, int len, int max_desc_size, bool isrx)
+{
+    /* Without this guard an empty buffer would skip the loop and the
+     * dmadesc[i - 1] writes below would land before the start of the array,
+     * and a non-positive max_desc_size would never consume the buffer. */
+    if (len <= 0 || max_desc_size <= 0) {
+        return;
+    }
+
+    int i = 0;
+    while (len > 0) {
+        int dmachunklen = (len > max_desc_size) ? max_desc_size : len;
+        //Receive needs DMA length rounded to next 32-bit boundary
+        int desc_len = isrx ? ((dmachunklen + 3) & (~3)) : dmachunklen;
+
+        dmadesc[i].dw0.size = desc_len;
+        dmadesc[i].dw0.length = desc_len;
+        dmadesc[i].buffer = (void *)data;
+        dmadesc[i].dw0.suc_eof = 0;
+        dmadesc[i].dw0.owner = DMA_DESCRIPTOR_BUFFER_OWNER_DMA;
+        dmadesc[i].next = &dmadesc[i + 1];
+        len -= dmachunklen;
+        data += dmachunklen;
+        i++;
+    }
+    dmadesc[i - 1].dw0.suc_eof = 1; //Mark last DMA desc as end of stream.
+    dmadesc[i - 1].next = NULL;
+}
+
+/* Encrypt/decrypt the input using DMA
+ * The function esp_aes_process_dma zeroises the output buffer in the case of following conditions:
+ * 1. If key is not written in the hardware
+ * 2. Memory allocation failures
+ * 3. If AES interrupt is enabled and ISR initialisation fails
+ * 4. 
Failure in any of the AES operations
+ *
+ * @param ctx        AES context, its key must already be written to the hardware
+ * @param input      Pointer to input data
+ * @param output     Pointer to output data
+ * @param len        Length of the input data, must not be 0
+ * @param stream_out Receives the full AES_BLOCK_BYTES output of the trailing partial
+ *                   block when len is not a multiple of AES_BLOCK_BYTES; may be NULL otherwise
+ * @return 0 on success, MBEDTLS_ERR_AES_INVALID_INPUT_LENGTH if the key is not in
+ *         hardware, the esp_cache_get_alignment() error if that call fails, -1 on any
+ *         other failure, or the result of esp_aes_process_dma_ext_ram() when a buffer
+ *         has to be reallocated
+ */
+int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out)
+{
+    crypto_dma_desc_t *in_desc_head = NULL, *out_desc_head = NULL;
+    crypto_dma_desc_t *out_desc_tail = NULL; /* pointer to the final output descriptor */
+    crypto_dma_desc_t *block_desc = NULL, *block_in_desc = NULL, *block_out_desc = NULL;
+    size_t crypto_dma_desc_num = 0;
+    unsigned stream_bytes = len % AES_BLOCK_BYTES; // bytes which aren't in a full block
+    unsigned block_bytes = len - stream_bytes;     // bytes which are in a full block
+    unsigned blocks = (block_bytes / AES_BLOCK_BYTES) + ((stream_bytes > 0) ? 1 : 0);
+    bool use_intr = false;
+    bool input_needs_realloc = false;
+    bool output_needs_realloc = false;
+    int ret = 0;
+
+    assert(len > 0); // caller shouldn't ever have len set to zero
+    assert(stream_bytes == 0 || stream_out != NULL); // stream_out can be NULL if we're processing full block(s)
+
+    /* If no key is written to hardware yet, either the user hasn't called
+       mbedtls_aes_setkey_enc/mbedtls_aes_setkey_dec - meaning we also don't
+       know which mode to use - or a fault skipped the
+       key write to hardware. Treat this as a fatal error and zero the output block.
+     */
+    if (ctx->key_in_hardware != ctx->key_bytes) {
+        mbedtls_platform_zeroize(output, len);
+        return MBEDTLS_ERR_AES_INVALID_INPUT_LENGTH;
+    }
+
+    if (block_bytes > 0) {
+        /* Flush cache if input in external ram */
+#if (CONFIG_SPIRAM && SOC_PSRAM_DMA_CAPABLE)
+        if (esp_ptr_external_ram(input)) {
+            if (esp_cache_msync((void *)input, len, ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED) != ESP_OK) {
+                mbedtls_platform_zeroize(output, len);
+                ESP_LOGE(TAG, "Cache sync failed for the input in external RAM");
+                return -1;
+            }
+        }
+        if (esp_ptr_external_ram(output)) {
+            size_t dcache_line_size;
+            ret = esp_cache_get_alignment(ESP_CACHE_MALLOC_FLAG_PSRAM, &dcache_line_size);
+            if (ret != ESP_OK) {
+                /* Nothing is allocated yet, but the output must still be zeroised on failure */
+                mbedtls_platform_zeroize(output, len);
+                ESP_LOGE(TAG, "Getting cache line size failed");
+                return ret;
+            }
+            if ((((intptr_t)(output) & (dcache_line_size - 1)) != 0) || (block_bytes % dcache_line_size != 0)) {
+                // Non aligned ext-mem buffer
+                output_needs_realloc = true;
+            }
+        }
+#endif
+        /* DMA cannot access memory in the iCache range, copy input to internal ram */
+        if (!s_check_dma_capable(input)) {
+            input_needs_realloc = true;
+        }
+
+        if (!s_check_dma_capable(output)) {
+            output_needs_realloc = true;
+        }
+
+        /* If either input or output is inaccessible to the DMA then they need to be reallocated */
+        if (input_needs_realloc || output_needs_realloc) {
+            return esp_aes_process_dma_ext_ram(ctx, input, output, len, stream_out, input_needs_realloc, output_needs_realloc);
+        }
+
+        /* Set up dma descriptors for input and output considering the 16 byte alignment requirement for EDMA */
+        crypto_dma_desc_num = dma_desc_get_required_num(block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED);
+
+        /* Allocate both in and out descriptors to save a malloc/free per function call */
+        block_desc = heap_caps_aligned_calloc(8, crypto_dma_desc_num * 2, sizeof(crypto_dma_desc_t), MALLOC_CAP_DMA);
+        if (block_desc == NULL) {
+            mbedtls_platform_zeroize(output, len);
+            ESP_LOGE(TAG, "Failed to allocate memory");
+            return -1;
+        }
+
+        block_in_desc = block_desc;
+        block_out_desc = block_desc + crypto_dma_desc_num;
+
+        // the size field has 12 bits, but 0 not for 4096.
+        // to avoid possible problem when the size is not word-aligned, we only use 4096-4 per desc.
+        // Maximum size of data in the buffer that a DMA descriptor can hold.
+        dma_desc_setup_link(block_in_desc, input, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, 0);
+
+        //Limit max inlink descriptor length to be 16 byte aligned, require for EDMA
+        dma_desc_setup_link(block_out_desc, output, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_16B_ALIGNED, 0);
+
+        /* Setup in/out start descriptors */
+        dma_desc_append(&in_desc_head, block_in_desc);
+        dma_desc_append(&out_desc_head, block_out_desc);
+
+        out_desc_tail = &block_out_desc[crypto_dma_desc_num - 1];
+    }
+
+    /* Any leftover bytes which are appended as an additional DMA list */
+    if (stream_bytes > 0) {
+
+        memset(&s_stream_in_desc, 0, sizeof(crypto_dma_desc_t));
+        memset(&s_stream_out_desc, 0, sizeof(crypto_dma_desc_t));
+
+        memset(s_stream_in, 0, AES_BLOCK_BYTES);
+        memset(s_stream_out, 0, AES_BLOCK_BYTES);
+
+        memcpy(s_stream_in, input + block_bytes, stream_bytes);
+
+        dma_desc_setup_link(&s_stream_in_desc, s_stream_in, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, 0);
+        dma_desc_setup_link(&s_stream_out_desc, s_stream_out, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, 0);
+
+        /* Link with block descriptors */
+        dma_desc_append(&in_desc_head, &s_stream_in_desc);
+        dma_desc_append(&out_desc_head, &s_stream_out_desc);
+
+        out_desc_tail = &s_stream_out_desc;
+    }
+
+#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT)
+    /* Only use interrupt for long AES operations */
+    if (len > AES_DMA_INTR_TRIG_LEN) {
+        use_intr = true;
+        if (esp_aes_isr_initialise() != ESP_OK) {
+            ESP_LOGE(TAG, "ESP-AES ISR initialisation failed");
+            ret = -1;
+            goto cleanup;
+        }
+    } else
+#endif
+    {
+        aes_hal_interrupt_enable(false);
+    }
+
+    if (esp_aes_dma_start(in_desc_head, out_desc_head) != ESP_OK) {
+        ESP_LOGE(TAG, "esp_aes_dma_start failed, no DMA channel available");
+        ret = -1;
+        goto cleanup;
+    }
+
+    aes_hal_transform_dma_start(blocks);
+
+    if (esp_aes_dma_wait_complete(use_intr, out_desc_tail) < 0) {
+        ESP_LOGE(TAG, "esp_aes_dma_wait_complete failed");
+        ret = -1;
+        goto cleanup;
+    }
+
+#if (CONFIG_SPIRAM && SOC_PSRAM_DMA_CAPABLE)
+    if (block_bytes > 0) {
+        if (esp_ptr_external_ram(output)) {
+            if(esp_cache_msync((void*)output, block_bytes, ESP_CACHE_MSYNC_FLAG_DIR_M2C | ESP_CACHE_MSYNC_FLAG_UNALIGNED) != ESP_OK) {
+                /* block_desc is allocated at this point: leave through cleanup (which
+                   zeroises the output and frees it) instead of returning directly. */
+                ESP_LOGE(TAG, "Cache sync failed for the output in external RAM");
+                ret = -1;
+                goto cleanup;
+            }
+        }
+    }
+#endif
+    aes_hal_transform_dma_finish();
+
+    if (stream_bytes > 0) {
+        /* Only stream_bytes belong to the caller's output; the full block is kept in stream_out */
+        memcpy(output + block_bytes, s_stream_out, stream_bytes);
+        memcpy(stream_out, s_stream_out, AES_BLOCK_BYTES);
+    }
+
+cleanup:
+    if (ret != 0) {
+        mbedtls_platform_zeroize(output, len);
+    }
+    free(block_desc);
+    return ret;
+}
+#endif /* SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE */
+
+#if CONFIG_MBEDTLS_HARDWARE_GCM
+
+/* Encrypt/decrypt with AES-GCM the input using DMA
+ * The function esp_aes_process_dma_gcm zeroises the output buffer in the case of following conditions:
+ * 1. If key is not written in the hardware
+ * 2. Memory allocation failures
+ * 3. If AES interrupt is enabled and ISR initialisation fails
+ * 4. 
Failure in any of the AES operations + */ +int esp_aes_process_dma_gcm(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, crypto_dma_desc_t *aad_desc, size_t aad_len) +{ + crypto_dma_desc_t *in_desc_head = NULL, *out_desc_head = NULL, *len_desc = NULL; + crypto_dma_desc_t *out_desc_tail = NULL; /* pointer to the final output descriptor */ + crypto_dma_desc_t stream_in_desc, stream_out_desc; + crypto_dma_desc_t *block_desc = NULL, *block_in_desc = NULL, *block_out_desc = NULL; + size_t crypto_dma_desc_num = 0; + uint32_t len_buf[4] = {}; + uint8_t stream_in[16] = {}; + uint8_t stream_out[16] = {}; + unsigned stream_bytes = len % AES_BLOCK_BYTES; // bytes which aren't in a full block + unsigned block_bytes = len - stream_bytes; // bytes which are in a full block + + unsigned blocks = (block_bytes / AES_BLOCK_BYTES) + ((stream_bytes > 0) ? 1 : 0); + + bool use_intr = false; + int ret = 0; + + /* If no key is written to hardware yet, either the user hasn't called + mbedtls_aes_setkey_enc/mbedtls_aes_setkey_dec - meaning we also don't + know which mode to use - or a fault skipped the + key write to hardware. Treat this as a fatal error and zero the output block. 
+ */ + if (ctx->key_in_hardware != ctx->key_bytes) { + mbedtls_platform_zeroize(output, len); + return MBEDTLS_ERR_AES_INVALID_INPUT_LENGTH; + } + + /* Set up dma descriptors for input and output */ + crypto_dma_desc_num = dma_desc_get_required_num(block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED); + + /* Allocate both in and out descriptors to save a malloc/free per function call, add 1 for length descriptor */ + block_desc = heap_caps_calloc((crypto_dma_desc_num * 2) + 1, sizeof(crypto_dma_desc_t), MALLOC_CAP_DMA); + if (block_desc == NULL) { + mbedtls_platform_zeroize(output, len); + ESP_LOGE(TAG, "Failed to allocate memory"); + return -1; + } + + block_in_desc = block_desc; + len_desc = block_desc + crypto_dma_desc_num; + block_out_desc = block_desc + crypto_dma_desc_num + 1; + + if (aad_desc != NULL) { + dma_desc_append(&in_desc_head, aad_desc); + } + + if (block_bytes > 0) { + dma_desc_setup_link(block_in_desc, input, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, 0); + dma_desc_setup_link(block_out_desc, output, block_bytes, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, 0); + + dma_desc_append(&in_desc_head, block_in_desc); + dma_desc_append(&out_desc_head, block_out_desc); + + out_desc_tail = &block_out_desc[crypto_dma_desc_num - 1]; + } + + /* Any leftover bytes which are appended as an additional DMA list */ + if (stream_bytes > 0) { + memcpy(stream_in, input + block_bytes, stream_bytes); + + dma_desc_setup_link(&stream_in_desc, stream_in, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, 0); + dma_desc_setup_link(&stream_out_desc, stream_out, AES_BLOCK_BYTES, DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED, 0); + + dma_desc_append(&in_desc_head, &stream_in_desc); + dma_desc_append(&out_desc_head, &stream_out_desc); + + out_desc_tail = &stream_out_desc; + } + + + len_buf[1] = __builtin_bswap32(aad_len * 8); + len_buf[3] = __builtin_bswap32(len * 8); + + len_desc->dw0.length = sizeof(len_buf); + len_desc->dw0.size = sizeof(len_buf); + 
len_desc->dw0.owner = 1; + len_desc->dw0.suc_eof = 1; + len_desc->buffer = (uint8_t *)len_buf; + + dma_desc_append(&in_desc_head, len_desc); + +#if defined (CONFIG_MBEDTLS_AES_USE_INTERRUPT) + /* Only use interrupt for long AES operations */ + if (len > AES_DMA_INTR_TRIG_LEN) { + use_intr = true; + if (esp_aes_isr_initialise() != ESP_OK) { + ESP_LOGE(TAG, "ESP-AES ISR initialisation failed"); + ret = -1; + goto cleanup; + } + } else +#endif + { + aes_hal_interrupt_enable(false); + } + + /* Start AES operation */ + if (esp_aes_dma_start(in_desc_head, out_desc_head) != ESP_OK) { + ESP_LOGE(TAG, "esp_aes_dma_start failed, no DMA channel available"); + ret = -1; + goto cleanup; + } + + aes_hal_transform_dma_gcm_start(blocks); + + if (esp_aes_dma_wait_complete(use_intr, out_desc_tail) < 0) { + ESP_LOGE(TAG, "esp_aes_dma_wait_complete failed"); + ret = -1; + goto cleanup; + } + + aes_hal_transform_dma_finish(); + + if (stream_bytes > 0) { + memcpy(output + block_bytes, stream_out, stream_bytes); + } + +cleanup: + if (ret != 0) { + mbedtls_platform_zeroize(output, len); + } + free(block_desc); + return ret; +} + +#endif //CONFIG_MBEDTLS_HARDWARE_GCM diff --git a/components/mbedtls/port/aes/dma/esp_aes_gdma_impl.c b/components/mbedtls/port/aes/dma/esp_aes_gdma_impl.c index c43bfd68ae..c989927e2d 100644 --- a/components/mbedtls/port/aes/dma/esp_aes_gdma_impl.c +++ b/components/mbedtls/port/aes/dma/esp_aes_gdma_impl.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2020-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -15,5 +15,9 @@ esp_err_t esp_aes_dma_start(const crypto_dma_desc_t *input, const crypto_dma_des bool esp_aes_dma_done(const crypto_dma_desc_t *output) { +#if SOC_AXI_GDMA_SUPPORTED + return esp_crypto_shared_gdma_done(); +#else return (output->dw0.owner == 0); +#endif } diff --git a/components/mbedtls/port/aes/esp_aes_gcm.c 
b/components/mbedtls/port/aes/esp_aes_gcm.c index e7c34fc5b4..c33c1fdaa2 100644 --- a/components/mbedtls/port/aes/esp_aes_gcm.c +++ b/components/mbedtls/port/aes/esp_aes_gcm.c @@ -684,10 +684,13 @@ int esp_aes_gcm_crypt_and_tag( esp_gcm_context *ctx, return esp_aes_gcm_crypt_and_tag_partial_hw(ctx, mode, length, iv, iv_len, aad, aad_len, input, output, tag_len, tag); } - /* Limit aad len to a single DMA descriptor to simplify DMA handling - In practice, e.g. with mbedtls the length of aad will always be short + /* Limit aad len to a single DMA descriptor to simplify DMA handling + In practice, e.g. with mbedtls the length of aad will always be short + the size field has 12 bits, but 0 not for 4096. + to avoid possible problem when the size is not word-aligned, we only use 4096-4 per desc. + Maximum size of data in the buffer that a DMA descriptor can hold. */ - if (aad_len > DMA_DESCRIPTOR_BUFFER_MAX_SIZE_PER_DESC) { + if (aad_len > DMA_DESCRIPTOR_BUFFER_MAX_SIZE_4B_ALIGNED) { return MBEDTLS_ERR_GCM_BAD_INPUT; } /* IV and AD are limited to 2^32 bits, so 2^29 bytes */ diff --git a/components/mbedtls/port/aes/include/esp_aes_internal.h b/components/mbedtls/port/aes/include/esp_aes_internal.h index e400e63f8a..1c065927df 100644 --- a/components/mbedtls/port/aes/include/esp_aes_internal.h +++ b/components/mbedtls/port/aes/include/esp_aes_internal.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -17,6 +17,20 @@ extern "C" { bool valid_key_length(const esp_aes_context *ctx); +#if SOC_AES_SUPPORT_DMA +/** + * @brief Run a AES operation using DMA + * + * @param ctx Aes context + * @param input Pointer to input data + * @param output Pointer to output data + * @param len Length of the input data + * @param stream_out The saved stream-block for resuming next operation. 
+ * @return int -1 on error + */ +int esp_aes_process_dma(esp_aes_context *ctx, const unsigned char *input, unsigned char *output, size_t len, uint8_t *stream_out); +#endif + #if SOC_AES_SUPPORT_GCM /** * @brief Run a AES-GCM conversion using DMA diff --git a/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c b/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c index 972c816800..059ebb738a 100644 --- a/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c +++ b/components/mbedtls/port/crypto_shared_gdma/esp_crypto_shared_gdma.c @@ -143,33 +143,14 @@ esp_err_t esp_crypto_shared_gdma_start(const lldesc_t *input, const lldesc_t *ou esp_err_t esp_crypto_shared_gdma_start_axi_ahb(const crypto_dma_desc_t *input, const crypto_dma_desc_t *output, gdma_trigger_peripheral_t peripheral) { int rx_ch_id = 0; - esp_err_t ret = ESP_OK; - -#if SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE - // TODO: replace with `esp_cache_msync` - const crypto_dma_desc_t *it = input; - while(it != NULL) { - esp_cache_msync(it->buffer, it->dw0.length, ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - esp_cache_msync((void *)it, sizeof(crypto_dma_desc_t), ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - it = (const crypto_dma_desc_t*) it->next; - } - - it = output; - while(it != NULL) { - esp_cache_msync(it->buffer, it->dw0.length, ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - esp_cache_msync((void *)it, sizeof(crypto_dma_desc_t), ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED); - it = (const crypto_dma_desc_t*) it->next; - }; -#endif /* SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE */ if (tx_channel == NULL) { /* Allocate a pair of RX and TX for crypto, should only happen the first time we use the GMDA or if user called esp_crypto_shared_gdma_release */ - ret = crypto_shared_gdma_init(); - } - - if (ret != ESP_OK) { - return ret; + esp_err_t ret = crypto_shared_gdma_init(); + if (ret != ESP_OK) { + return 
ret; + } } /* Tx channel is shared between AES and SHA, need to connect to peripheral every time */ @@ -198,6 +179,20 @@ esp_err_t esp_crypto_shared_gdma_start_axi_ahb(const crypto_dma_desc_t *input, c return ESP_OK; } +#if SOC_AXI_GDMA_SUPPORTED +bool esp_crypto_shared_gdma_done(void) +{ + int rx_ch_id = 0; + gdma_get_channel_id(rx_channel, &rx_ch_id); + while(1) { + if ((axi_dma_ll_rx_get_interrupt_status(&AXI_DMA, rx_ch_id, true) & 1)) { + break; + } + } + return true; +} +#endif /* SOC_AXI_GDMA_SUPPORTED */ + void esp_crypto_shared_gdma_free() { esp_crypto_sha_aes_lock_acquire(); diff --git a/components/mbedtls/port/include/esp_crypto_shared_gdma.h b/components/mbedtls/port/include/esp_crypto_shared_gdma.h index fd49558912..d6e40fd0fb 100644 --- a/components/mbedtls/port/include/esp_crypto_shared_gdma.h +++ b/components/mbedtls/port/include/esp_crypto_shared_gdma.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2021-2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2021-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -43,6 +43,15 @@ esp_err_t esp_crypto_shared_gdma_start(const lldesc_t *input, const lldesc_t *ou */ esp_err_t esp_crypto_shared_gdma_start_axi_ahb(const crypto_dma_desc_t *input, const crypto_dma_desc_t *output, gdma_trigger_peripheral_t peripheral); +#if SOC_AXI_GDMA_SUPPORTED +/** + * @brief Busy wait until GDMA RX data transfer is complete + * + * @return true, when GDMA RX data transfer is complete + */ +bool esp_crypto_shared_gdma_done(void); +#endif /* SOC_AXI_GDMA_SUPPORTED */ + /** * @brief Frees any shared crypto DMA channel, if esp_crypto_shared_gdma_start is called after * this, new GDMA channels will be allocated. 
From e8268d8b6b4d17080c728a9c30432f0363589016 Mon Sep 17 00:00:00 2001 From: "harshal.patil" Date: Fri, 1 Mar 2024 14:27:09 +0530 Subject: [PATCH 6/7] feat(hal/aes): use RCC atomic block to enable/reset the AES peripheral --- components/esp_hw_support/esp_ds.c | 16 ++++++-- .../esp_private/esp_crypto_lock_internal.h | 2 + components/hal/esp32/include/hal/aes_ll.h | 40 ++++++++++++++++++- components/hal/esp32c3/include/hal/aes_ll.h | 32 ++++++++++++++- components/hal/esp32c6/include/hal/aes_ll.h | 24 ++++++++++- components/hal/esp32h2/include/hal/aes_ll.h | 24 ++++++++++- components/hal/esp32p4/include/hal/aes_ll.h | 32 ++++++++++++++- components/hal/esp32s2/include/hal/aes_ll.h | 36 ++++++++++++++++- .../hal/esp32s2/include/hal/crypto_dma_ll.h | 33 ++++++++++++++- components/hal/esp32s3/include/hal/aes_ll.h | 32 ++++++++++++++- .../hal/test_apps/crypto/main/aes/aes_block.c | 30 ++++++++------ .../hal/test_apps/crypto/main/ds/test_ds.c | 14 +++++-- components/mbedtls/port/aes/block/esp_aes.c | 12 ++++-- components/mbedtls/port/aes/dma/esp_aes.c | 24 ++++++----- 14 files changed, 311 insertions(+), 40 deletions(-) diff --git a/components/esp_hw_support/esp_ds.c b/components/esp_hw_support/esp_ds.c index 75878d028a..632bd491ff 100644 --- a/components/esp_hw_support/esp_ds.c +++ b/components/esp_hw_support/esp_ds.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2020-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -24,6 +24,7 @@ #include "soc/soc_memory_layout.h" #else /* CONFIG_IDF_TARGET_ESP32S2 */ #include "esp_private/periph_ctrl.h" +#include "hal/aes_ll.h" #include "hal/ds_hal.h" #include "hal/ds_ll.h" #include "hal/hmac_hal.h" @@ -438,7 +439,12 @@ esp_err_t esp_ds_encrypt_params(esp_ds_data_t *data, // but just the AES and SHA peripherals, so acquiring locks just for these peripherals // would be enough rather than acquiring a lock for the Digital 
Signature peripheral. esp_crypto_sha_aes_lock_acquire(); - periph_module_enable(PERIPH_AES_MODULE); + + AES_RCC_ATOMIC() { + aes_ll_enable_bus_clock(true); + aes_ll_reset_register(); + } + periph_module_enable(PERIPH_SHA_MODULE); ets_ds_data_t *ds_data = (ets_ds_data_t *) data; @@ -451,7 +457,11 @@ esp_err_t esp_ds_encrypt_params(esp_ds_data_t *data, } periph_module_disable(PERIPH_SHA_MODULE); - periph_module_disable(PERIPH_AES_MODULE); + + AES_RCC_ATOMIC() { + aes_ll_enable_bus_clock(false); + } + esp_crypto_sha_aes_lock_release(); return result; diff --git a/components/esp_hw_support/include/esp_private/esp_crypto_lock_internal.h b/components/esp_hw_support/include/esp_private/esp_crypto_lock_internal.h index 368358fb80..ac25ad1bc1 100644 --- a/components/esp_hw_support/include/esp_private/esp_crypto_lock_internal.h +++ b/components/esp_hw_support/include/esp_private/esp_crypto_lock_internal.h @@ -19,12 +19,14 @@ extern "C" { #define HMAC_RCC_ATOMIC() #define DS_RCC_ATOMIC() #define ECDSA_RCC_ATOMIC() +#define AES_RCC_ATOMIC() #else /* !SOC_RCC_IS_INDEPENDENT */ #define MPI_RCC_ATOMIC() PERIPH_RCC_ATOMIC() #define ECC_RCC_ATOMIC() PERIPH_RCC_ATOMIC() #define HMAC_RCC_ATOMIC() PERIPH_RCC_ATOMIC() #define DS_RCC_ATOMIC() PERIPH_RCC_ATOMIC() #define ECDSA_RCC_ATOMIC() PERIPH_RCC_ATOMIC() +#define AES_RCC_ATOMIC() PERIPH_RCC_ATOMIC() #endif /* SOC_RCC_IS_INDEPENDENT */ #ifdef __cplusplus diff --git a/components/hal/esp32/include/hal/aes_ll.h b/components/hal/esp32/include/hal/aes_ll.h index c9d9ba4908..e3e31a2e9f 100644 --- a/components/hal/esp32/include/hal/aes_ll.h +++ b/components/hal/esp32/include/hal/aes_ll.h @@ -1,15 +1,17 @@ /* - * SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2020-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ #pragma once +#include +#include #include "soc/hwcrypto_reg.h" #include "soc/dport_access.h" +#include "soc/dport_reg.h" #include "hal/aes_types.h" 
-#include #ifdef __cplusplus extern "C" { @@ -25,6 +27,40 @@ typedef enum { ESP_AES_STATE_IDLE, /* AES accelerator is idle */ } esp_aes_state_t; +/** + * @brief Enable the bus clock for AES peripheral module + * + * @param enable true to enable the module, false to disable the module + */ +static inline void aes_ll_enable_bus_clock(bool enable) +{ + if (enable) { + DPORT_SET_PERI_REG_MASK(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_AES); + } else { + DPORT_CLEAR_PERI_REG_MASK(DPORT_PERI_CLK_EN_REG, DPORT_PERI_EN_AES); + } +} + +/// use a macro to wrap the function, force the caller to use it in a critical section +/// the critical section needs to declare the __DECLARE_RCC_ATOMIC_ENV variable in advance +#define aes_ll_enable_bus_clock(...) (void)__DECLARE_RCC_ATOMIC_ENV; aes_ll_enable_bus_clock(__VA_ARGS__) + +/** + * @brief Reset the AES peripheral module + */ +static inline void aes_ll_reset_register(void) +{ + DPORT_SET_PERI_REG_MASK(DPORT_PERI_RST_EN_REG, DPORT_PERI_EN_AES); + DPORT_CLEAR_PERI_REG_MASK(DPORT_PERI_RST_EN_REG, DPORT_PERI_EN_AES); + + // Clear reset on digital signature and secure boot also, otherwise AES is held in reset + DPORT_CLEAR_PERI_REG_MASK(DPORT_PERI_RST_EN_REG, DPORT_PERI_EN_DIGITAL_SIGNATURE); + DPORT_CLEAR_PERI_REG_MASK(DPORT_PERI_RST_EN_REG, DPORT_PERI_EN_SECUREBOOT); +} + +/// use a macro to wrap the function, force the caller to use it in a critical section +/// the critical section needs to declare the __DECLARE_RCC_ATOMIC_ENV variable in advance +#define aes_ll_reset_register(...) 
(void)__DECLARE_RCC_ATOMIC_ENV; aes_ll_reset_register(__VA_ARGS__) /** * @brief Write the encryption/decryption key to hardware diff --git a/components/hal/esp32c3/include/hal/aes_ll.h b/components/hal/esp32c3/include/hal/aes_ll.h index 615f69a0f5..58cd75c422 100644 --- a/components/hal/esp32c3/include/hal/aes_ll.h +++ b/components/hal/esp32c3/include/hal/aes_ll.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2020-2022 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2020-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -9,6 +9,7 @@ #include #include #include "soc/hwcrypto_reg.h" +#include "soc/system_struct.h" #include "hal/aes_types.h" #ifdef __cplusplus @@ -25,6 +26,35 @@ typedef enum { ESP_AES_STATE_DONE, /* Transform completed */ } esp_aes_state_t; +/** + * @brief Enable the bus clock for AES peripheral module + * + * @param enable true to enable the module, false to disable the module + */ +static inline void aes_ll_enable_bus_clock(bool enable) +{ + SYSTEM.perip_clk_en1.reg_crypto_aes_clk_en = enable; +} + +/// use a macro to wrap the function, force the caller to use it in a critical section +/// the critical section needs to declare the __DECLARE_RCC_ATOMIC_ENV variable in advance +#define aes_ll_enable_bus_clock(...) (void)__DECLARE_RCC_ATOMIC_ENV; aes_ll_enable_bus_clock(__VA_ARGS__) + +/** + * @brief Reset the AES peripheral module + */ +static inline void aes_ll_reset_register(void) +{ + SYSTEM.perip_rst_en1.reg_crypto_aes_rst = 1; + SYSTEM.perip_rst_en1.reg_crypto_aes_rst = 0; + + // Clear reset on digital signature also, otherwise AES is held in reset + SYSTEM.perip_rst_en1.reg_crypto_ds_rst = 0; +} + +/// use a macro to wrap the function, force the caller to use it in a critical section +/// the critical section needs to declare the __DECLARE_RCC_ATOMIC_ENV variable in advance +#define aes_ll_reset_register(...) 
(void)__DECLARE_RCC_ATOMIC_ENV; aes_ll_reset_register(__VA_ARGS__) /** * @brief Write the encryption/decryption key to hardware diff --git a/components/hal/esp32c6/include/hal/aes_ll.h b/components/hal/esp32c6/include/hal/aes_ll.h index cde495f504..f13d417eb5 100644 --- a/components/hal/esp32c6/include/hal/aes_ll.h +++ b/components/hal/esp32c6/include/hal/aes_ll.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -9,6 +9,7 @@ #include #include #include "soc/hwcrypto_reg.h" +#include "soc/pcr_struct.h" #include "hal/aes_types.h" #ifdef __cplusplus @@ -25,6 +26,27 @@ typedef enum { ESP_AES_STATE_DONE, /* Transform completed */ } esp_aes_state_t; +/** + * @brief Enable the bus clock for AES peripheral module + * + * @param enable true to enable the module, false to disable the module + */ +static inline void aes_ll_enable_bus_clock(bool enable) +{ + PCR.aes_conf.aes_clk_en = enable; +} + +/** + * @brief Reset the AES peripheral module + */ +static inline void aes_ll_reset_register(void) +{ + PCR.aes_conf.aes_rst_en = 1; + PCR.aes_conf.aes_rst_en = 0; + + // Clear reset on digital signature also, otherwise AES is held in reset + PCR.ds_conf.ds_rst_en = 0; +} /** * @brief Write the encryption/decryption key to hardware diff --git a/components/hal/esp32h2/include/hal/aes_ll.h b/components/hal/esp32h2/include/hal/aes_ll.h index cde495f504..f13d417eb5 100644 --- a/components/hal/esp32h2/include/hal/aes_ll.h +++ b/components/hal/esp32h2/include/hal/aes_ll.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -9,6 +9,7 @@ #include #include #include "soc/hwcrypto_reg.h" +#include "soc/pcr_struct.h" #include "hal/aes_types.h" #ifdef __cplusplus @@ -25,6 +26,27 @@ 
typedef enum { ESP_AES_STATE_DONE, /* Transform completed */ } esp_aes_state_t; +/** + * @brief Enable the bus clock for AES peripheral module + * + * @param enable true to enable the module, false to disable the module + */ +static inline void aes_ll_enable_bus_clock(bool enable) +{ + PCR.aes_conf.aes_clk_en = enable; +} + +/** + * @brief Reset the AES peripheral module + */ +static inline void aes_ll_reset_register(void) +{ + PCR.aes_conf.aes_rst_en = 1; + PCR.aes_conf.aes_rst_en = 0; + + // Clear reset on digital signature also, otherwise AES is held in reset + PCR.ds_conf.ds_rst_en = 0; +} /** * @brief Write the encryption/decryption key to hardware diff --git a/components/hal/esp32p4/include/hal/aes_ll.h b/components/hal/esp32p4/include/hal/aes_ll.h index 031baf2055..4c523ecb9d 100644 --- a/components/hal/esp32p4/include/hal/aes_ll.h +++ b/components/hal/esp32p4/include/hal/aes_ll.h @@ -8,8 +8,9 @@ #include #include -#include "soc/hwcrypto_reg.h" #include "hal/aes_types.h" +#include "soc/hp_sys_clkrst_struct.h" +#include "soc/hwcrypto_reg.h" #ifdef __cplusplus extern "C" { @@ -25,6 +26,35 @@ typedef enum { ESP_AES_STATE_DONE, /* Transform completed */ } esp_aes_state_t; +/** + * @brief Enable the bus clock for AES peripheral module + * + * @param enable true to enable the module, false to disable the module + */ +static inline void aes_ll_enable_bus_clock(bool enable) +{ + HP_SYS_CLKRST.peri_clk_ctrl25.reg_crypto_aes_clk_en = enable; +} + +/// use a macro to wrap the function, force the caller to use it in a critical section +/// the critical section needs to declare the __DECLARE_RCC_ATOMIC_ENV variable in advance +#define aes_ll_enable_bus_clock(...) 
(void)__DECLARE_RCC_ATOMIC_ENV; aes_ll_enable_bus_clock(__VA_ARGS__) + +/** + * @brief Reset the AES peripheral module + */ +static inline void aes_ll_reset_register(void) +{ + HP_SYS_CLKRST.hp_rst_en2.reg_rst_en_aes = 1; + HP_SYS_CLKRST.hp_rst_en2.reg_rst_en_aes = 0; + + // Clear reset on digital signature, otherwise AES is held in reset + HP_SYS_CLKRST.hp_rst_en2.reg_rst_en_ds = 0; +} + +/// use a macro to wrap the function, force the caller to use it in a critical section +/// the critical section needs to declare the __DECLARE_RCC_ATOMIC_ENV variable in advance +#define aes_ll_reset_register(...) (void)__DECLARE_RCC_ATOMIC_ENV; aes_ll_reset_register(__VA_ARGS__) /** * @brief Write the encryption/decryption key to hardware diff --git a/components/hal/esp32s2/include/hal/aes_ll.h b/components/hal/esp32s2/include/hal/aes_ll.h index 1103e53ce8..5fce0db425 100644 --- a/components/hal/esp32s2/include/hal/aes_ll.h +++ b/components/hal/esp32s2/include/hal/aes_ll.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2020-2022 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2020-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -8,6 +8,7 @@ #include #include +#include "soc/dport_reg.h" #include "soc/hwcrypto_reg.h" #include "hal/aes_types.h" @@ -26,6 +27,39 @@ typedef enum { ESP_AES_STATE_DONE, /* Transform completed */ } esp_aes_state_t; +/** + * @brief Enable the bus clock for AES peripheral module + * + * @param enable true to enable the module, false to disable the module + */ +static inline void aes_ll_enable_bus_clock(bool enable) +{ + if (enable) { + SET_PERI_REG_MASK(DPORT_PERIP_CLK_EN1_REG, DPORT_CRYPTO_AES_CLK_EN); + } else { + CLEAR_PERI_REG_MASK(DPORT_PERIP_CLK_EN1_REG, DPORT_CRYPTO_AES_CLK_EN); + } +} + +/// use a macro to wrap the function, force the caller to use it in a critical section +/// the critical section needs to declare the __DECLARE_RCC_ATOMIC_ENV variable in advance +#define 
aes_ll_enable_bus_clock(...) (void)__DECLARE_RCC_ATOMIC_ENV; aes_ll_enable_bus_clock(__VA_ARGS__) + +/** + * @brief Reset the AES peripheral module + */ +static inline void aes_ll_reset_register(void) +{ + SET_PERI_REG_MASK(DPORT_PERIP_RST_EN1_REG, DPORT_CRYPTO_AES_RST); + CLEAR_PERI_REG_MASK(DPORT_PERIP_RST_EN1_REG, DPORT_CRYPTO_AES_RST); + + // Clear reset on digital signature also, otherwise AES is held in reset (the crypto DMA reset is released separately by crypto_dma_ll_reset_register()) + CLEAR_PERI_REG_MASK(DPORT_PERIP_RST_EN1_REG, DPORT_CRYPTO_DS_RST); +} + +/// use a macro to wrap the function, force the caller to use it in a critical section +/// the critical section needs to declare the __DECLARE_RCC_ATOMIC_ENV variable in advance +#define aes_ll_reset_register(...) (void)__DECLARE_RCC_ATOMIC_ENV; aes_ll_reset_register(__VA_ARGS__) /** * @brief Write the encryption/decryption key to hardware diff --git a/components/hal/esp32s2/include/hal/crypto_dma_ll.h b/components/hal/esp32s2/include/hal/crypto_dma_ll.h index 5520d807b9..793aeb7364 100644 --- a/components/hal/esp32s2/include/hal/crypto_dma_ll.h +++ b/components/hal/esp32s2/include/hal/crypto_dma_ll.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2020-2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2020-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -25,6 +25,37 @@ typedef enum { CRYPTO_DMA_SHA, } crypto_dma_mode_t; +/** + * @brief Enable the bus clock for crypto DMA peripheral module + * + * @param enable true to enable the module, false to disable the module + */ +static inline void crypto_dma_ll_enable_bus_clock(bool enable) +{ + if (enable) { + SET_PERI_REG_MASK(DPORT_PERIP_CLK_EN1_REG, DPORT_CRYPTO_DMA_CLK_EN); + } else { + CLEAR_PERI_REG_MASK(DPORT_PERIP_CLK_EN1_REG, DPORT_CRYPTO_DMA_CLK_EN); + } +} + +/// use a macro to wrap the function, force the caller to use it in a critical section +/// the critical section needs to declare the __DECLARE_RCC_ATOMIC_ENV variable in advance +#define
crypto_dma_ll_enable_bus_clock(...) (void)__DECLARE_RCC_ATOMIC_ENV; crypto_dma_ll_enable_bus_clock(__VA_ARGS__) + +/** + * @brief Reset the crypto DMA peripheral module + */ +static inline void crypto_dma_ll_reset_register(void) +{ + SET_PERI_REG_MASK(DPORT_PERIP_RST_EN1_REG, DPORT_CRYPTO_DMA_RST); + CLEAR_PERI_REG_MASK(DPORT_PERIP_RST_EN1_REG, DPORT_CRYPTO_DMA_RST); +} + +/// use a macro to wrap the function, force the caller to use it in a critical section +/// the critical section needs to declare the __DECLARE_RCC_ATOMIC_ENV variable in advance +#define crypto_dma_ll_reset_register(...) (void)__DECLARE_RCC_ATOMIC_ENV; crypto_dma_ll_reset_register(__VA_ARGS__) + /** * @brief Resets the DMA * diff --git a/components/hal/esp32s3/include/hal/aes_ll.h b/components/hal/esp32s3/include/hal/aes_ll.h index 615f69a0f5..31b3da92ca 100644 --- a/components/hal/esp32s3/include/hal/aes_ll.h +++ b/components/hal/esp32s3/include/hal/aes_ll.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2020-2022 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2020-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -9,6 +9,7 @@ #include #include #include "soc/hwcrypto_reg.h" +#include "soc/system_struct.h" #include "hal/aes_types.h" #ifdef __cplusplus @@ -25,6 +26,35 @@ typedef enum { ESP_AES_STATE_DONE, /* Transform completed */ } esp_aes_state_t; +/** + * @brief Enable the bus clock for AES peripheral module + * + * @param enable true to enable the module, false to disable the module + */ +static inline void aes_ll_enable_bus_clock(bool enable) +{ + SYSTEM.perip_clk_en1.crypto_aes_clk_en = enable; +} + +/// use a macro to wrap the function, force the caller to use it in a critical section +/// the critical section needs to declare the __DECLARE_RCC_ATOMIC_ENV variable in advance +#define aes_ll_enable_bus_clock(...) 
(void)__DECLARE_RCC_ATOMIC_ENV; aes_ll_enable_bus_clock(__VA_ARGS__) + +/** + * @brief Reset the AES peripheral module + */ +static inline void aes_ll_reset_register(void) +{ + SYSTEM.perip_rst_en1.crypto_aes_rst = 1; + SYSTEM.perip_rst_en1.crypto_aes_rst = 0; + + // Clear reset on digital signature also, otherwise AES is held in reset + SYSTEM.perip_rst_en1.crypto_ds_rst = 0; +} + +/// use a macro to wrap the function, force the caller to use it in a critical section +/// the critical section needs to declare the __DECLARE_RCC_ATOMIC_ENV variable in advance +#define aes_ll_reset_register(...) (void)__DECLARE_RCC_ATOMIC_ENV; aes_ll_reset_register(__VA_ARGS__) /** * @brief Write the encryption/decryption key to hardware diff --git a/components/hal/test_apps/crypto/main/aes/aes_block.c b/components/hal/test_apps/crypto/main/aes/aes_block.c index 5192ca5377..d64beb007d 100644 --- a/components/hal/test_apps/crypto/main/aes/aes_block.c +++ b/components/hal/test_apps/crypto/main/aes/aes_block.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: CC0-1.0 */ @@ -8,12 +8,10 @@ #include #include -#include "soc/periph_defs.h" -#include "esp_private/periph_ctrl.h" - +#include "esp_private/esp_crypto_lock_internal.h" #include "hal/aes_types.h" #include "hal/aes_hal.h" -#include "hal/clk_gate_ll.h" +#include "hal/aes_ll.h" #if SOC_AES_SUPPORTED @@ -32,8 +30,10 @@ void aes_crypt_cbc_block(int mode, uint32_t *iv_words = (uint32_t *)iv; unsigned char temp[16]; - /* Enable peripheral module by un-gating the clock and de-asserting the reset signal. 
*/ - periph_ll_enable_clk_clear_rst(PERIPH_AES_MODULE); + AES_RCC_ATOMIC() { + aes_ll_enable_bus_clock(true); + aes_ll_reset_register(); + } /* Sets the key used for AES encryption/decryption */ aes_hal_setkey(key, key_bytes, mode); @@ -71,8 +71,9 @@ void aes_crypt_cbc_block(int mode, } } - /* Disable peripheral module by gating the clock and asserting the reset signal. */ - periph_ll_disable_clk_set_rst(PERIPH_AES_MODULE); + AES_RCC_ATOMIC() { + aes_ll_enable_bus_clock(false); + } } @@ -88,8 +89,10 @@ void aes_crypt_ctr_block(uint8_t key_bytes, int c, i; size_t n = *nc_off; - /* Enable peripheral module by un-gating the clock and de-asserting the reset signal. */ - periph_ll_enable_clk_clear_rst(PERIPH_AES_MODULE); + AES_RCC_ATOMIC() { + aes_ll_enable_bus_clock(true); + aes_ll_reset_register(); + } /* Sets the key used for AES encryption/decryption */ aes_hal_setkey(key, key_bytes, ESP_AES_ENCRYPT); @@ -110,8 +113,9 @@ void aes_crypt_ctr_block(uint8_t key_bytes, *nc_off = n; - /* Disable peripheral module by gating the clock and asserting the reset signal. 
*/ - periph_ll_disable_clk_set_rst(PERIPH_AES_MODULE); + AES_RCC_ATOMIC() { + aes_ll_enable_bus_clock(false); + } } #endif diff --git a/components/hal/test_apps/crypto/main/ds/test_ds.c b/components/hal/test_apps/crypto/main/ds/test_ds.c index b6e5c17647..4d087f588f 100644 --- a/components/hal/test_apps/crypto/main/ds/test_ds.c +++ b/components/hal/test_apps/crypto/main/ds/test_ds.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2023-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -114,6 +114,7 @@ _Static_assert(NUM_RESULTS == NUM_MESSAGES, "expected_results size should be the #if !CONFIG_IDF_TARGET_ESP32S2 #include "esp_private/periph_ctrl.h" +#include "hal/aes_ll.h" #include "hal/ds_hal.h" #include "hal/ds_ll.h" #include "hal/hmac_hal.h" @@ -228,7 +229,11 @@ static esp_err_t esp_ds_encrypt_params(esp_ds_data_t *data, esp_err_t result = ESP_OK; - periph_module_enable(PERIPH_AES_MODULE); + AES_RCC_ATOMIC() { + aes_ll_enable_bus_clock(true); + aes_ll_reset_register(); + } + periph_module_enable(PERIPH_SHA_MODULE); ets_ds_data_t *ds_data = (ets_ds_data_t *) data; @@ -241,7 +246,10 @@ static esp_err_t esp_ds_encrypt_params(esp_ds_data_t *data, } periph_module_disable(PERIPH_SHA_MODULE); - periph_module_disable(PERIPH_AES_MODULE); + + AES_RCC_ATOMIC() { + aes_ll_enable_bus_clock(false); + } return result; } diff --git a/components/mbedtls/port/aes/block/esp_aes.c b/components/mbedtls/port/aes/block/esp_aes.c index a83a89f8e9..2f3ed1a572 100644 --- a/components/mbedtls/port/aes/block/esp_aes.c +++ b/components/mbedtls/port/aes/block/esp_aes.c @@ -33,12 +33,13 @@ #include "soc/hwcrypto_periph.h" #include #include "hal/aes_hal.h" +#include "hal/aes_ll.h" #include "esp_aes_internal.h" #include #include -#include "esp_private/periph_ctrl.h" +#include "esp_private/esp_crypto_lock_internal.h" static const char *TAG = "esp-aes"; @@ -58,13 +59,18 @@ void 
esp_aes_acquire_hardware( void ) portENTER_CRITICAL(&aes_spinlock); /* Enable AES hardware */ - periph_module_enable(PERIPH_AES_MODULE); + AES_RCC_ATOMIC() { + aes_ll_enable_bus_clock(true); + aes_ll_reset_register(); + } } void esp_aes_release_hardware( void ) { /* Disable AES hardware */ - periph_module_disable(PERIPH_AES_MODULE); + AES_RCC_ATOMIC() { + aes_ll_enable_bus_clock(false); + } portEXIT_CRITICAL(&aes_spinlock); } diff --git a/components/mbedtls/port/aes/dma/esp_aes.c b/components/mbedtls/port/aes/dma/esp_aes.c index d10c07c5e2..be87512e79 100644 --- a/components/mbedtls/port/aes/dma/esp_aes.c +++ b/components/mbedtls/port/aes/dma/esp_aes.c @@ -28,11 +28,12 @@ #include #include "mbedtls/aes.h" -#include "esp_private/periph_ctrl.h" #include "esp_log.h" #include "esp_crypto_lock.h" #include "hal/aes_hal.h" +#include "hal/aes_ll.h" #include "esp_aes_internal.h" +#include "esp_private/esp_crypto_lock_internal.h" #if SOC_AES_GDMA #define AES_LOCK() esp_crypto_sha_aes_lock_acquire() @@ -40,6 +41,7 @@ #elif SOC_AES_CRYPTO_DMA #define AES_LOCK() esp_crypto_dma_lock_acquire() #define AES_RELEASE() esp_crypto_dma_lock_release() +#include "hal/crypto_dma_ll.h" #endif static const char *TAG = "esp-aes"; @@ -49,23 +51,27 @@ void esp_aes_acquire_hardware( void ) /* Released by esp_aes_release_hardware()*/ AES_LOCK(); - /* Enable AES and DMA hardware */ + AES_RCC_ATOMIC() { + aes_ll_enable_bus_clock(true); #if SOC_AES_CRYPTO_DMA - periph_module_enable(PERIPH_AES_DMA_MODULE); -#elif SOC_AES_GDMA - periph_module_enable(PERIPH_AES_MODULE); + crypto_dma_ll_enable_bus_clock(true); #endif + aes_ll_reset_register(); +#if SOC_AES_CRYPTO_DMA + crypto_dma_ll_reset_register(); +#endif + } } /* Function to disable AES and Crypto DMA clocks and release locks */ void esp_aes_release_hardware( void ) { - /* Disable AES and DMA hardware */ + AES_RCC_ATOMIC() { + aes_ll_enable_bus_clock(false); #if SOC_AES_CRYPTO_DMA - periph_module_disable(PERIPH_AES_DMA_MODULE); -#elif SOC_AES_GDMA 
- periph_module_disable(PERIPH_AES_MODULE); + crypto_dma_ll_enable_bus_clock(false); #endif + } AES_RELEASE(); } From bc899d0badeb544f0c54316a0deef3f1b9f7ffed Mon Sep 17 00:00:00 2001 From: "harshal.patil" Date: Sun, 3 Mar 2024 15:56:40 +0530 Subject: [PATCH 7/7] ci(esp-tls): Fix test-specific leak for ESP32-P4 --- components/esp-tls/test_apps/main/app_main.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/components/esp-tls/test_apps/main/app_main.c b/components/esp-tls/test_apps/main/app_main.c index d84480083f..aad946e1f1 100644 --- a/components/esp-tls/test_apps/main/app_main.c +++ b/components/esp-tls/test_apps/main/app_main.c @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: 2021-2023 Espressif Systems (Shanghai) CO LTD + * SPDX-FileCopyrightText: 2021-2024 Espressif Systems (Shanghai) CO LTD * * SPDX-License-Identifier: Apache-2.0 */ @@ -27,19 +27,25 @@ /* setUp runs before every test */ void setUp(void) { +#if SOC_SHA_SUPPORTED // Execute esp_sha operation to allocate internal SHA semaphore (in case of ESP32) // and initial DMA setup memory which is considered as leaked otherwise -#if SOC_SHA_SUPPORTED const uint8_t input_buffer[64] = {0}; uint8_t output_buffer[64]; esp_sha(SHA_TYPE, input_buffer, sizeof(input_buffer), output_buffer); #endif // SOC_SHA_SUPPORTED +#if SOC_AES_SUPPORTED // Execute mbedtls_aes_init operation to allocate AES interrupt // allocation memory which is considered as leak otherwise -#if SOC_AES_SUPPORTED + const uint8_t plaintext[16] = {0}; + uint8_t ciphertext[16]; + const uint8_t key[16] = { 0 }; mbedtls_aes_context ctx; mbedtls_aes_init(&ctx); + mbedtls_aes_setkey_enc(&ctx, key, 128); + mbedtls_aes_crypt_ecb(&ctx, MBEDTLS_AES_ENCRYPT, plaintext, ciphertext); + mbedtls_aes_free(&ctx); #endif // SOC_AES_SUPPORTED test_utils_record_free_mem();