diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c25dba04..0b9429bd2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,6 +64,7 @@ ENDIF () ADD_LIBRARY(picotls-core ${CORE_FILES}) TARGET_LINK_LIBRARIES(picotls-core ${CORE_EXTRA_LIBS}) + ADD_LIBRARY(picotls-minicrypto ${MINICRYPTO_LIBRARY_FILES} lib/cifra.c @@ -90,7 +91,6 @@ ADD_EXECUTABLE(test-minicrypto.t lib/cifra/aes128.c lib/cifra/aes256.c lib/cifra/random.c) - SET(TEST_EXES test-minicrypto.t) FIND_PACKAGE(OpenSSL) @@ -131,6 +131,25 @@ ELSE () MESSAGE(WARNING "Disabling OpenSSL support (requires 1.0.1 or newer)") ENDIF () +# fusion is built only for 64-bit x86; the processor names are grouped so that the pointer-size test applies to each of them +IF ((CMAKE_SIZEOF_VOID_P EQUAL 8) AND + ((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") OR + (CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64") OR + (CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64"))) + MESSAGE(STATUS " Enabling fusion support") + ADD_LIBRARY(picotls-fusion lib/fusion.c) + SET_TARGET_PROPERTIES(picotls-fusion PROPERTIES COMPILE_FLAGS "-mavx2 -maes -mpclmul") + TARGET_LINK_LIBRARIES(picotls-fusion picotls-core) + ADD_EXECUTABLE(test-fusion.t + deps/picotest/picotest.c + lib/picotls.c + t/fusion.c) + TARGET_LINK_LIBRARIES(test-fusion.t picotls-minicrypto) + SET_TARGET_PROPERTIES(test-fusion.t PROPERTIES COMPILE_FLAGS "-mavx2 -maes -mpclmul") + ADD_DEPENDENCIES(test-fusion.t generate-picotls-probes) + SET(TEST_EXES ${TEST_EXES} test-fusion.t) +ENDIF () + ADD_CUSTOM_TARGET(check env BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR} prove --exec '' -v ${CMAKE_CURRENT_BINARY_DIR}/*.t t/*.t WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${TEST_EXES} cli) IF (CMAKE_SYSTEM_NAME STREQUAL "Linux") diff --git a/cmake/dtrace-utils.cmake b/cmake/dtrace-utils.cmake index da566461f..191953174 100644 --- a/cmake/dtrace-utils.cmake +++ b/cmake/dtrace-utils.cmake @@ -27,10 +27,16 @@ FUNCTION (DEFINE_DTRACE_DEPENDENCIES d_file prefix) OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${prefix}-probes.h COMMAND dtrace -o ${CMAKE_CURRENT_BINARY_DIR}/${prefix}-probes.h -s ${d_file} -h DEPENDS ${d_file}) + 
ADD_CUSTOM_TARGET(generate-${prefix}-probes DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${prefix}-probes.h) + SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_BINARY_DIR}/${prefix}-probes.h PROPERTIES GENERATED TRUE) IF (DTRACE_USES_OBJFILE) ADD_CUSTOM_COMMAND( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${prefix}-probes.o COMMAND dtrace -o ${CMAKE_CURRENT_BINARY_DIR}/${prefix}-probes.o -s ${d_file} -G DEPENDS ${d_file}) + # ADD_DEPENDENCIES accepts only target names, so the generated object file is wrapped in a helper target + ADD_CUSTOM_TARGET(generate-${prefix}-probes-objfile DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${prefix}-probes.o) + ADD_DEPENDENCIES(generate-${prefix}-probes generate-${prefix}-probes-objfile) + SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_BINARY_DIR}/${prefix}-probes.o PROPERTIES GENERATED TRUE) ENDIF () ENDFUNCTION () diff --git a/include/picotls.h b/include/picotls.h index e4c4499d7..3be52780f 100644 --- a/include/picotls.h +++ b/include/picotls.h @@ -32,6 +32,7 @@ extern "C" { #include #include +#include #include #if __GNUC__ >= 3 @@ -303,19 +304,26 @@ typedef const struct st_ptls_cipher_algorithm_t { int (*setup_crypto)(ptls_cipher_context_t *ctx, int is_enc, const void *key); } ptls_cipher_algorithm_t; +typedef struct st_ptls_aead_supplementary_encryption_t { + ptls_cipher_context_t *ctx; + const void *input; + uint8_t output[16]; +} ptls_aead_supplementary_encryption_t; + /** * AEAD context. AEAD implementations are allowed to stuff data at the end of the struct. The size of the memory allocated for the * struct is governed by ptls_aead_algorithm_t::context_size. 
*/ typedef struct st_ptls_aead_context_t { const struct st_ptls_aead_algorithm_t *algo; - uint8_t static_iv[PTLS_MAX_IV_SIZE]; /* field above this line must not be altered by the crypto binding */ void (*dispose_crypto)(struct st_ptls_aead_context_t *ctx); - void (*do_encrypt_init)(struct st_ptls_aead_context_t *ctx, const void *iv, const void *aad, size_t aadlen); + void (*do_encrypt_init)(struct st_ptls_aead_context_t *ctx, uint64_t seq, const void *aad, size_t aadlen); size_t (*do_encrypt_update)(struct st_ptls_aead_context_t *ctx, void *output, const void *input, size_t inlen); size_t (*do_encrypt_final)(struct st_ptls_aead_context_t *ctx, void *output); - size_t (*do_decrypt)(struct st_ptls_aead_context_t *ctx, void *output, const void *input, size_t inlen, const void *iv, + void (*do_encrypt)(struct st_ptls_aead_context_t *ctx, void *output, const void *input, size_t inlen, uint64_t seq, + const void *aad, size_t aadlen, ptls_aead_supplementary_encryption_t *supp); + size_t (*do_decrypt)(struct st_ptls_aead_context_t *ctx, void *output, const void *input, size_t inlen, uint64_t seq, const void *aad, size_t aadlen); } ptls_aead_context_t; @@ -355,7 +363,7 @@ typedef const struct st_ptls_aead_algorithm_t { /** * callback that sets up the crypto */ - int (*setup_crypto)(ptls_aead_context_t *ctx, int is_enc, const void *key); + int (*setup_crypto)(ptls_aead_context_t *ctx, int is_enc, const void *key, const void *iv); } ptls_aead_algorithm_t; /** @@ -1192,6 +1200,13 @@ static void ptls_cipher_encrypt(ptls_cipher_context_t *ctx, void *output, const */ ptls_aead_context_t *ptls_aead_new(ptls_aead_algorithm_t *aead, ptls_hash_algorithm_t *hash, int is_enc, const void *secret, const char *label_prefix); +/** + * instantiates an AEAD cipher given key and iv + * @param aead + * @param is_enc 1 if creating a context for encryption, 0 if creating a context for decryption + * @return pointer to an AEAD context if successful, otherwise NULL + */ +ptls_aead_context_t 
*ptls_aead_new_direct(ptls_aead_algorithm_t *aead, int is_enc, const void *key, const void *iv); /** * destroys an AEAD cipher context */ @@ -1199,8 +1214,10 @@ void ptls_aead_free(ptls_aead_context_t *ctx); /** * */ -size_t ptls_aead_encrypt(ptls_aead_context_t *ctx, void *output, const void *input, size_t inlen, uint64_t seq, const void *aad, - size_t aadlen); +static size_t ptls_aead_encrypt(ptls_aead_context_t *ctx, void *output, const void *input, size_t inlen, uint64_t seq, + const void *aad, size_t aadlen); +static void ptls_aead_encrypt_s(ptls_aead_context_t *ctx, void *output, const void *input, size_t inlen, uint64_t seq, + const void *aad, size_t aadlen, ptls_aead_supplementary_encryption_t *supp); /** * initializes the internal state of the encryptor */ @@ -1251,7 +1268,12 @@ int ptls_server_handle_message(ptls_t *tls, ptls_buffer_t *sendbuf, size_t epoch /** * internal */ -void ptls_aead__build_iv(ptls_aead_context_t *ctx, uint8_t *iv, uint64_t seq); +void ptls_aead__build_iv(ptls_aead_algorithm_t *algo, uint8_t *iv, const uint8_t *static_iv, uint64_t seq); +/** + * + */ +static void ptls_aead__do_encrypt(ptls_aead_context_t *ctx, void *output, const void *input, size_t inlen, uint64_t seq, + const void *aad, size_t aadlen, ptls_aead_supplementary_encryption_t *supp); /** * internal */ @@ -1374,12 +1396,22 @@ inline void ptls_cipher_encrypt(ptls_cipher_context_t *ctx, void *output, const ctx->do_transform(ctx, output, input, len); } -inline void ptls_aead_encrypt_init(ptls_aead_context_t *ctx, uint64_t seq, const void *aad, size_t aadlen) +inline size_t ptls_aead_encrypt(ptls_aead_context_t *ctx, void *output, const void *input, size_t inlen, uint64_t seq, + const void *aad, size_t aadlen) { - uint8_t iv[PTLS_MAX_IV_SIZE]; + ctx->do_encrypt(ctx, output, input, inlen, seq, aad, aadlen, NULL); + return inlen + ctx->algo->tag_size; +} + +inline void ptls_aead_encrypt_s(ptls_aead_context_t *ctx, void *output, const void *input, size_t inlen, uint64_t seq, 
+ const void *aad, size_t aadlen, ptls_aead_supplementary_encryption_t *supp) +{ + ctx->do_encrypt(ctx, output, input, inlen, seq, aad, aadlen, supp); +} - ptls_aead__build_iv(ctx, iv, seq); - ctx->do_encrypt_init(ctx, iv, aad, aadlen); +inline void ptls_aead_encrypt_init(ptls_aead_context_t *ctx, uint64_t seq, const void *aad, size_t aadlen) +{ + ctx->do_encrypt_init(ctx, seq, aad, aadlen); } inline size_t ptls_aead_encrypt_update(ptls_aead_context_t *ctx, void *output, const void *input, size_t inlen) @@ -1392,13 +1424,24 @@ inline size_t ptls_aead_encrypt_final(ptls_aead_context_t *ctx, void *output) return ctx->do_encrypt_final(ctx, output); } +inline void ptls_aead__do_encrypt(ptls_aead_context_t *ctx, void *output, const void *input, size_t inlen, uint64_t seq, + const void *aad, size_t aadlen, ptls_aead_supplementary_encryption_t *supp) +{ + ctx->do_encrypt_init(ctx, seq, aad, aadlen); + ctx->do_encrypt_update(ctx, output, input, inlen); + ctx->do_encrypt_final(ctx, (uint8_t *)output + inlen); + + if (supp != NULL) { + ptls_cipher_init(supp->ctx, supp->input); + memset(supp->output, 0, sizeof(supp->output)); + ptls_cipher_encrypt(supp->ctx, supp->output, supp->output, sizeof(supp->output)); + } +} + inline size_t ptls_aead_decrypt(ptls_aead_context_t *ctx, void *output, const void *input, size_t inlen, uint64_t seq, const void *aad, size_t aadlen) { - uint8_t iv[PTLS_MAX_IV_SIZE]; - - ptls_aead__build_iv(ctx, iv, seq); - return ctx->do_decrypt(ctx, output, input, inlen, iv, aad, aadlen); + return ctx->do_decrypt(ctx, output, input, inlen, seq, aad, aadlen); } #define ptls_define_hash(name, ctx_type, init_func, update_func, final_func) \ diff --git a/include/picotls/fusion.h b/include/picotls/fusion.h new file mode 100644 index 000000000..332dd93ab --- /dev/null +++ b/include/picotls/fusion.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2020 Fastly, Kazuho Oku + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software 
and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef picotls_fusion_h +#define picotls_fusion_h + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include "../picotls.h" + +#define PTLS_FUSION_AES128_ROUNDS 10 +#define PTLS_FUSION_AES256_ROUNDS 14 + +typedef struct ptls_fusion_aesecb_context { + __m128i keys[PTLS_FUSION_AES256_ROUNDS + 1]; + unsigned rounds; +} ptls_fusion_aesecb_context_t; + +typedef struct ptls_fusion_aesgcm_context ptls_fusion_aesgcm_context_t; + +void ptls_fusion_aesecb_init(ptls_fusion_aesecb_context_t *ctx, int is_enc, const void *key, size_t key_size); +void ptls_fusion_aesecb_dispose(ptls_fusion_aesecb_context_t *ctx); +void ptls_fusion_aesecb_encrypt(ptls_fusion_aesecb_context_t *ctx, void *dst, const void *src); + +/** + * Creates an AES-GCM context. + * @param key the AES key (128 bits) + * @param capacity maximum size of AEAD record (i.e. 
AAD + encrypted payload) + */ +ptls_fusion_aesgcm_context_t *ptls_fusion_aesgcm_new(const void *key, size_t key_size, size_t capacity); +/** + * Updates the capacity. + */ +ptls_fusion_aesgcm_context_t *ptls_fusion_aesgcm_set_capacity(ptls_fusion_aesgcm_context_t *ctx, size_t capacity); +/** + * Destroys an AES-GCM context. + */ +void ptls_fusion_aesgcm_free(ptls_fusion_aesgcm_context_t *ctx); +/** + * Encrypts an AEAD block, and in parallel, optionally encrypts one block using AES-ECB. + * @param ctx context + * @param output output buffer + * @param input payload to be encrypted + * @param inlen size of the payload to be encrypted + * @param ctr counter block (the implementation overwrites its lowest 32-bit lane before use) + * @param aad AAD + * @param aadlen size of AAD + * @param supp (optional) supplementary encryption context + */ +void ptls_fusion_aesgcm_encrypt(ptls_fusion_aesgcm_context_t *ctx, void *output, const void *input, size_t inlen, __m128i ctr, + const void *aad, size_t aadlen, ptls_aead_supplementary_encryption_t *supp); +/** + * Decrypts an AEAD block, and in parallel, optionally encrypts one block using AES-ECB. Returns if decryption was successful. + * @param output output buffer + * @param input payload to be decrypted + * @param inlen size of the payload to be decrypted + * @param ctr counter block (NOTE(review): presumably derived from the 12-byte initialization vector; confirm against the implementation) + * @param aad AAD + * @param aadlen size of AAD + * @param tag the AEAD tag being received from peer + */ +int ptls_fusion_aesgcm_decrypt(ptls_fusion_aesgcm_context_t *ctx, void *output, const void *input, size_t inlen, __m128i ctr, + const void *aad, size_t aadlen, const void *tag); + +extern ptls_cipher_algorithm_t ptls_fusion_aes128ctr, ptls_fusion_aes256ctr; +extern ptls_aead_algorithm_t ptls_fusion_aes128gcm, ptls_fusion_aes256gcm; + +/** + * Returns a boolean indicating if fusion can be used. 
+ */ +int ptls_fusion_is_supported_by_cpu(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/cifra/aes-common.h b/lib/cifra/aes-common.h index 9823337ed..0c393c57b 100644 --- a/lib/cifra/aes-common.h +++ b/lib/cifra/aes-common.h @@ -20,6 +20,7 @@ * IN THE SOFTWARE. */ #include +#include #include "aes.h" #include "modes.h" #include "sha2.h" @@ -51,13 +52,13 @@ static inline void aesecb_decrypt(ptls_cipher_context_t *_ctx, void *output, con cf_aes_decrypt(&ctx->aes, input, output); } -static inline int aesecb_setup_crypto(ptls_cipher_context_t *_ctx, int is_enc, const void *key, size_t key_size) +static inline int aesecb_setup_crypto(ptls_cipher_context_t *_ctx, int is_enc, const void *key) { struct aesecb_context_t *ctx = (struct aesecb_context_t *)_ctx; ctx->super.do_dispose = aesecb_dispose; ctx->super.do_init = NULL; ctx->super.do_transform = is_enc ? aesecb_encrypt : aesecb_decrypt; - cf_aes_init(&ctx->aes, key, key_size); + cf_aes_init(&ctx->aes, key, ctx->super.algo->key_size); return 0; } @@ -85,13 +86,13 @@ static inline void aesctr_transform(ptls_cipher_context_t *_ctx, void *output, c cf_ctr_cipher(&ctx->ctr, input, output, len); } -static inline int aesctr_setup_crypto(ptls_cipher_context_t *_ctx, int is_enc, const void *key, size_t key_size) +static inline int aesctr_setup_crypto(ptls_cipher_context_t *_ctx, int is_enc, const void *key) { struct aesctr_context_t *ctx = (struct aesctr_context_t *)_ctx; ctx->super.do_dispose = aesctr_dispose; ctx->super.do_init = aesctr_init; ctx->super.do_transform = aesctr_transform; - cf_aes_init(&ctx->aes, key, key_size); + cf_aes_init(&ctx->aes, key, ctx->super.algo->key_size); return 0; } @@ -99,6 +100,7 @@ struct aesgcm_context_t { ptls_aead_context_t super; cf_aes_context aes; cf_gcm_ctx gcm; + uint8_t static_iv[PTLS_AESGCM_IV_SIZE]; }; static inline void aesgcm_dispose_crypto(ptls_aead_context_t *_ctx) @@ -109,10 +111,12 @@ static inline void aesgcm_dispose_crypto(ptls_aead_context_t *_ctx) 
ptls_clear_memory((uint8_t *)ctx + sizeof(ctx->super), sizeof(*ctx) - sizeof(ctx->super)); } -static inline void aesgcm_encrypt_init(ptls_aead_context_t *_ctx, const void *iv, const void *aad, size_t aadlen) +static inline void aesgcm_encrypt_init(ptls_aead_context_t *_ctx, uint64_t seq, const void *aad, size_t aadlen) { struct aesgcm_context_t *ctx = (struct aesgcm_context_t *)_ctx; + uint8_t iv[PTLS_AES_BLOCK_SIZE]; + ptls_aead__build_iv(ctx->super.algo, iv, ctx->static_iv, seq); cf_gcm_encrypt_init(&cf_aes, &ctx->aes, &ctx->gcm, aad, aadlen, iv, PTLS_AESGCM_IV_SIZE); } @@ -132,15 +136,17 @@ static inline size_t aesgcm_encrypt_final(ptls_aead_context_t *_ctx, void *outpu return PTLS_AESGCM_TAG_SIZE; } -static inline size_t aesgcm_decrypt(ptls_aead_context_t *_ctx, void *output, const void *input, size_t inlen, const void *iv, - const void *aad, size_t aadlen) +static inline size_t aesgcm_decrypt(ptls_aead_context_t *_ctx, void *output, const void *input, size_t inlen, uint64_t seq, + const void *aad, size_t aadlen) { struct aesgcm_context_t *ctx = (struct aesgcm_context_t *)_ctx; + uint8_t iv[PTLS_AES_BLOCK_SIZE]; if (inlen < PTLS_AESGCM_TAG_SIZE) return SIZE_MAX; size_t tag_offset = inlen - PTLS_AESGCM_TAG_SIZE; + ptls_aead__build_iv(ctx->super.algo, iv, ctx->static_iv, seq); if (cf_gcm_decrypt(&cf_aes, &ctx->aes, input, tag_offset, aad, aadlen, iv, PTLS_AESGCM_IV_SIZE, (uint8_t *)input + tag_offset, PTLS_AESGCM_TAG_SIZE, output) != 0) return SIZE_MAX; @@ -148,7 +154,7 @@ static inline size_t aesgcm_decrypt(ptls_aead_context_t *_ctx, void *output, con return tag_offset; } -static inline int aead_aesgcm_setup_crypto(ptls_aead_context_t *_ctx, int is_enc, const void *key, size_t key_size) +static inline int aead_aesgcm_setup_crypto(ptls_aead_context_t *_ctx, int is_enc, const void *key, const void *iv) { struct aesgcm_context_t *ctx = (struct aesgcm_context_t *)_ctx; @@ -157,6 +163,7 @@ static inline int aead_aesgcm_setup_crypto(ptls_aead_context_t *_ctx, int 
is_enc ctx->super.do_encrypt_init = aesgcm_encrypt_init; ctx->super.do_encrypt_update = aesgcm_encrypt_update; ctx->super.do_encrypt_final = aesgcm_encrypt_final; + ctx->super.do_encrypt = ptls_aead__do_encrypt; ctx->super.do_decrypt = NULL; } else { ctx->super.do_encrypt_init = NULL; @@ -165,6 +172,7 @@ static inline int aead_aesgcm_setup_crypto(ptls_aead_context_t *_ctx, int is_enc ctx->super.do_decrypt = aesgcm_decrypt; } - cf_aes_init(&ctx->aes, key, key_size); + cf_aes_init(&ctx->aes, key, ctx->super.algo->key_size); + memcpy(ctx->static_iv, iv, sizeof(ctx->static_iv)); return 0; } diff --git a/lib/cifra/aes128.c b/lib/cifra/aes128.c index 31a25985e..7f6817ff7 100644 --- a/lib/cifra/aes128.c +++ b/lib/cifra/aes128.c @@ -23,17 +23,17 @@ static int aes128ecb_setup_crypto(ptls_cipher_context_t *ctx, int is_enc, const void *key) { - return aesecb_setup_crypto(ctx, is_enc, key, PTLS_AES128_KEY_SIZE); + return aesecb_setup_crypto(ctx, is_enc, key); } static int aes128ctr_setup_crypto(ptls_cipher_context_t *ctx, int is_enc, const void *key) { - return aesctr_setup_crypto(ctx, is_enc, key, PTLS_AES128_KEY_SIZE); + return aesctr_setup_crypto(ctx, is_enc, key); } -static int aead_aes128gcm_setup_crypto(ptls_aead_context_t *ctx, int is_enc, const void *key) +static int aead_aes128gcm_setup_crypto(ptls_aead_context_t *ctx, int is_enc, const void *key, const void *iv) { - return aead_aesgcm_setup_crypto(ctx, is_enc, key, PTLS_AES128_KEY_SIZE); + return aead_aesgcm_setup_crypto(ctx, is_enc, key, iv); } ptls_define_hash(sha256, cf_sha256_context, cf_sha256_init, cf_sha256_update, cf_sha256_digest_final); diff --git a/lib/cifra/aes256.c b/lib/cifra/aes256.c index b2cec2c86..5b925be8d 100644 --- a/lib/cifra/aes256.c +++ b/lib/cifra/aes256.c @@ -23,17 +23,17 @@ static int aes256ecb_setup_crypto(ptls_cipher_context_t *ctx, int is_enc, const void *key) { - return aesecb_setup_crypto(ctx, is_enc, key, PTLS_AES256_KEY_SIZE); + return aesecb_setup_crypto(ctx, is_enc, key); } static 
int aes256ctr_setup_crypto(ptls_cipher_context_t *ctx, int is_enc, const void *key) { - return aesctr_setup_crypto(ctx, is_enc, key, PTLS_AES256_KEY_SIZE); + return aesctr_setup_crypto(ctx, is_enc, key); } -static int aead_aes256gcm_setup_crypto(ptls_aead_context_t *ctx, int is_enc, const void *key) +static int aead_aes256gcm_setup_crypto(ptls_aead_context_t *ctx, int is_enc, const void *key, const void *iv) { - return aead_aesgcm_setup_crypto(ctx, is_enc, key, PTLS_AES256_KEY_SIZE); + return aead_aesgcm_setup_crypto(ctx, is_enc, key, iv); } ptls_define_hash(sha384, cf_sha512_context, cf_sha384_init, cf_sha384_update, cf_sha384_digest_final); diff --git a/lib/cifra/chacha20.c b/lib/cifra/chacha20.c index ab956fae3..a5f302fa8 100644 --- a/lib/cifra/chacha20.c +++ b/lib/cifra/chacha20.c @@ -67,6 +67,7 @@ static int chacha20_setup_crypto(ptls_cipher_context_t *_ctx, int is_enc, const struct chacha20poly1305_context_t { ptls_aead_context_t super; uint8_t key[PTLS_CHACHA20_KEY_SIZE]; + uint8_t static_iv[PTLS_CHACHA20POLY1305_IV_SIZE]; cf_chacha20_ctx chacha; cf_poly1305 poly; size_t aadlen; @@ -102,14 +103,14 @@ static void chacha20poly1305_finalize(struct chacha20poly1305_context_t *ctx, ui cf_poly1305_finish(&ctx->poly, tag); } -static void chacha20poly1305_init(ptls_aead_context_t *_ctx, const void *iv, const void *aad, size_t aadlen) +static void chacha20poly1305_init(ptls_aead_context_t *_ctx, uint64_t seq, const void *aad, size_t aadlen) { struct chacha20poly1305_context_t *ctx = (struct chacha20poly1305_context_t *)_ctx; uint8_t tmpbuf[64]; /* init chacha */ memset(tmpbuf, 0, 16 - PTLS_CHACHA20POLY1305_IV_SIZE); - memcpy(tmpbuf + 16 - PTLS_CHACHA20POLY1305_IV_SIZE, iv, PTLS_CHACHA20POLY1305_IV_SIZE); + ptls_aead__build_iv(ctx->super.algo, tmpbuf + 16 - PTLS_CHACHA20POLY1305_IV_SIZE, ctx->static_iv, seq); cf_chacha20_init_custom(&ctx->chacha, ctx->key, sizeof(ctx->key), tmpbuf, 4); /* init poly1305 (by using first 16 bytes of the key stream of the first block) */ 
@@ -149,7 +150,7 @@ static size_t chacha20poly1305_encrypt_final(ptls_aead_context_t *_ctx, void *ou return PTLS_CHACHA20POLY1305_TAG_SIZE; } -static size_t chacha20poly1305_decrypt(ptls_aead_context_t *_ctx, void *output, const void *input, size_t inlen, const void *iv, +static size_t chacha20poly1305_decrypt(ptls_aead_context_t *_ctx, void *output, const void *input, size_t inlen, uint64_t seq, const void *aad, size_t aadlen) { struct chacha20poly1305_context_t *ctx = (struct chacha20poly1305_context_t *)_ctx; @@ -159,7 +160,7 @@ static size_t chacha20poly1305_decrypt(ptls_aead_context_t *_ctx, void *output, if (inlen < sizeof(tag)) return SIZE_MAX; - chacha20poly1305_init(&ctx->super, iv, aad, aadlen); + chacha20poly1305_init(&ctx->super, seq, aad, aadlen); cf_poly1305_update(&ctx->poly, input, inlen - sizeof(tag)); ctx->textlen = inlen - sizeof(tag); @@ -178,7 +179,7 @@ static size_t chacha20poly1305_decrypt(ptls_aead_context_t *_ctx, void *output, return ret; } -static int aead_chacha20poly1305_setup_crypto(ptls_aead_context_t *_ctx, int is_enc, const void *key) +static int aead_chacha20poly1305_setup_crypto(ptls_aead_context_t *_ctx, int is_enc, const void *key, const void *iv) { struct chacha20poly1305_context_t *ctx = (struct chacha20poly1305_context_t *)_ctx; @@ -187,6 +188,7 @@ static int aead_chacha20poly1305_setup_crypto(ptls_aead_context_t *_ctx, int is_ ctx->super.do_encrypt_init = chacha20poly1305_init; ctx->super.do_encrypt_update = chacha20poly1305_encrypt_update; ctx->super.do_encrypt_final = chacha20poly1305_encrypt_final; + ctx->super.do_encrypt = ptls_aead__do_encrypt; ctx->super.do_decrypt = NULL; } else { ctx->super.do_encrypt_init = NULL; @@ -196,6 +198,7 @@ static int aead_chacha20poly1305_setup_crypto(ptls_aead_context_t *_ctx, int is_ } memcpy(ctx->key, key, sizeof(ctx->key)); + memcpy(ctx->static_iv, iv, sizeof(ctx->static_iv)); return 0; } diff --git a/lib/fusion.c b/lib/fusion.c new file mode 100644 index 000000000..3af2bd175 --- 
/dev/null +++ b/lib/fusion.c @@ -0,0 +1,1010 @@ +/* + * This source file is licensed under the Apache License 2.0 *and* the MIT + * License. Please agree to *both* of the licensing terms! + * + * + * `transformH` function is a derivative work of OpenSSL. The original work + * is covered by the following license: + * + * Copyright 2013-2020 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + * + * + * All other work, including modifications to the `transformH` function is + * covered by the following MIT license: + * + * Copyright (c) 2020 Fastly, Kazuho Oku + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include "picotls.h" +#include "picotls/fusion.h" + +struct ptls_fusion_aesgcm_context { + ptls_fusion_aesecb_context_t ecb; + size_t capacity; + size_t ghash_cnt; + struct ptls_fusion_aesgcm_ghash_precompute { + __m128i H; + __m128i r; + } ghash[0]; +}; + +struct ctr_context { + ptls_cipher_context_t super; + ptls_fusion_aesecb_context_t fusion; + __m128i bits; + uint8_t is_ready; +}; + +struct aesgcm_context { + ptls_aead_context_t super; + ptls_fusion_aesgcm_context_t *aesgcm; + /** + * retains the static IV in the upper 96 bits (in little endian) + */ + __m128i static_iv; +}; + +static const uint64_t poly_[2] __attribute__((aligned(16))) = {1, 0xc200000000000000}; +#define poly (*(__m128i *)poly_) +static const uint8_t bswap8_[16] __attribute__((aligned(16))) = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; +#define bswap8 (*(__m128i *)bswap8_) +static const uint8_t one8_[16] __attribute__((aligned(16))) = {1}; +#define one8 (*(__m128i *)one8_) + +/* This function is covered by the Apache License and the MIT License. The origin is crypto/modes/asm/ghash-x86_64.pl of openssl + * at commit 33388b4. 
*/ +static __m128i transformH(__m128i H) +{ + // # <<1 twist + // pshufd \$0b11111111,$Hkey,$T2 # broadcast uppermost dword + __m128i t2 = _mm_shuffle_epi32(H, 0xff); + // movdqa $Hkey,$T1 + __m128i t1 = H; + // psllq \$1,$Hkey + H = _mm_slli_epi64(H, 1); + // pxor $T3,$T3 # + __m128i t3 = _mm_setzero_si128(); + // psrlq \$63,$T1 + t1 = _mm_srli_epi64(t1, 63); + // pcmpgtd $T2,$T3 # broadcast carry bit + t3 = _mm_cmplt_epi32(t2, t3); + // pslldq \$8,$T1 + t1 = _mm_slli_si128(t1, 8); + // por $T1,$Hkey # H<<=1 + H = _mm_or_si128(t1, H); + + // # magic reduction + // pand .L0x1c2_polynomial(%rip),$T3 + t3 = _mm_and_si128(t3, poly); + // pxor $T3,$Hkey # if(carry) H^=0x1c2_polynomial + H = _mm_xor_si128(t3, H); + + return H; +} +// end of Apache License code + +static __m128i gfmul(__m128i x, __m128i y) +{ + __m128i lo = _mm_clmulepi64_si128(x, y, 0x00); + __m128i hi = _mm_clmulepi64_si128(x, y, 0x11); + + __m128i a = _mm_shuffle_epi32(x, 78); + __m128i b = _mm_shuffle_epi32(y, 78); + a = _mm_xor_si128(a, x); + b = _mm_xor_si128(b, y); + + a = _mm_clmulepi64_si128(a, b, 0x00); + a = _mm_xor_si128(a, lo); + a = _mm_xor_si128(a, hi); + + b = _mm_slli_si128(a, 8); + a = _mm_srli_si128(a, 8); + + lo = _mm_xor_si128(lo, b); + hi = _mm_xor_si128(hi, a); + + // from https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf + __m128i t = _mm_clmulepi64_si128(lo, poly, 0x10); + lo = _mm_shuffle_epi32(lo, 78); + lo = _mm_xor_si128(lo, t); + t = _mm_clmulepi64_si128(lo, poly, 0x10); + lo = _mm_shuffle_epi32(lo, 78); + lo = _mm_xor_si128(lo, t); + + return _mm_xor_si128(hi, lo); +} + +struct ptls_fusion_gfmul_state { + __m128i hi, lo, mid; +}; + +static inline void gfmul_onestep(struct ptls_fusion_gfmul_state *gstate, __m128i X, + struct ptls_fusion_aesgcm_ghash_precompute *precompute) +{ + X = _mm_shuffle_epi8(X, bswap8); + __m128i t = _mm_clmulepi64_si128(precompute->H, X, 0x00); + gstate->lo = _mm_xor_si128(gstate->lo, t); + t = _mm_clmulepi64_si128(precompute->H, X, 
0x11); + gstate->hi = _mm_xor_si128(gstate->hi, t); + t = _mm_shuffle_epi32(X, 78); + t = _mm_xor_si128(t, X); + t = _mm_clmulepi64_si128(precompute->r, t, 0x00); + gstate->mid = _mm_xor_si128(gstate->mid, t); +} + +static inline __m128i gfmul_final(struct ptls_fusion_gfmul_state *gstate, __m128i ek0) +{ + /* finish multiplication */ + gstate->mid = _mm_xor_si128(gstate->mid, gstate->hi); + gstate->mid = _mm_xor_si128(gstate->mid, gstate->lo); + gstate->lo = _mm_xor_si128(gstate->lo, _mm_slli_si128(gstate->mid, 8)); + gstate->hi = _mm_xor_si128(gstate->hi, _mm_srli_si128(gstate->mid, 8)); + + /* fast reduction, using https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf */ + __m128i r = _mm_clmulepi64_si128(gstate->lo, poly, 0x10); + gstate->lo = _mm_shuffle_epi32(gstate->lo, 78); + gstate->lo = _mm_xor_si128(gstate->lo, r); + r = _mm_clmulepi64_si128(gstate->lo, poly, 0x10); + gstate->lo = _mm_shuffle_epi32(gstate->lo, 78); + gstate->lo = _mm_xor_si128(gstate->lo, r); + __m128i tag = _mm_xor_si128(gstate->hi, gstate->lo); + tag = _mm_shuffle_epi8(tag, bswap8); + tag = _mm_xor_si128(tag, ek0); + + return tag; +} + +static inline __m128i aesecb_encrypt(ptls_fusion_aesecb_context_t *ctx, __m128i v) +{ + size_t i; + + v = _mm_xor_si128(v, ctx->keys[0]); + for (i = 1; i < ctx->rounds; ++i) + v = _mm_aesenc_si128(v, ctx->keys[i]); + v = _mm_aesenclast_si128(v, ctx->keys[i]); + + return v; +} + +static const uint8_t loadn_mask[31] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; +static const uint8_t loadn_shuffle[31] = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, // first 16 bytes map to byte offsets + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, + 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}; // latter 15 bytes map to zero + +static inline __m128i loadn(const void *p, size_t l) +{ + __m128i v, mask = _mm_loadu_si128((__m128i *)(loadn_mask + 16 - l)); + 
uintptr_t mod4k = (uintptr_t)p % 4096; + + if (PTLS_LIKELY(mod4k <= 4080) || mod4k + l > 4096) { + v = _mm_loadu_si128(p); + } else { + uintptr_t shift = (uintptr_t)p & 15; + __m128i pattern = _mm_loadu_si128((const __m128i *)(loadn_shuffle + shift)); + v = _mm_shuffle_epi8(_mm_load_si128((const __m128i *)((uintptr_t)p - shift)), pattern); + } + v = _mm_and_si128(v, mask); + return v; +} + +static inline void storen(void *_p, size_t l, __m128i v) +{ + uint8_t buf[16], *p = _p; + + *(__m128i *)buf = v; + + for (size_t i = 0; i != l; ++i) + p[i] = buf[i]; +} + +void ptls_fusion_aesgcm_encrypt(ptls_fusion_aesgcm_context_t *ctx, void *output, const void *input, size_t inlen, __m128i ctr, + const void *_aad, size_t aadlen, ptls_aead_supplementary_encryption_t *supp) +{ +/* init the bits (we can always run in full), but use the last slot for calculating ek0, if possible */ +#define AESECB6_INIT() \ + do { \ + ctr = _mm_add_epi64(ctr, one8); \ + bits0 = _mm_shuffle_epi8(ctr, bswap8); \ + ctr = _mm_add_epi64(ctr, one8); \ + bits1 = _mm_shuffle_epi8(ctr, bswap8); \ + ctr = _mm_add_epi64(ctr, one8); \ + bits2 = _mm_shuffle_epi8(ctr, bswap8); \ + ctr = _mm_add_epi64(ctr, one8); \ + bits3 = _mm_shuffle_epi8(ctr, bswap8); \ + ctr = _mm_add_epi64(ctr, one8); \ + bits4 = _mm_shuffle_epi8(ctr, bswap8); \ + if (PTLS_LIKELY(srclen > 16 * 5)) { \ + ctr = _mm_add_epi64(ctr, one8); \ + bits5 = _mm_shuffle_epi8(ctr, bswap8); \ + } else { \ + if ((state & STATE_EK0_BEEN_FED) == 0) { \ + bits5 = ek0; \ + state |= STATE_EK0_BEEN_FED; \ + } \ + if ((state & STATE_SUPP_USED) != 0 && srclen <= 16 * 4 && (const __m128i *)supp->input + 1 <= dst_ghash) { \ + bits4 = _mm_loadu_si128(supp->input); \ + bits4keys = ((struct ctr_context *)supp->ctx)->fusion.keys; \ + state |= STATE_SUPP_IN_PROCESS; \ + } \ + } \ + __m128i k = ctx->ecb.keys[0]; \ + bits0 = _mm_xor_si128(bits0, k); \ + bits1 = _mm_xor_si128(bits1, k); \ + bits2 = _mm_xor_si128(bits2, k); \ + bits3 = _mm_xor_si128(bits3, k); \ + bits4 = 
_mm_xor_si128(bits4, bits4keys[0]); \ + bits5 = _mm_xor_si128(bits5, k); \ + } while (0) + +/* aes block update */ +#define AESECB6_UPDATE(i) \ + do { \ + __m128i k = ctx->ecb.keys[i]; \ + bits0 = _mm_aesenc_si128(bits0, k); \ + bits1 = _mm_aesenc_si128(bits1, k); \ + bits2 = _mm_aesenc_si128(bits2, k); \ + bits3 = _mm_aesenc_si128(bits3, k); \ + bits4 = _mm_aesenc_si128(bits4, bits4keys[i]); \ + bits5 = _mm_aesenc_si128(bits5, k); \ + } while (0) + +/* aesenclast */ +#define AESECB6_FINAL(i) \ + do { \ + __m128i k = ctx->ecb.keys[i]; \ + bits0 = _mm_aesenclast_si128(bits0, k); \ + bits1 = _mm_aesenclast_si128(bits1, k); \ + bits2 = _mm_aesenclast_si128(bits2, k); \ + bits3 = _mm_aesenclast_si128(bits3, k); \ + bits4 = _mm_aesenclast_si128(bits4, bits4keys[i]); \ + bits5 = _mm_aesenclast_si128(bits5, k); \ + } while (0) + + __m128i ek0, bits0, bits1, bits2, bits3, bits4, bits5 = _mm_setzero_si128(); + const __m128i *bits4keys = ctx->ecb.keys; /* is changed to supp->ctx.keys when calcurating suppout */ + struct ptls_fusion_gfmul_state gstate = {}; + __m128i gdatabuf[6]; + __m128i ac = _mm_shuffle_epi8(_mm_set_epi32(0, (int)aadlen * 8, 0, (int)inlen * 8), bswap8); + + // src and dst are updated after the chunk is processed + const __m128i *src = input; + __m128i *dst = output; + size_t srclen = inlen; + // aad and src_ghash are updated before the chunk is processed (i.e., when the pointers are fed indo the processor) + const __m128i *aad = _aad, *dst_ghash = dst; + size_t dst_ghashlen = srclen; + + struct ptls_fusion_aesgcm_ghash_precompute *ghash_precompute = ctx->ghash + (aadlen + 15) / 16 + (srclen + 15) / 16 + 1; + +#define STATE_EK0_BEEN_FED 0x3 +#define STATE_EK0_INCOMPLETE 0x2 +#define STATE_EK0_READY() ((state & STATE_EK0_BEEN_FED) == 0x1) +#define STATE_SUPP_USED 0x4 +#define STATE_SUPP_IN_PROCESS 0x8 + int32_t state = supp != NULL ? 
STATE_SUPP_USED : 0; + + /* build counter */ + ctr = _mm_insert_epi32(ctr, 1, 0); + ek0 = _mm_shuffle_epi8(ctr, bswap8); + + /* start preparing AES */ + AESECB6_INIT(); + AESECB6_UPDATE(1); + + /* build first ghash data (only AAD can be fed at this point, as this would be calculated alongside the first AES block) */ + const __m128i *gdata = gdatabuf; // points to the elements fed into GHASH + size_t gdata_cnt = 0; + if (PTLS_LIKELY(aadlen != 0)) { + while (gdata_cnt < 6) { + if (PTLS_LIKELY(aadlen < 16)) { + if (aadlen != 0) { + gdatabuf[gdata_cnt++] = loadn(aad, aadlen); + aadlen = 0; + } + goto MainLoop; + } + gdatabuf[gdata_cnt++] = _mm_loadu_si128(aad++); + aadlen -= 16; + } + } + + /* the main loop */ +MainLoop: + while (1) { + /* run AES and multiplication in parallel */ + size_t i; + for (i = 2; i < gdata_cnt + 2; ++i) { + AESECB6_UPDATE(i); + gfmul_onestep(&gstate, _mm_loadu_si128(gdata++), --ghash_precompute); + } + for (; i < ctx->ecb.rounds; ++i) + AESECB6_UPDATE(i); + AESECB6_FINAL(i); + + /* apply the bit stream to src and write to dest */ + if (PTLS_LIKELY(srclen >= 6 * 16)) { +#define APPLY(i) _mm_storeu_si128(dst + i, _mm_xor_si128(_mm_loadu_si128(src + i), bits##i)) + APPLY(0); + APPLY(1); + APPLY(2); + APPLY(3); + APPLY(4); + APPLY(5); +#undef APPLY + dst += 6; + src += 6; + srclen -= 6 * 16; + } else { + if ((state & STATE_EK0_BEEN_FED) == STATE_EK0_BEEN_FED) { + ek0 = bits5; + state &= ~STATE_EK0_INCOMPLETE; + } + if ((state & STATE_SUPP_IN_PROCESS) != 0) { + _mm_storeu_si128((__m128i *)supp->output, bits4); + state &= ~(STATE_SUPP_USED | STATE_SUPP_IN_PROCESS); + } + if (srclen != 0) { +#define APPLY(i) \ + do { \ + if (PTLS_LIKELY(srclen >= 16)) { \ + _mm_storeu_si128(dst++, _mm_xor_si128(_mm_loadu_si128(src++), bits##i)); \ + srclen -= 16; \ + } else if (PTLS_LIKELY(srclen != 0)) { \ + bits0 = bits##i; \ + goto ApplyRemainder; \ + } else { \ + goto ApplyEnd; \ + } \ + } while (0) + APPLY(0); + APPLY(1); + APPLY(2); + APPLY(3); + APPLY(4); + 
APPLY(5); +#undef APPLY + goto ApplyEnd; + ApplyRemainder: + storen(dst, srclen, _mm_xor_si128(loadn(src, srclen), bits0)); + dst = (__m128i *)((uint8_t *)dst + srclen); + srclen = 0; + ApplyEnd:; + } + } + + /* next block AES starts here */ + AESECB6_INIT(); + + AESECB6_UPDATE(1); + + /* setup gdata */ + if (PTLS_UNLIKELY(aadlen != 0)) { + gdata_cnt = 0; + while (gdata_cnt < 6) { + if (aadlen < 16) { + if (aadlen != 0) { + gdatabuf[gdata_cnt++] = loadn(aad, aadlen); + aadlen = 0; + } + goto GdataFillDST; + } + gdatabuf[gdata_cnt++] = _mm_loadu_si128(aad++); + aadlen -= 16; + } + gdata = gdatabuf; + } else if (PTLS_LIKELY(dst_ghashlen >= 6 * 16)) { + gdata = dst_ghash; + gdata_cnt = 6; + dst_ghash += 6; + dst_ghashlen -= 96; + } else { + gdata_cnt = 0; + GdataFillDST: + while (gdata_cnt < 6) { + if (dst_ghashlen < 16) { + if (dst_ghashlen != 0) { + gdatabuf[gdata_cnt++] = loadn(dst_ghash, dst_ghashlen); + dst_ghashlen = 0; + } + if (gdata_cnt < 6) + goto Finish; + break; + } + gdatabuf[gdata_cnt++] = _mm_loadu_si128(dst_ghash++); + dst_ghashlen -= 16; + } + gdata = gdatabuf; + } + } + +Finish: + gdatabuf[gdata_cnt++] = ac; + + /* We have complete set of data to be fed into GHASH. Let's finish the remaining calculation. + * Note that by now, all AES operations for payload encryption and ek0 are complete. This is is because it is necessary for GCM + * to process at least the same amount of data (i.e. payload-blocks + AC), and because AES is at least one 96-byte block ahead. + */ + assert(STATE_EK0_READY()); + for (size_t i = 0; i < gdata_cnt; ++i) + gfmul_onestep(&gstate, gdatabuf[i], --ghash_precompute); + + _mm_storeu_si128(dst, gfmul_final(&gstate, ek0)); + + /* Finish the calculation of supplemental vector. Done at the very last, because the sample might cover the GCM tag. 
*/ + if ((state & STATE_SUPP_USED) != 0) { + size_t i; + if ((state & STATE_SUPP_IN_PROCESS) == 0) { + bits4keys = ((struct ctr_context *)supp->ctx)->fusion.keys; + bits4 = _mm_xor_si128(_mm_loadu_si128(supp->input), bits4keys[0]); + i = 1; + } else { + i = 2; + } + do { + bits4 = _mm_aesenc_si128(bits4, bits4keys[i++]); + } while (i != ctx->ecb.rounds); + bits4 = _mm_aesenclast_si128(bits4, bits4keys[i]); + _mm_storeu_si128((__m128i *)supp->output, bits4); + } + +/* drop every helper macro defined inside this function so that none of them leaks into the code below */ +#undef AESECB6_INIT +#undef AESECB6_UPDATE +#undef AESECB6_FINAL +#undef STATE_EK0_BEEN_FED +#undef STATE_EK0_INCOMPLETE +#undef STATE_EK0_READY +#undef STATE_SUPP_USED +#undef STATE_SUPP_IN_PROCESS +} + +int ptls_fusion_aesgcm_decrypt(ptls_fusion_aesgcm_context_t *ctx, void *output, const void *input, size_t inlen, __m128i ctr, + const void *_aad, size_t aadlen, const void *tag) +{ + __m128i ek0 = _mm_setzero_si128(), bits0, bits1 = _mm_setzero_si128(), bits2 = _mm_setzero_si128(), bits3 = _mm_setzero_si128(), + bits4 = _mm_setzero_si128(), bits5 = _mm_setzero_si128(); + struct ptls_fusion_gfmul_state gstate = {}; + __m128i gdatabuf[6]; + __m128i ac = _mm_shuffle_epi8(_mm_set_epi32(0, (int)aadlen * 8, 0, (int)inlen * 8), bswap8); + struct ptls_fusion_aesgcm_ghash_precompute *ghash_precompute = ctx->ghash + (aadlen + 15) / 16 + (inlen + 15) / 16 + 1; + + const __m128i *gdata; // points to the elements fed into GHASH + size_t gdata_cnt; + + const __m128i *src_ghash = input, *src_aes = input, *aad = _aad; + __m128i *dst = output; + size_t nondata_aes_cnt = 0, src_ghashlen = inlen, src_aeslen = inlen; + + /* schedule ek0 and suppkey */ + ctr = _mm_add_epi64(ctr, one8); + bits0 = _mm_xor_si128(_mm_shuffle_epi8(ctr, bswap8), ctx->ecb.keys[0]); + ++nondata_aes_cnt; + +#define STATE_IS_FIRST_RUN 0x1 +#define STATE_GHASH_HAS_MORE 0x2 + int state = STATE_IS_FIRST_RUN | STATE_GHASH_HAS_MORE; + + /* the main loop */ + while (1) { + + /* setup gdata */ + if (PTLS_UNLIKELY(aadlen != 0)) { + gdata = gdatabuf; + gdata_cnt = 0; + while (gdata_cnt < 6) { + if (aadlen < 
16) { + if (aadlen != 0) { + gdatabuf[gdata_cnt++] = loadn(aad, aadlen); + aadlen = 0; + ++nondata_aes_cnt; + } + goto GdataFillSrc; + } + gdatabuf[gdata_cnt++] = _mm_loadu_si128(aad++); + aadlen -= 16; + ++nondata_aes_cnt; + } + } else if (PTLS_LIKELY(src_ghashlen >= 6 * 16)) { + gdata = src_ghash; + gdata_cnt = 6; + src_ghash += 6; + src_ghashlen -= 6 * 16; + } else { + gdata = gdatabuf; + gdata_cnt = 0; + GdataFillSrc: + while (gdata_cnt < 6) { + if (src_ghashlen < 16) { + if (src_ghashlen != 0) { + gdatabuf[gdata_cnt++] = loadn(src_ghash, src_ghashlen); + src_ghash = (__m128i *)((uint8_t *)src_ghash + src_ghashlen); + src_ghashlen = 0; + } + if (gdata_cnt < 6 && (state & STATE_GHASH_HAS_MORE) != 0) { + gdatabuf[gdata_cnt++] = ac; + state &= ~STATE_GHASH_HAS_MORE; + } + break; + } + gdatabuf[gdata_cnt++] = _mm_loadu_si128(src_ghash++); + src_ghashlen -= 16; + } + } + + /* setup aes bits */ + if (PTLS_LIKELY(nondata_aes_cnt == 0)) + goto InitAllBits; + switch (nondata_aes_cnt) { +#define INIT_BITS(n, keys) \ + case n: \ + ctr = _mm_add_epi64(ctr, one8); \ + bits##n = _mm_xor_si128(_mm_shuffle_epi8(ctr, bswap8), keys[0]); + InitAllBits: + INIT_BITS(0, ctx->ecb.keys); + INIT_BITS(1, ctx->ecb.keys); + INIT_BITS(2, ctx->ecb.keys); + INIT_BITS(3, ctx->ecb.keys); + INIT_BITS(4, ctx->ecb.keys); + INIT_BITS(5, ctx->ecb.keys); +#undef INIT_BITS + } + + { /* run aes and ghash */ +#define AESECB6_UPDATE(i) \ + do { \ + __m128i k = ctx->ecb.keys[i]; \ + bits0 = _mm_aesenc_si128(bits0, k); \ + bits1 = _mm_aesenc_si128(bits1, k); \ + bits2 = _mm_aesenc_si128(bits2, k); \ + bits3 = _mm_aesenc_si128(bits3, k); \ + bits4 = _mm_aesenc_si128(bits4, k); \ + bits5 = _mm_aesenc_si128(bits5, k); \ + } while (0) + + size_t aesi; + for (aesi = 1; aesi <= gdata_cnt; ++aesi) { + AESECB6_UPDATE(aesi); + gfmul_onestep(&gstate, _mm_loadu_si128(gdata++), --ghash_precompute); + } + for (; aesi < ctx->ecb.rounds; ++aesi) + AESECB6_UPDATE(aesi); + __m128i k = ctx->ecb.keys[aesi]; + bits0 = 
_mm_aesenclast_si128(bits0, k); + bits1 = _mm_aesenclast_si128(bits1, k); + bits2 = _mm_aesenclast_si128(bits2, k); + bits3 = _mm_aesenclast_si128(bits3, k); + bits4 = _mm_aesenclast_si128(bits4, k); + bits5 = _mm_aesenclast_si128(bits5, k); + +#undef AESECB6_UPDATE + } + + /* apply aes bits */ + if (PTLS_LIKELY(nondata_aes_cnt == 0 && src_aeslen >= 6 * 16)) { +#define APPLY(i) _mm_storeu_si128(dst + i, _mm_xor_si128(_mm_loadu_si128(src_aes + i), bits##i)) + APPLY(0); + APPLY(1); + APPLY(2); + APPLY(3); + APPLY(4); + APPLY(5); +#undef APPLY + dst += 6; + src_aes += 6; + src_aeslen -= 6 * 16; + } else { + if ((state & STATE_IS_FIRST_RUN) != 0) { + ek0 = bits0; + state &= ~STATE_IS_FIRST_RUN; + } + switch (nondata_aes_cnt) { +#define APPLY(i) \ + case i: \ + if (PTLS_LIKELY(src_aeslen > 16)) { \ + _mm_storeu_si128(dst++, _mm_xor_si128(_mm_loadu_si128(src_aes++), bits##i)); \ + src_aeslen -= 16; \ + } else { \ + bits0 = bits##i; \ + goto Finish; \ + } + APPLY(0); + APPLY(1); + APPLY(2); + APPLY(3); + APPLY(4); + APPLY(5); +#undef APPLY + } + nondata_aes_cnt = 0; + } + } + +Finish: + if (src_aeslen == 16) { + _mm_storeu_si128(dst, _mm_xor_si128(_mm_loadu_si128(src_aes), bits0)); + } else if (src_aeslen != 0) { + storen(dst, src_aeslen, _mm_xor_si128(loadn(src_aes, src_aeslen), bits0)); + } + + assert((state & STATE_IS_FIRST_RUN) == 0); + + /* the only case where AES operation is complete and GHASH is not is when the application of AC is remaining */ + if ((state & STATE_GHASH_HAS_MORE) != 0) { + assert(ghash_precompute - 1 == ctx->ghash); + gfmul_onestep(&gstate, ac, --ghash_precompute); + } + + __m128i calctag = gfmul_final(&gstate, ek0); + + return _mm_movemask_epi8(_mm_cmpeq_epi8(calctag, _mm_loadu_si128(tag))) == 0xffff; + +#undef STATE_IS_FIRST_RUN +#undef STATE_GHASH_HAS_MORE +} + +static __m128i expand_key(__m128i key, __m128i temp) +{ + key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); + key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); + key = 
_mm_xor_si128(key, _mm_slli_si128(key, 4)); + + key = _mm_xor_si128(key, temp); + + return key; +} + +void ptls_fusion_aesecb_init(ptls_fusion_aesecb_context_t *ctx, int is_enc, const void *key, size_t key_size) +{ + assert(is_enc && "decryption is not supported (yet)"); + + size_t i = 0; + + switch (key_size) { + case 16: /* AES128 */ + ctx->rounds = 10; + break; + case 32: /* AES256 */ + ctx->rounds = 14; + break; + default: + assert(!"invalid key size; AES128 / AES256 are supported"); + break; + } + + ctx->keys[i++] = _mm_loadu_si128((__m128i *)key); + if (key_size == 32) + ctx->keys[i++] = _mm_loadu_si128((__m128i *)key + 1); + +#define EXPAND(R) \ + do { \ + ctx->keys[i] = expand_key(ctx->keys[i - key_size / 16], \ + _mm_shuffle_epi32(_mm_aeskeygenassist_si128(ctx->keys[i - 1], R), _MM_SHUFFLE(3, 3, 3, 3))); \ + if (i == ctx->rounds) \ + goto Done; \ + ++i; \ + if (key_size > 24) { \ + ctx->keys[i] = expand_key(ctx->keys[i - key_size / 16], \ + _mm_shuffle_epi32(_mm_aeskeygenassist_si128(ctx->keys[i - 1], R), _MM_SHUFFLE(2, 2, 2, 2))); \ + ++i; \ + } \ + } while (0) + EXPAND(0x1); + EXPAND(0x2); + EXPAND(0x4); + EXPAND(0x8); + EXPAND(0x10); + EXPAND(0x20); + EXPAND(0x40); + EXPAND(0x80); + EXPAND(0x1b); + EXPAND(0x36); +#undef EXPAND +Done: + assert(i == ctx->rounds); +} + +void ptls_fusion_aesecb_dispose(ptls_fusion_aesecb_context_t *ctx) +{ + ptls_clear_memory(ctx, sizeof(*ctx)); +} + +void ptls_fusion_aesecb_encrypt(ptls_fusion_aesecb_context_t *ctx, void *dst, const void *src) +{ + __m128i v = _mm_loadu_si128(src); + v = aesecb_encrypt(ctx, v); + _mm_storeu_si128(dst, v); +} + +/** + * returns the number of ghash entries that is required to handle an AEAD block of given size + */ +static size_t aesgcm_calc_ghash_cnt(size_t capacity) +{ + // round-up by block size, add to handle worst split of the size between AAD and payload, plus context to hash AC + return (capacity + 15) / 16 + 2; +} + +static void setup_one_ghash_entry(ptls_fusion_aesgcm_context_t 
*ctx) +{ + if (ctx->ghash_cnt != 0) + ctx->ghash[ctx->ghash_cnt].H = gfmul(ctx->ghash[ctx->ghash_cnt - 1].H, ctx->ghash[0].H); + + __m128i r = _mm_shuffle_epi32(ctx->ghash[ctx->ghash_cnt].H, 78); + r = _mm_xor_si128(r, ctx->ghash[ctx->ghash_cnt].H); + ctx->ghash[ctx->ghash_cnt].r = r; + + ++ctx->ghash_cnt; +} + +ptls_fusion_aesgcm_context_t *ptls_fusion_aesgcm_new(const void *key, size_t key_size, size_t capacity) +{ + ptls_fusion_aesgcm_context_t *ctx; + size_t ghash_cnt = aesgcm_calc_ghash_cnt(capacity); + + if ((ctx = malloc(sizeof(*ctx) + sizeof(ctx->ghash[0]) * ghash_cnt)) == NULL) + return NULL; + + ptls_fusion_aesecb_init(&ctx->ecb, 1, key, key_size); + + ctx->capacity = capacity; + + ctx->ghash[0].H = aesecb_encrypt(&ctx->ecb, _mm_setzero_si128()); + ctx->ghash[0].H = _mm_shuffle_epi8(ctx->ghash[0].H, bswap8); + ctx->ghash[0].H = transformH(ctx->ghash[0].H); + ctx->ghash_cnt = 0; + while (ctx->ghash_cnt < ghash_cnt) + setup_one_ghash_entry(ctx); + + return ctx; +} + +ptls_fusion_aesgcm_context_t *ptls_fusion_aesgcm_set_capacity(ptls_fusion_aesgcm_context_t *ctx, size_t capacity) +{ + size_t ghash_cnt = aesgcm_calc_ghash_cnt(capacity); + + if (ghash_cnt <= ctx->ghash_cnt) + return ctx; + + /* NOTE: when realloc fails NULL is returned and the original context stays allocated; the caller must keep its own pointer */ + if ((ctx = realloc(ctx, sizeof(*ctx) + sizeof(ctx->ghash[0]) * ghash_cnt)) == NULL) + return NULL; + + ctx->capacity = capacity; + /* ctx->ghash_cnt still holds the old entry count; compute the entries that the enlarged buffer has room for */ + while (ctx->ghash_cnt < ghash_cnt) + setup_one_ghash_entry(ctx); + + return ctx; +} + +void ptls_fusion_aesgcm_free(ptls_fusion_aesgcm_context_t *ctx) +{ + ptls_clear_memory(ctx->ghash, sizeof(ctx->ghash[0]) * ctx->ghash_cnt); + ctx->ghash_cnt = 0; + ptls_fusion_aesecb_dispose(&ctx->ecb); + free(ctx); +} + +static void ctr_dispose(ptls_cipher_context_t *_ctx) +{ + struct ctr_context *ctx = (struct ctr_context *)_ctx; + ptls_fusion_aesecb_dispose(&ctx->fusion); + _mm_storeu_si128(&ctx->bits, _mm_setzero_si128()); +} + +static void ctr_init(ptls_cipher_context_t *_ctx, const void *iv) +{ + struct ctr_context *ctx = (struct ctr_context *)_ctx; + 
_mm_storeu_si128(&ctx->bits, aesecb_encrypt(&ctx->fusion, _mm_loadu_si128(iv))); + ctx->is_ready = 1; +} + +static void ctr_transform(ptls_cipher_context_t *_ctx, void *output, const void *input, size_t len) +{ + struct ctr_context *ctx = (struct ctr_context *)_ctx; + + assert((ctx->is_ready && len <= 16) || + !"CTR transfomation is supported only once per call to `init` and the maximum size is limited to 16 bytes"); + ctx->is_ready = 0; + + if (len < 16) { + storen(output, len, _mm_xor_si128(_mm_loadu_si128(&ctx->bits), loadn(input, len))); + } else { + _mm_storeu_si128(output, _mm_xor_si128(_mm_loadu_si128(&ctx->bits), _mm_loadu_si128(input))); + } +} + +static int aesctr_setup(ptls_cipher_context_t *_ctx, int is_enc, const void *key, size_t key_size) +{ + struct ctr_context *ctx = (struct ctr_context *)_ctx; + + ctx->super.do_dispose = ctr_dispose; + ctx->super.do_init = ctr_init; + ctx->super.do_transform = ctr_transform; + ptls_fusion_aesecb_init(&ctx->fusion, 1, key, key_size); + ctx->is_ready = 0; + + return 0; +} + +static int aes128ctr_setup(ptls_cipher_context_t *ctx, int is_enc, const void *key) +{ + return aesctr_setup(ctx, is_enc, key, PTLS_AES128_KEY_SIZE); +} + +static int aes256ctr_setup(ptls_cipher_context_t *ctx, int is_enc, const void *key) +{ + return aesctr_setup(ctx, is_enc, key, PTLS_AES256_KEY_SIZE); +} + +static void aesgcm_dispose_crypto(ptls_aead_context_t *_ctx) +{ + struct aesgcm_context *ctx = (struct aesgcm_context *)_ctx; + + ptls_fusion_aesgcm_free(ctx->aesgcm); +} + +static void aead_do_encrypt_init(ptls_aead_context_t *_ctx, uint64_t seq, const void *aad, size_t aadlen) +{ + assert(!"FIXME"); +} + +static size_t aead_do_encrypt_update(ptls_aead_context_t *_ctx, void *output, const void *input, size_t inlen) +{ + assert(!"FIXME"); + return SIZE_MAX; +} + +static size_t aead_do_encrypt_final(ptls_aead_context_t *_ctx, void *_output) +{ + assert(!"FIXME"); + return SIZE_MAX; +} + +static inline __m128i calc_counter(struct 
aesgcm_context *ctx, uint64_t seq) +{ + __m128i ctr = _mm_setzero_si128(); + ctr = _mm_insert_epi64(ctr, seq, 0); + ctr = _mm_slli_si128(ctr, 4); + ctr = _mm_xor_si128(ctx->static_iv, ctr); + return ctr; +} + +void aead_do_encrypt(struct st_ptls_aead_context_t *_ctx, void *output, const void *input, size_t inlen, uint64_t seq, + const void *aad, size_t aadlen, ptls_aead_supplementary_encryption_t *supp) +{ + struct aesgcm_context *ctx = (void *)_ctx; + + if (inlen + aadlen > ctx->aesgcm->capacity) + ctx->aesgcm = ptls_fusion_aesgcm_set_capacity(ctx->aesgcm, inlen + aadlen); + ptls_fusion_aesgcm_encrypt(ctx->aesgcm, output, input, inlen, calc_counter(ctx, seq), aad, aadlen, supp); +} + +static size_t aead_do_decrypt(ptls_aead_context_t *_ctx, void *output, const void *input, size_t inlen, uint64_t seq, + const void *aad, size_t aadlen) +{ + struct aesgcm_context *ctx = (void *)_ctx; + + if (inlen < 16) + return SIZE_MAX; + + size_t enclen = inlen - 16; + if (enclen + aadlen > ctx->aesgcm->capacity) + ctx->aesgcm = ptls_fusion_aesgcm_set_capacity(ctx->aesgcm, enclen + aadlen); + if (!ptls_fusion_aesgcm_decrypt(ctx->aesgcm, output, input, enclen, calc_counter(ctx, seq), aad, aadlen, + (const uint8_t *)input + enclen)) + return SIZE_MAX; + return enclen; +} + +static int aesgcm_setup(ptls_aead_context_t *_ctx, int is_enc, const void *key, const void *iv, size_t key_size) +{ + struct aesgcm_context *ctx = (struct aesgcm_context *)_ctx; + + ctx->static_iv = loadn(iv, PTLS_AESGCM_IV_SIZE); + ctx->static_iv = _mm_shuffle_epi8(ctx->static_iv, bswap8); + if (key == NULL) + return 0; + + ctx->super.dispose_crypto = aesgcm_dispose_crypto; + ctx->super.do_encrypt_init = aead_do_encrypt_init; + ctx->super.do_encrypt_update = aead_do_encrypt_update; + ctx->super.do_encrypt_final = aead_do_encrypt_final; + ctx->super.do_encrypt = aead_do_encrypt; + ctx->super.do_decrypt = aead_do_decrypt; + + /* the context is heap-allocated; report an allocation failure instead of returning success with a NULL context */ + if ((ctx->aesgcm = ptls_fusion_aesgcm_new(key, key_size, 1500 /* assume ordinary packet size
*/)) == NULL) + return PTLS_ERROR_NO_MEMORY; + + return 0; +} + +static int aes128gcm_setup(ptls_aead_context_t *ctx, int is_enc, const void *key, const void *iv) +{ + return aesgcm_setup(ctx, is_enc, key, iv, PTLS_AES128_KEY_SIZE); +} + +static int aes256gcm_setup(ptls_aead_context_t *ctx, int is_enc, const void *key, const void *iv) +{ + return aesgcm_setup(ctx, is_enc, key, iv, PTLS_AES256_KEY_SIZE); +} + +ptls_cipher_algorithm_t ptls_fusion_aes128ctr = {"AES128-CTR", + PTLS_AES128_KEY_SIZE, + 1, // block size + PTLS_AES_IV_SIZE, + sizeof(struct ctr_context), + aes128ctr_setup}; +ptls_cipher_algorithm_t ptls_fusion_aes256ctr = {"AES256-CTR", + PTLS_AES256_KEY_SIZE, + 1, // block size + PTLS_AES_IV_SIZE, + sizeof(struct ctr_context), + aes256ctr_setup}; +ptls_aead_algorithm_t ptls_fusion_aes128gcm = {"AES128-GCM", + &ptls_fusion_aes128ctr, + NULL, // &ptls_fusion_aes128ecb, + PTLS_AES128_KEY_SIZE, + PTLS_AESGCM_IV_SIZE, + PTLS_AESGCM_TAG_SIZE, + sizeof(struct aesgcm_context), + aes128gcm_setup}; +ptls_aead_algorithm_t ptls_fusion_aes256gcm = {"AES256-GCM", + &ptls_fusion_aes256ctr, + NULL, // &ptls_fusion_aes256ecb, + PTLS_AES256_KEY_SIZE, + PTLS_AESGCM_IV_SIZE, + PTLS_AESGCM_TAG_SIZE, + sizeof(struct aesgcm_context), + aes256gcm_setup}; + +int ptls_fusion_is_supported_by_cpu(void) +{ + unsigned leaf1_ecx, leaf7_ebx; + + { /* GCC-specific code to obtain CPU features */ + unsigned leaf_cnt; + __asm__("cpuid" : "=a"(leaf_cnt) : "a"(0) : "ebx", "ecx", "edx"); + if (leaf_cnt < 7) + return 0; + __asm__("cpuid" : "=c"(leaf1_ecx) : "a"(1) : "ebx", "edx"); + __asm__("cpuid" : "=b"(leaf7_ebx) : "a"(7), "c"(0) : "edx"); + } + + /* AVX2 */ + if ((leaf7_ebx & (1 << 5)) == 0) + return 0; + /* AES */ + if ((leaf1_ecx & (1 << 25)) == 0) + return 0; + /* PCLMUL */ + if ((leaf1_ecx & (1 << 1)) == 0) + return 0; + + return 1; +} diff --git a/lib/openssl.c b/lib/openssl.c index e7ae6274b..d907346cf 100644 --- a/lib/openssl.c +++ b/lib/openssl.c @@ -768,6 +768,7 @@ static int bfecb_setup_crypto(ptls_cipher_context_t *ctx, 
int is_enc, const void struct aead_crypto_context_t { ptls_aead_context_t super; EVP_CIPHER_CTX *evp_ctx; + uint8_t static_iv[PTLS_MAX_IV_SIZE]; }; static void aead_dispose_crypto(ptls_aead_context_t *_ctx) @@ -778,12 +779,13 @@ static void aead_dispose_crypto(ptls_aead_context_t *_ctx) EVP_CIPHER_CTX_free(ctx->evp_ctx); } -static void aead_do_encrypt_init(ptls_aead_context_t *_ctx, const void *iv, const void *aad, size_t aadlen) +static void aead_do_encrypt_init(ptls_aead_context_t *_ctx, uint64_t seq, const void *aad, size_t aadlen) { struct aead_crypto_context_t *ctx = (struct aead_crypto_context_t *)_ctx; + uint8_t iv[PTLS_MAX_IV_SIZE]; int ret; - /* FIXME for performance, preserve the expanded key instead of the raw key */ + ptls_aead__build_iv(ctx->super.algo, iv, ctx->static_iv, seq); ret = EVP_EncryptInit_ex(ctx->evp_ctx, NULL, NULL, NULL, iv); assert(ret); @@ -822,17 +824,18 @@ static size_t aead_do_encrypt_final(ptls_aead_context_t *_ctx, void *_output) return off; } -static size_t aead_do_decrypt(ptls_aead_context_t *_ctx, void *_output, const void *input, size_t inlen, const void *iv, +static size_t aead_do_decrypt(ptls_aead_context_t *_ctx, void *_output, const void *input, size_t inlen, uint64_t seq, const void *aad, size_t aadlen) { struct aead_crypto_context_t *ctx = (struct aead_crypto_context_t *)_ctx; - uint8_t *output = _output; + uint8_t *output = _output, iv[PTLS_MAX_IV_SIZE]; size_t off = 0, tag_size = ctx->super.algo->tag_size; int blocklen, ret; if (inlen < tag_size) return SIZE_MAX; + ptls_aead__build_iv(ctx->super.algo, iv, ctx->static_iv, seq); ret = EVP_DecryptInit_ex(ctx->evp_ctx, NULL, NULL, NULL, iv); assert(ret); if (aadlen != 0) { @@ -851,16 +854,21 @@ static size_t aead_do_decrypt(ptls_aead_context_t *_ctx, void *_output, const vo return off; } -static int aead_setup_crypto(ptls_aead_context_t *_ctx, int is_enc, const void *key, const EVP_CIPHER *cipher) +static int aead_setup_crypto(ptls_aead_context_t *_ctx, int is_enc, const 
void *key, const void *iv, const EVP_CIPHER *cipher) { struct aead_crypto_context_t *ctx = (struct aead_crypto_context_t *)_ctx; int ret; + memcpy(ctx->static_iv, iv, ctx->super.algo->iv_size); + if (key == NULL) + return 0; + ctx->super.dispose_crypto = aead_dispose_crypto; if (is_enc) { ctx->super.do_encrypt_init = aead_do_encrypt_init; ctx->super.do_encrypt_update = aead_do_encrypt_update; ctx->super.do_encrypt_final = aead_do_encrypt_final; + ctx->super.do_encrypt = ptls_aead__do_encrypt; ctx->super.do_decrypt = NULL; } else { ctx->super.do_encrypt_init = NULL; @@ -897,20 +905,20 @@ static int aead_setup_crypto(ptls_aead_context_t *_ctx, int is_enc, const void * return ret; } -static int aead_aes128gcm_setup_crypto(ptls_aead_context_t *ctx, int is_enc, const void *key) +static int aead_aes128gcm_setup_crypto(ptls_aead_context_t *ctx, int is_enc, const void *key, const void *iv) { - return aead_setup_crypto(ctx, is_enc, key, EVP_aes_128_gcm()); + return aead_setup_crypto(ctx, is_enc, key, iv, EVP_aes_128_gcm()); } -static int aead_aes256gcm_setup_crypto(ptls_aead_context_t *ctx, int is_enc, const void *key) +static int aead_aes256gcm_setup_crypto(ptls_aead_context_t *ctx, int is_enc, const void *key, const void *iv) { - return aead_setup_crypto(ctx, is_enc, key, EVP_aes_256_gcm()); + return aead_setup_crypto(ctx, is_enc, key, iv, EVP_aes_256_gcm()); } #if PTLS_OPENSSL_HAVE_CHACHA20_POLY1305 -static int aead_chacha20poly1305_setup_crypto(ptls_aead_context_t *ctx, int is_enc, const void *key) +static int aead_chacha20poly1305_setup_crypto(ptls_aead_context_t *ctx, int is_enc, const void *key, const void *iv) { - return aead_setup_crypto(ctx, is_enc, key, EVP_chacha20_poly1305()); + return aead_setup_crypto(ctx, is_enc, key, iv, EVP_chacha20_poly1305()); } #endif diff --git a/lib/picotls.c b/lib/picotls.c index 519ef7734..774b879c0 100644 --- a/lib/picotls.c +++ b/lib/picotls.c @@ -5117,28 +5117,18 @@ void ptls_cipher_free(ptls_cipher_context_t *ctx) 
ptls_aead_context_t *new_aead(ptls_aead_algorithm_t *aead, ptls_hash_algorithm_t *hash, int is_enc, const void *secret, ptls_iovec_t hash_value, const char *label_prefix) { - ptls_aead_context_t *ctx; - uint8_t key[PTLS_MAX_SECRET_SIZE]; + ptls_aead_context_t *ctx = NULL; + uint8_t key_iv[aead->key_size + aead->iv_size]; int ret; - if ((ctx = (ptls_aead_context_t *)malloc(aead->context_size)) == NULL) - return NULL; - - *ctx = (ptls_aead_context_t){aead}; - if ((ret = get_traffic_key(hash, key, aead->key_size, 0, secret, hash_value, label_prefix)) != 0) + if ((ret = get_traffic_key(hash, key_iv, aead->key_size, 0, secret, hash_value, label_prefix)) != 0) goto Exit; - if ((ret = get_traffic_key(hash, ctx->static_iv, aead->iv_size, 1, secret, hash_value, label_prefix)) != 0) + if ((ret = get_traffic_key(hash, key_iv + aead->key_size, aead->iv_size, 1, secret, hash_value, label_prefix)) != 0) goto Exit; - ret = aead->setup_crypto(ctx, is_enc, key); + ctx = ptls_aead_new_direct(aead, is_enc, key_iv, key_iv + aead->key_size); Exit: - ptls_clear_memory(key, aead->key_size); - if (ret != 0) { - ptls_clear_memory(ctx->static_iv, aead->iv_size); - free(ctx); - ctx = NULL; - } - + ptls_clear_memory(key_iv, sizeof(key_iv)); return ctx; } @@ -5148,29 +5138,33 @@ ptls_aead_context_t *ptls_aead_new(ptls_aead_algorithm_t *aead, ptls_hash_algori return new_aead(aead, hash, is_enc, secret, ptls_iovec_init(NULL, 0), label_prefix); } -void ptls_aead_free(ptls_aead_context_t *ctx) +ptls_aead_context_t *ptls_aead_new_direct(ptls_aead_algorithm_t *aead, int is_enc, const void *key, const void *iv) { - ctx->dispose_crypto(ctx); - ptls_clear_memory(ctx->static_iv, ctx->algo->iv_size); - free(ctx); -} + ptls_aead_context_t *ctx; -size_t ptls_aead_encrypt(ptls_aead_context_t *ctx, void *output, const void *input, size_t inlen, uint64_t seq, const void *aad, - size_t aadlen) -{ - size_t off = 0; + if ((ctx = (ptls_aead_context_t *)malloc(aead->context_size)) == NULL) + return NULL; - 
ptls_aead_encrypt_init(ctx, seq, aad, aadlen); - off += ptls_aead_encrypt_update(ctx, ((uint8_t *)output) + off, input, inlen); - off += ptls_aead_encrypt_final(ctx, ((uint8_t *)output) + off); + *ctx = (ptls_aead_context_t){aead}; - return off; + if (aead->setup_crypto(ctx, is_enc, key, iv) != 0) { + free(ctx); + return NULL; + } + + return ctx; +} + +void ptls_aead_free(ptls_aead_context_t *ctx) +{ + ctx->dispose_crypto(ctx); + free(ctx); } -void ptls_aead__build_iv(ptls_aead_context_t *ctx, uint8_t *iv, uint64_t seq) +void ptls_aead__build_iv(ptls_aead_algorithm_t *algo, uint8_t *iv, const uint8_t *static_iv, uint64_t seq) { - size_t iv_size = ctx->algo->iv_size, i; - const uint8_t *s = ctx->static_iv; + size_t iv_size = algo->iv_size, i; + const uint8_t *s = static_iv; uint8_t *d = iv; /* build iv */ diff --git a/picotls.xcodeproj/project.pbxproj b/picotls.xcodeproj/project.pbxproj index 4788870e1..3fe64e924 100644 --- a/picotls.xcodeproj/project.pbxproj +++ b/picotls.xcodeproj/project.pbxproj @@ -10,7 +10,6 @@ 105900431DC8D57000FB4085 /* picotls.c in Sources */ = {isa = PBXBuildFile; fileRef = 106530E91D9B7C13005B2C60 /* picotls.c */; }; 105900441DC8D57000FB4085 /* picotest.c in Sources */ = {isa = PBXBuildFile; fileRef = 106530E31D9B4021005B2C60 /* picotest.c */; }; 1059004C1DC8D5B700FB4085 /* openssl.c in Sources */ = {isa = PBXBuildFile; fileRef = 106530C51D9B1A98005B2C60 /* openssl.c */; }; - 1059004E1DC8D61800FB4085 /* minicrypto.c in Sources */ = {isa = PBXBuildFile; fileRef = 1059003D1DC8D4E300FB4085 /* minicrypto.c */; }; 105900501DC8D64E00FB4085 /* minicrypto.h in Headers */ = {isa = PBXBuildFile; fileRef = 1059004F1DC8D64E00FB4085 /* minicrypto.h */; }; 105900611DC8DF8C00FB4085 /* sha256.c in Sources */ = {isa = PBXBuildFile; fileRef = 1059005F1DC8DE4400FB4085 /* sha256.c */; }; 105900641DC8DFA700FB4085 /* curve25519.c in Sources */ = {isa = PBXBuildFile; fileRef = 105900391DC8D46A00FB4085 /* curve25519.c */; }; @@ -92,6 +91,7 @@ 
10EACB1A1DCEC2A300CA0341 /* libpicotls-core.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 106530DA1D9B3E6F005B2C60 /* libpicotls-core.a */; }; E949EF282073629300511ECA /* minicrypto-pem.c in Sources */ = {isa = PBXBuildFile; fileRef = E949EF272073629300511ECA /* minicrypto-pem.c */; }; E95E95382290456B00215ACD /* picotls-probes.d in Sources */ = {isa = PBXBuildFile; fileRef = E95EBCC0227B71170022C32D /* picotls-probes.d */; }; + E973651E246E37300039AA49 /* libpicotls-minicrypto.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 10EACB171DCEAF0F00CA0341 /* libpicotls-minicrypto.a */; }; E97577012212405300D1EF74 /* ffx.h in Headers */ = {isa = PBXBuildFile; fileRef = E97577002212405300D1EF74 /* ffx.h */; }; E97577032212405D00D1EF74 /* ffx.c in Sources */ = {isa = PBXBuildFile; fileRef = E97577022212405D00D1EF74 /* ffx.c */; }; E97577042212407900D1EF74 /* ffx.c in Sources */ = {isa = PBXBuildFile; fileRef = E97577022212405D00D1EF74 /* ffx.c */; }; @@ -112,6 +112,11 @@ E99B75E31F5CE54D00CF503E /* asn1.c in Sources */ = {isa = PBXBuildFile; fileRef = E99B75DE1F5CDDB500CF503E /* asn1.c */; }; E99B75E41F5CE64E00CF503E /* pembase64.c in Sources */ = {isa = PBXBuildFile; fileRef = E99B75DF1F5CDDB500CF503E /* pembase64.c */; }; E99B75E51F5CE64E00CF503E /* pembase64.c in Sources */ = {isa = PBXBuildFile; fileRef = E99B75DF1F5CDDB500CF503E /* pembase64.c */; }; + E9B43DC224619D5100824E51 /* picotls-probes.d in Sources */ = {isa = PBXBuildFile; fileRef = E95EBCC0227B71170022C32D /* picotls-probes.d */; }; + E9B43DC824619D5100824E51 /* picotest.c in Sources */ = {isa = PBXBuildFile; fileRef = 106530E31D9B4021005B2C60 /* picotest.c */; }; + E9B43DE424619D7E00824E51 /* fusion.c in Sources */ = {isa = PBXBuildFile; fileRef = E9B43DE224619D7E00824E51 /* fusion.c */; }; + E9B43DE524619E1600824E51 /* minicrypto.c in Sources */ = {isa = PBXBuildFile; fileRef = 1059003D1DC8D4E300FB4085 /* minicrypto.c */; }; + E9B43E1024689FE700824E51 /* picotls.c in Sources */ = {isa = 
PBXBuildFile; fileRef = 106530BF1D998641005B2C60 /* picotls.c */; }; E9BC76CF1EF3A35E00EB7A09 /* chacha20.c in Sources */ = {isa = PBXBuildFile; fileRef = E9BC76C61EF3A2F700EB7A09 /* chacha20.c */; }; E9BC76D21EF3A36A00EB7A09 /* chacha20.c in Sources */ = {isa = PBXBuildFile; fileRef = E9BC76C61EF3A2F700EB7A09 /* chacha20.c */; }; E9BC76D41EF3A37200EB7A09 /* chacha20.c in Sources */ = {isa = PBXBuildFile; fileRef = E9BC76C61EF3A2F700EB7A09 /* chacha20.c */; }; @@ -205,6 +210,15 @@ ); runOnlyForDeploymentPostprocessing = 1; }; + E9B43DDD24619D5100824E51 /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ @@ -269,6 +283,10 @@ E992F7A920E99A7C0008154D /* picotls-esni */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "picotls-esni"; sourceTree = BUILT_PRODUCTS_DIR; }; E99B75DE1F5CDDB500CF503E /* asn1.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = asn1.c; sourceTree = ""; }; E99B75DF1F5CDDB500CF503E /* pembase64.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = pembase64.c; sourceTree = ""; }; + E9B43DBF24619D1700824E51 /* fusion.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = fusion.c; sourceTree = ""; }; + E9B43DE124619D5100824E51 /* test-fusion */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "test-fusion"; sourceTree = BUILT_PRODUCTS_DIR; }; + E9B43DE224619D7E00824E51 /* fusion.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = fusion.c; sourceTree = ""; }; + E9B43DE62461A06800824E51 /* fusion.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = fusion.h; sourceTree = 
""; }; E9BC76C61EF3A2F700EB7A09 /* chacha20.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = chacha20.c; path = src/chacha20.c; sourceTree = ""; }; E9BC76CC1EF3A31000EB7A09 /* salsa20.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = salsa20.h; path = src/salsa20.h; sourceTree = ""; }; E9BC76D61EF3C1C200EB7A09 /* poly1305.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = poly1305.c; path = src/poly1305.c; sourceTree = ""; }; @@ -340,6 +358,14 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + E9B43DDC24619D5100824E51 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + E973651E246E37300039AA49 /* libpicotls-minicrypto.a in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; /* End PBXFrameworksBuildPhase section */ /* Begin PBXGroup section */ @@ -403,6 +429,7 @@ E992F79920E99A080008154D /* src */, 106530C41D9B1A0E005B2C60 /* t */, 106530B31D9985E0005B2C60 /* Products */, + E973651D246E37300039AA49 /* Frameworks */, ); sourceTree = ""; }; @@ -416,6 +443,7 @@ 1059008C1DC8E1A300FB4085 /* libpicotls-openssl.a */, 10EACB171DCEAF0F00CA0341 /* libpicotls-minicrypto.a */, E992F7A920E99A7C0008154D /* picotls-esni */, + E9B43DE124619D5100824E51 /* test-fusion */, ); name = Products; sourceTree = ""; @@ -434,6 +462,7 @@ children = ( E99B75DE1F5CDDB500CF503E /* asn1.c */, E97577022212405D00D1EF74 /* ffx.c */, + E9B43DBF24619D1700824E51 /* fusion.c */, E99B75DF1F5CDDB500CF503E /* pembase64.c */, E9F20BDF22E34B210018D260 /* cifra */, 1059003F1DC8D53200FB4085 /* cifra.c */, @@ -451,6 +480,7 @@ children = ( 106530FE1DAD8A3C005B2C60 /* cli.c */, E97577072213148800D1EF74 /* e2e.t */, + E9B43DE224619D7E00824E51 /* fusion.c */, 106530E91D9B7C13005B2C60 /* picotls.c */, 1059003D1DC8D4E300FB4085 /* minicrypto.c */, 106530C51D9B1A98005B2C60 /* openssl.c */, @@ -486,6 +516,7 @@ 
1059004F1DC8D64E00FB4085 /* minicrypto.h */, 106530ED1D9CEFF7005B2C60 /* openssl.h */, E9E4B1292180514000514B47 /* certificate_compression.h */, + E9B43DE62461A06800824E51 /* fusion.h */, ); path = picotls; sourceTree = ""; @@ -516,6 +547,13 @@ path = cmake; sourceTree = ""; }; + E973651D246E37300039AA49 /* Frameworks */ = { + isa = PBXGroup; + children = ( + ); + name = Frameworks; + sourceTree = ""; + }; E992F79920E99A080008154D /* src */ = { isa = PBXGroup; children = ( @@ -726,6 +764,23 @@ productReference = E992F7A920E99A7C0008154D /* picotls-esni */; productType = "com.apple.product-type.tool"; }; + E9B43DC024619D5100824E51 /* test-fusion */ = { + isa = PBXNativeTarget; + buildConfigurationList = E9B43DDE24619D5100824E51 /* Build configuration list for PBXNativeTarget "test-fusion" */; + buildPhases = ( + E9B43DC124619D5100824E51 /* Sources */, + E9B43DDC24619D5100824E51 /* Frameworks */, + E9B43DDD24619D5100824E51 /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "test-fusion"; + productName = "test-crypto-openssl"; + productReference = E9B43DE124619D5100824E51 /* test-fusion */; + productType = "com.apple.product-type.tool"; + }; /* End PBXNativeTarget section */ /* Begin PBXProject section */ @@ -748,6 +803,7 @@ developmentRegion = English; hasScannedForEncodings = 0; knownRegions = ( + English, en, ); mainGroup = 106530A91D9985E0005B2C60; @@ -762,6 +818,7 @@ 106530CB1D9B3D45005B2C60 /* test-openssl */, 105900411DC8D57000FB4085 /* test-minicrypto */, E992F79B20E99A7C0008154D /* picotls-esni */, + E9B43DC024619D5100824E51 /* test-fusion */, ); }; /* End PBXProject section */ @@ -778,10 +835,10 @@ 105900431DC8D57000FB4085 /* picotls.c in Sources */, 105900C41DC96B2200FB4085 /* uECC.c in Sources */, 105900441DC8D57000FB4085 /* picotest.c in Sources */, + E9B43DE524619E1600824E51 /* minicrypto.c in Sources */, 105900611DC8DF8C00FB4085 /* sha256.c in Sources */, E9F20BF122E34B480018D260 /* chacha20.c in Sources */, 
E9BC76D21EF3A36A00EB7A09 /* chacha20.c in Sources */, - 1059004E1DC8D61800FB4085 /* minicrypto.c in Sources */, E99B75E31F5CE54D00CF503E /* asn1.c in Sources */, E9E865ED203BD46700E2FFCD /* sha512.c in Sources */, E9F20BF022E34B480018D260 /* aes256.c in Sources */, @@ -901,6 +958,17 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + E9B43DC124619D5100824E51 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + E9B43DC224619D5100824E51 /* picotls-probes.d in Sources */, + E9B43E1024689FE700824E51 /* picotls.c in Sources */, + E9B43DE424619D7E00824E51 /* fusion.c in Sources */, + E9B43DC824619D5100824E51 /* picotest.c in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; /* End PBXSourcesBuildPhase section */ /* Begin PBXTargetDependency section */ @@ -1214,6 +1282,26 @@ }; name = Release; }; + E9B43DDF24619D5100824E51 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + GCC_PREPROCESSOR_DEFINITIONS = "$(inherited)"; + OTHER_CFLAGS = "-march=native"; + OTHER_LDFLAGS = ""; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + E9B43DE024619D5100824E51 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + GCC_PREPROCESSOR_DEFINITIONS = "$(inherited)"; + OTHER_CFLAGS = "-march=native"; + OTHER_LDFLAGS = ""; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; /* End XCBuildConfiguration section */ /* Begin XCConfigurationList section */ @@ -1289,6 +1377,15 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; + E9B43DDE24619D5100824E51 /* Build configuration list for PBXNativeTarget "test-fusion" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + E9B43DDF24619D5100824E51 /* Debug */, + E9B43DE024619D5100824E51 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; /* End XCConfigurationList section */ }; rootObject = 106530AA1D9985E0005B2C60 /* Project object */; diff --git a/t/fusion.c 
b/t/fusion.c
new file mode 100644
index 000000000..c51e88e00
--- /dev/null
+++ b/t/fusion.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 2020 Fastly, Kazuho Oku
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include "picotls/fusion.h"
+#include "picotls/minicrypto.h"
+#include "../deps/picotest/picotest.h"
+/* NOTE(review): the implementation is pulled in as source, presumably so that file-local helpers such as loadn() can be tested */
+#include "../lib/fusion.c"
+
+/*
+ * Formats `len` bytes starting at `_p` as a NUL-terminated lowercase hex string. The string lives in a heap buffer owned by this
+ * function that is released and replaced on every call, so the result is only valid until the next call to tostr().
+ */
+static const char *tostr(const void *_p, size_t len)
+{
+    static const char hexdigits[] = "0123456789abcdef";
+    static char *buf;
+    const uint8_t *s = _p;
+    char *d;
+
+    free(buf); /* free(NULL) is a no-op, so the first call needs no guard */
+    if ((buf = malloc(len * 2 + 1)) == NULL)
+        abort(); /* fail loudly rather than write through a NULL pointer */
+
+    for (d = buf; len != 0; --len, ++s) {
+        *d++ = hexdigits[*s >> 4];
+        *d++ = hexdigits[*s & 0xf];
+    }
+    *d = '\0';
+
+    return buf;
+}
+
+/*
+ * Checks, at every offset of an 8KB buffer, that loadn(src, 11) yields the first 11 bytes at src followed by five zero bytes
+ * even though non-zero bytes follow them in memory.
+ */
+static void test_loadn(void)
+{
+    uint8_t buf[8192] = {0};
+    size_t off;
+
+    /* stop at the offset where the 16-byte pattern ends exactly at the end of buf */
+    for (off = 0; off + 16 <= sizeof(buf); ++off) {
+        uint8_t *src = buf + off;
+        memcpy(src, "hello world12345", 16);
+        __m128i v = loadn(src, 11);
+        if (memcmp(&v, "hello world\0\0\0\0\0", 16) != 0) {
+            ok(!"fail");
+            return;
+        }
+        memset(src, 0, 11); /* the remaining 5 bytes get overwritten by the memcpy of the next iteration */
+    }
+    ok(!!"success");
+}
+
+/* all-zero bytes, used by the tests below as key, plaintext, AAD and RNG input */
+static const uint8_t zero[16384] = {0};
+
+/* Encrypts one 16-byte block with an all-zero 16-byte key and then an all-zero 32-byte key, comparing against known output. */
+static void test_ecb(void)
+{
+    ptls_fusion_aesecb_context_t ecb;
+    uint8_t encrypted[16];
+
+    ptls_fusion_aesecb_init(&ecb, 1, zero, 16);
+    ptls_fusion_aesecb_encrypt(&ecb, encrypted, "hello world!!!!!");
+    ptls_fusion_aesecb_dispose(&ecb);
+    ok(strcmp(tostr(encrypted, 16), "172afecb50b5f1237814b2f7cb51d0f7") == 0);
+
+    ptls_fusion_aesecb_init(&ecb, 1, zero, 32);
+    ptls_fusion_aesecb_encrypt(&ecb, encrypted, "hello world!!!!!");
+    ptls_fusion_aesecb_dispose(&ecb);
+    ok(strcmp(tostr(encrypted, 16), "2a033f0627b3554aa4fe5786550736ff") == 0);
+}
+
+/* Round-trips one fixed record through the ptls_fusion_aesgcm_* functions and another through the generic ptls_aead_* API. */
+static void gcm_basic(void)
+{
+    { /* ptls_fusion_aesgcm_*: all-zero key, 16 zero bytes as input, "hello" as AAD; `expected` is ciphertext followed by tag */
+        static const uint8_t expected[] = {0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92, 0xf3, 0x28, 0xc2,
+                                           0xb9, 0x71, 0xb2, 0xfe, 0x78, 0x97, 0x3f, 0xbc, 0xa6, 0x54, 0x77,
+                                           0xbf, 0x47, 0x85, 0xb0, 0xd5, 0x61, 0xf7, 0xe3, 0xfd, 0x6c};
+        ptls_fusion_aesgcm_context_t *ctx = ptls_fusion_aesgcm_new(zero, PTLS_AES128_KEY_SIZE, 5 + 16);
+        uint8_t encrypted[sizeof(expected)], decrypted[sizeof(expected) - 16];
+        ptls_fusion_aesgcm_encrypt(ctx, encrypted, zero, 16, _mm_setzero_si128(), "hello", 5, NULL);
+        ok(memcmp(expected, encrypted, sizeof(expected)) == 0);
+        memset(decrypted, 0x55, sizeof(decrypted));
+        ok(ptls_fusion_aesgcm_decrypt(ctx, decrypted, expected, 16, _mm_setzero_si128(), "hello", 5, expected + 16));
+        ok(memcmp(decrypted, zero, sizeof(decrypted)) == 0);
+        ptls_fusion_aesgcm_free(ctx);
+    }
+
+    { /* ptls_aead_*: non-trivial key, IV and AAD; the plaintext includes the terminating NUL of the string literal */
+        static const uint8_t key[16] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+                                        0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff},
+                             aad[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19},
+                             iv[] = {20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
+                             plaintext[] =
+                                 "hello world\nhello world\nhello world\nhello world\nhello world\nhello world\nhello world\n";
+        static const uint8_t expected[] = {0xd3, 0xa8, 0x1d, 0x96, 0x4c, 0x9b, 0x02, 0xd7, 0x9a, 0xb0, 0x41, 0x07, 0x4c, 0x8c, 0xe2,
+                                           0xe0, 0x2e, 0x83, 0x54, 0x52, 0x45, 0xcb, 0xd4, 0x68, 0xc8, 0x43, 0x45, 0xca, 0x91, 0xfb,
+                                           0xa3, 0x7a, 0x67, 0xed, 0xe8, 0xd7, 0x5e, 0xe2, 0x33, 0xd1, 0x3e, 0xbf, 0x50, 0xc2, 0x4b,
+                                           0x86, 0x83, 0x55, 0x11, 0xbb, 0x17, 0x4f, 0xf5, 0x78, 0xb8, 0x65, 0xeb, 0x9a, 0x2b, 0x8f,
+                                           0x77, 0x08, 0xa9, 0x60, 0x17, 0x73, 0xc5, 0x07, 0xf3, 0x04, 0xc9, 0x3f, 0x67, 0x4d, 0x12,
+                                           0xa1, 0x02, 0x93, 0xc2, 0x3c, 0xd3, 0xf8, 0x59, 0x33, 0xd5, 0x01, 0xc3, 0xbb, 0xaa, 0xe6,
+                                           0x3f, 0xbb, 0x23, 0x66, 0x94, 0x26, 0x28, 0x43, 0xa5, 0xfd, 0x2f};
+        ptls_aead_context_t *aead = ptls_aead_new_direct(&ptls_fusion_aes128gcm, 0, key, iv);
+        uint8_t encrypted[sizeof(plaintext) + 16], decrypted[sizeof(plaintext)];
+        ptls_aead_encrypt(aead, encrypted, plaintext, sizeof(plaintext), 0, aad, sizeof(aad));
+        ok(memcmp(expected, encrypted, sizeof(plaintext)) == 0);
+        ok(memcmp(expected + sizeof(plaintext), encrypted + sizeof(plaintext), 16) == 0);
+        ok(ptls_aead_decrypt(aead, decrypted, encrypted, sizeof(encrypted), 0, aad, sizeof(aad)) == sizeof(plaintext));
+        ok(memcmp(decrypted, plaintext, sizeof(plaintext)) == 0);
+        ptls_aead_free(aead);
+    }
+}
+
+/*
+ * Encrypts and decrypts a 1-byte text with 1 byte of AAD using a context created with 2 as the third argument of
+ * ptls_fusion_aesgcm_new(). NOTE(review): that argument looks like a capacity equal to AAD length + text length; confirm.
+ */
+static void gcm_capacity(void)
+{
+    static const uint8_t expected[17] = {0x5b, 0x27, 0x21, 0x5e, 0xd8, 0x1a, 0x70, 0x2e, 0x39,
+                                         0x41, 0xc8, 0x05, 0x77, 0xd5, 0x2f, 0xcb, 0x57};
+    ptls_fusion_aesgcm_context_t *ctx = ptls_fusion_aesgcm_new(zero, PTLS_AES128_KEY_SIZE, 2);
+    uint8_t encrypted[17], decrypted[1] = {0x55};
+    ptls_fusion_aesgcm_encrypt(ctx, encrypted, "X", 1, _mm_setzero_si128(), "a", 1, NULL);
+    ok(memcmp(expected, encrypted, 17) == 0);
+    ok(ptls_fusion_aesgcm_decrypt(ctx, decrypted, expected, 1, _mm_setzero_si128(), "a", 1, expected + 1));
+    ok('X' == decrypted[0]);
+    ptls_fusion_aesgcm_free(ctx);
+}
+
+/*
+ * Runs a table of (AAD length, text length, expected tag) vectors over an all-zero key, AAD and text; first without and then
+ * with supplementary encryption, in which case the 16-byte supplementary output is checked as well.
+ */
+static void gcm_test_vectors(void)
+{
+    static const uint8_t one[16] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+    ptls_fusion_aesgcm_context_t *aead = ptls_fusion_aesgcm_new(zero, PTLS_AES128_KEY_SIZE, sizeof(zero));
+    ptls_aead_supplementary_encryption_t *supp = NULL;
+    /* declared outside the loop, because supp->input keeps pointing into `encrypted` during the second pass */
+    uint8_t encrypted[sizeof(zero) + 16], decrypted[sizeof(zero)];
+    int i;
+
+    for (i = 0; i < 2; ++i) {
+#define DOIT(aad, aadlen, ptlen, expected_tag, expected_supp) \
+    do { \
+        memset(encrypted, 0xcc, sizeof(encrypted)); \
+        ptls_fusion_aesgcm_encrypt(aead, encrypted, zero, ptlen, _mm_setzero_si128(), aad, aadlen, supp); \
+        ok(strcmp(tostr(encrypted + ptlen, 16), expected_tag) == 0); \
+        if (supp != NULL) \
+            ok(strcmp(tostr(supp->output, sizeof(supp->output)), expected_supp) == 0); \
+        memset(decrypted, 0x55, sizeof(decrypted)); \
+        ok(ptls_fusion_aesgcm_decrypt(aead, decrypted, encrypted, ptlen, _mm_setzero_si128(), aad, aadlen, encrypted + ptlen)); \
+        ok(memcmp(decrypted, zero, ptlen) == 0); \
+    } while (0)
+
+        DOIT(zero, 13, 17, "1b4e515384e8aa5bb781ee12549a2ccf", "4576f18ef3ae9dfd37cf72c4592da874");
+        DOIT(zero, 13, 32, "84030586f55adf8ac3c145913c6fd0f8", "a062016e90dcc316d061fde5424cf34f");
+        DOIT(zero, 13, 64, "66165d39739c50c90727e7d49127146b", "a062016e90dcc316d061fde5424cf34f");
+        DOIT(zero, 13, 65, "eb3b75e1d4431e1bb67da46f6a1a0edd", "a062016e90dcc316d061fde5424cf34f");
+        DOIT(zero, 13, 79, "8f4a96c7390c26bb15b68865e6a861b9", "a062016e90dcc316d061fde5424cf34f");
+        DOIT(zero, 13, 80, "5cc2554857b19e7a9e18d015feac61fd", "a062016e90dcc316d061fde5424cf34f");
+        DOIT(zero, 13, 81, "5a65f0d4db36c981bf7babd11691fe78", "a062016e90dcc316d061fde5424cf34f");
+        DOIT(zero, 13, 95, "6a8a51152efe928999a610d8a7b1df9d", "a062016e90dcc316d061fde5424cf34f");
+        DOIT(zero, 13, 96, "6b9c468e24ed96010687f3880a044d42", "a062016e90dcc316d061fde5424cf34f");
+        DOIT(zero, 13, 97, "1b4eb785b884a7d4fdebaff81c1c12e8", "a062016e90dcc316d061fde5424cf34f");
+
+        DOIT(zero, 22, 1328, "0507baaece8d573774c94e8103821316", "a062016e90dcc316d061fde5424cf34f");
+        DOIT(zero, 21, 1329, "dd70d59030eadb6313e778046540a253", "a062016e90dcc316d061fde5424cf34f");
+        DOIT(zero, 20, 1330, "f1b456b955afde7603188af0124a32ef", "a062016e90dcc316d061fde5424cf34f");
+
+        DOIT(zero, 13, 1337, "a22deec51250a7eb1f4384dea5f2e890", "a062016e90dcc316d061fde5424cf34f");
+        DOIT(zero, 12, 1338, "42102b0a499b2efa89702ece4b0c5789", "a062016e90dcc316d061fde5424cf34f");
+        DOIT(zero, 11, 1339, "9827f0b34252160d0365ffaa9364bedc", "a062016e90dcc316d061fde5424cf34f");
+
+        DOIT(zero, 0, 80, "98885a3a22bd4742fe7b72172193b163", "a062016e90dcc316d061fde5424cf34f");
+        DOIT(zero, 0, 96, "afd649fc51e14f3966e4518ad53b9ddc", "a062016e90dcc316d061fde5424cf34f");
+
+        DOIT(zero, 20, 85, "afe8b727057c804a0525c2914ef856b0", "a062016e90dcc316d061fde5424cf34f");
+
+#undef DOIT
+
+        /* enable supplementary encryption for the second pass; allocate only once so that nothing is leaked */
+        if (supp == NULL) {
+            if ((supp = malloc(sizeof(*supp))) == NULL)
+                abort();
+            supp->ctx = ptls_cipher_new(&ptls_fusion_aes128ctr, 1, one);
+            supp->input = encrypted + 2;
+        }
+    }
+
+    ptls_cipher_free(supp->ctx);
+    free(supp);
+    ptls_fusion_aesgcm_free(aead);
+}
+
+/*
+ * Generates 10000 pseudo-random inputs (key, IV, sequence number, and AAD and text of 0 to 255 bytes each) and checks for each
+ * that the output of the fusion AEAD can be decrypted by fusion itself as well as by the minicrypto AEAD of the same key size.
+ * `aes256` selects the aes256gcm ciphers when non-zero, the aes128gcm ciphers otherwise.
+ */
+static void test_generated(int aes256)
+{
+    ptls_cipher_context_t *rng = ptls_cipher_new(&ptls_minicrypto_aes128ctr, 1, zero);
+    ptls_aead_context_t *aead = NULL;
+    int i;
+
+    ptls_cipher_init(rng, zero);
+
+    for (i = 0; i < 10000; ++i) {
+        /* generate input using RNG; buffers have the maximum size, as a variable-length array of length zero is undefined */
+        uint8_t key[32], iv[12], aadlen, textlen, aad[UINT8_MAX], text[UINT8_MAX];
+        uint8_t encrypted[UINT8_MAX + 16], decrypted[UINT8_MAX];
+        uint64_t seq;
+        ptls_cipher_encrypt(rng, key, zero, sizeof(key));
+        ptls_cipher_encrypt(rng, iv, zero, sizeof(iv));
+        ptls_cipher_encrypt(rng, &aadlen, zero, sizeof(aadlen));
+        ptls_cipher_encrypt(rng, &textlen, zero, sizeof(textlen));
+        ptls_cipher_encrypt(rng, &seq, zero, sizeof(seq));
+        ptls_cipher_encrypt(rng, aad, zero, aadlen);
+        ptls_cipher_encrypt(rng, text, zero, textlen);
+
+        memset(encrypted, 0x55, sizeof(encrypted));
+        memset(decrypted, 0xcc, sizeof(decrypted));
+
+        /* check using fusion */
+        aead = ptls_aead_new_direct(aes256 ? &ptls_fusion_aes256gcm : &ptls_fusion_aes128gcm, 1, key, iv);
+        ptls_aead_encrypt(aead, encrypted, text, textlen, seq, aad, aadlen);
+        if (ptls_aead_decrypt(aead, decrypted, encrypted, textlen + 16, seq, aad, aadlen) != textlen)
+            goto Fail;
+        if (memcmp(decrypted, text, textlen) != 0)
+            goto Fail;
+        ptls_aead_free(aead);
+        aead = NULL;
+
+        memset(decrypted, 0xcc, sizeof(decrypted));
+
+        /* check that the encrypted text can be decrypted by minicrypto */
+        aead = ptls_aead_new_direct(aes256 ? &ptls_minicrypto_aes256gcm : &ptls_minicrypto_aes128gcm, 0, key, iv);
+        if (ptls_aead_decrypt(aead, decrypted, encrypted, textlen + 16, seq, aad, aadlen) != textlen)
+            goto Fail;
+        if (memcmp(decrypted, text, textlen) != 0)
+            goto Fail;
+        ptls_aead_free(aead);
+        aead = NULL;
+    }
+
+    ok(1);
+    ptls_cipher_free(rng);
+    return;
+
+Fail:
+    note("mismatch at index=%d", i);
+    ok(0);
+    /* every jump to Fail happens while a context is live; release it together with the RNG */
+    ptls_aead_free(aead);
+    ptls_cipher_free(rng);
+}
+
+/* wrappers that fix the `aes256` argument so that the test can be passed to subtest() */
+static void test_generated_aes128(void)
+{
+    test_generated(0);
+}
+
+static void test_generated_aes256(void)
+{
+    test_generated(1);
+}
+
+/* Runs every subtest, or none of them when ptls_fusion_is_supported_by_cpu() reports that fusion cannot be used. */
+int main(int argc, char **argv)
+{
+    if (!ptls_fusion_is_supported_by_cpu()) {
+        note("CPU does not have the necessary features (avx2, aes, pclmul)\n");
+        return done_testing();
+    }
+
+    subtest("loadn", test_loadn);
+    subtest("ecb", test_ecb);
+    subtest("gcm-basic", gcm_basic);
+    subtest("gcm-capacity", gcm_capacity);
+    subtest("gcm-test-vectors", gcm_test_vectors);
+    subtest("generated-128", test_generated_aes128);
+    subtest("generated-256", test_generated_aes256);
+
+    return done_testing();
+}