diff --git a/.gitignore b/.gitignore index 24f4ec9..0668edc 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,5 @@ Network Trash Folder Temporary Items .apdisk .vscode/settings.json +mlkem_native/test/test_mlkem768 +*.o diff --git a/licenses.md b/licenses.md index ce64699..98bd5dc 100644 --- a/licenses.md +++ b/licenses.md @@ -19,6 +19,17 @@ at your option. https://github.com/trustcrypto/libraries/blob/master/mbedtls-2.4.0/apache-2.0.txt https://www.apache.org/licenses/LICENSE-2.0 +## mlkem-native (ML-KEM / FIPS 203) +https://github.com/pq-code-package/mlkem-native + +Licensed under your choice of: + +Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +ISC License (https://opensource.org/licenses/ISC) +MIT License (https://opensource.org/licenses/MIT) + +Post-Quantum Cryptography Alliance, a project of the Linux Foundation. + ## Base64, Sha1, Sha256 https://github.com/B-Con/crypto-algorithms diff --git a/mlkem_native/mlkem_native.c b/mlkem_native/mlkem_native.c new file mode 100644 index 0000000..a00697e --- /dev/null +++ b/mlkem_native/mlkem_native.c @@ -0,0 +1,660 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mlkem-native repository. + * Do not modify it directly. + */ + +/****************************************************************************** + * + * Single compilation unit (SCU) for fixed-level build of mlkem-native + * + * This compilation unit bundles together all source files for a build + * of mlkem-native for a fixed security level (MLKEM-512/768/1024). + * + * # API + * + * The API exposed by this file is described in mlkem_native.h. 
+ * + * # Multi-level build + * + * If you want an SCU build of mlkem-native with support for multiple security + * levels, you need to include this file multiple times, and set + * MLK_CONFIG_MULTILEVEL_WITH_SHARED and MLK_CONFIG_MULTILEVEL_NO_SHARED + * appropriately. This is exemplified in examples/monolithic_build_multilevel + * and examples/monolithic_build_multilevel_native. + * + * # Configuration + * + * The following options from the mlkem-native configuration are relevant: + * + * - MLK_CONFIG_FIPS202_CUSTOM_HEADER + * Set this option if you use a custom FIPS202 implementation. + * + * - MLK_CONFIG_USE_NATIVE_BACKEND_ARITH + * Set this option if you want to include the native arithmetic backends + * in your build. + * + * - MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202 + * Set this option if you want to include the native FIPS202 backends + * in your build. + * + * - MLK_CONFIG_MONOBUILD_KEEP_SHARED_HEADERS + * Set this option if you want to keep the directives defined in + * level-independent headers. This is needed for a multi-level build. + */ + +/* If parts of the mlkem-native source tree are not used, + * consider reducing this header via `unifdef`. 
+ * + * Example: + * ```bash + * unifdef -UMLK_CONFIG_USE_NATIVE_BACKEND_ARITH mlkem_native.c + * ``` + */ + +#include "src/common.h" + +#include "src/compress.c" +#include "src/debug.c" +#include "src/indcpa.c" +#include "src/kem.c" +#include "src/poly.c" +#include "src/poly_k.c" +#include "src/sampling.c" +#include "src/verify.c" + +#if !defined(MLK_CONFIG_FIPS202_CUSTOM_HEADER) +#include "src/fips202/fips202.c" +#include "src/fips202/fips202x4.c" +#include "src/fips202/keccakf1600.c" +#endif + +#if defined(MLK_CONFIG_USE_NATIVE_BACKEND_ARITH) +#if defined(MLK_SYS_AARCH64) +#include "src/native/aarch64/src/aarch64_zetas.c" +#include "src/native/aarch64/src/rej_uniform_table.c" +#endif +#if defined(MLK_SYS_X86_64) +#include "src/native/x86_64/src/compress_consts.c" +#include "src/native/x86_64/src/consts.c" +#include "src/native/x86_64/src/rej_uniform_table.c" +#endif +#if defined(MLK_SYS_RISCV64) +#include "src/native/riscv64/src/rv64v_debug.c" +#include "src/native/riscv64/src/rv64v_poly.c" +#endif +#endif /* MLK_CONFIG_USE_NATIVE_BACKEND_ARITH */ + +#if defined(MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202) +#if defined(MLK_SYS_AARCH64) +#include "src/fips202/native/aarch64/src/keccakf1600_round_constants.c" +#endif +#if defined(MLK_SYS_X86_64) +#include "src/fips202/native/x86_64/src/keccakf1600_constants.c" +#endif +#if defined(MLK_SYS_ARMV81M_MVE) +#include "src/fips202/native/armv81m/src/keccak_f1600_x4_mve.c" +#include "src/fips202/native/armv81m/src/keccakf1600_round_constants.c" +#endif +#endif /* MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202 */ + +/* Macro #undef's + * + * The following undefines macros from headers + * included by the source files imported above. + * + * This is to allow building and linking multiple builds + * of mlkem-native for varying parameter sets through concatenation + * of this file, as if the files had been compiled separately. + * If this is not relevant to you, you may remove the following. 
+ */ + +/* + * Undefine macros from MLK_CONFIG_PARAMETER_SET-specific files + */ +/* mlkem/mlkem_native.h */ +#undef CRYPTO_BYTES +#undef CRYPTO_CIPHERTEXTBYTES +#undef CRYPTO_PUBLICKEYBYTES +#undef CRYPTO_SECRETKEYBYTES +#undef CRYPTO_SYMBYTES +#undef MLKEM1024_BYTES +#undef MLKEM1024_CIPHERTEXTBYTES +#undef MLKEM1024_PUBLICKEYBYTES +#undef MLKEM1024_SECRETKEYBYTES +#undef MLKEM1024_SYMBYTES +#undef MLKEM512_BYTES +#undef MLKEM512_CIPHERTEXTBYTES +#undef MLKEM512_PUBLICKEYBYTES +#undef MLKEM512_SECRETKEYBYTES +#undef MLKEM512_SYMBYTES +#undef MLKEM768_BYTES +#undef MLKEM768_CIPHERTEXTBYTES +#undef MLKEM768_PUBLICKEYBYTES +#undef MLKEM768_SECRETKEYBYTES +#undef MLKEM768_SYMBYTES +#undef MLKEM_BYTES +#undef MLKEM_CIPHERTEXTBYTES +#undef MLKEM_CIPHERTEXTBYTES_ +#undef MLKEM_PUBLICKEYBYTES +#undef MLKEM_PUBLICKEYBYTES_ +#undef MLKEM_SECRETKEYBYTES +#undef MLKEM_SECRETKEYBYTES_ +#undef MLKEM_SYMBYTES +#undef MLK_API_CONCAT +#undef MLK_API_CONCAT_ +#undef MLK_API_CONCAT_UNDERSCORE +#undef MLK_API_LEGACY_CONFIG +#undef MLK_API_MUST_CHECK_RETURN_VALUE +#undef MLK_API_NAMESPACE +#undef MLK_API_QUALIFIER +#undef MLK_CONFIG_API_CONSTANTS_ONLY +#undef MLK_CONFIG_API_NAMESPACE_PREFIX +#undef MLK_CONFIG_API_NO_SUPERCOP +#undef MLK_CONFIG_API_PARAMETER_SET +#undef MLK_CONFIG_API_QUALIFIER +#undef MLK_ERR_FAIL +#undef MLK_ERR_OUT_OF_MEMORY +#undef MLK_ERR_RNG_FAIL +#undef MLK_H +#undef MLK_MAX3_ +#undef MLK_TOTAL_ALLOC_1024 +#undef MLK_TOTAL_ALLOC_1024_DECAPS +#undef MLK_TOTAL_ALLOC_1024_ENCAPS +#undef MLK_TOTAL_ALLOC_1024_KEYPAIR +#undef MLK_TOTAL_ALLOC_1024_KEYPAIR_NO_PCT +#undef MLK_TOTAL_ALLOC_1024_KEYPAIR_PCT +#undef MLK_TOTAL_ALLOC_512 +#undef MLK_TOTAL_ALLOC_512_DECAPS +#undef MLK_TOTAL_ALLOC_512_ENCAPS +#undef MLK_TOTAL_ALLOC_512_KEYPAIR +#undef MLK_TOTAL_ALLOC_512_KEYPAIR_NO_PCT +#undef MLK_TOTAL_ALLOC_512_KEYPAIR_PCT +#undef MLK_TOTAL_ALLOC_768 +#undef MLK_TOTAL_ALLOC_768_DECAPS +#undef MLK_TOTAL_ALLOC_768_ENCAPS +#undef MLK_TOTAL_ALLOC_768_KEYPAIR +#undef 
MLK_TOTAL_ALLOC_768_KEYPAIR_NO_PCT +#undef MLK_TOTAL_ALLOC_768_KEYPAIR_PCT +#undef crypto_kem_check_pk +#undef crypto_kem_check_sk +#undef crypto_kem_dec +#undef crypto_kem_enc +#undef crypto_kem_enc_derand +#undef crypto_kem_keypair +#undef crypto_kem_keypair_derand +/* mlkem/src/common.h */ +#undef MLK_ADD_PARAM_SET +#undef MLK_ALLOC +#undef MLK_APPLY +#undef MLK_ASM_FN_SIZE +#undef MLK_ASM_FN_SYMBOL +#undef MLK_ASM_NAMESPACE +#undef MLK_BUILD_INTERNAL +#undef MLK_COMMON_H +#undef MLK_CONCAT +#undef MLK_CONCAT_ +#undef MLK_CONTEXT_PARAMETERS_0 +#undef MLK_CONTEXT_PARAMETERS_1 +#undef MLK_CONTEXT_PARAMETERS_2 +#undef MLK_CONTEXT_PARAMETERS_3 +#undef MLK_CONTEXT_PARAMETERS_4 +#undef MLK_EMPTY_CU +#undef MLK_ERR_FAIL +#undef MLK_ERR_OUT_OF_MEMORY +#undef MLK_ERR_RNG_FAIL +#undef MLK_EXTERNAL_API +#undef MLK_FIPS202X4_HEADER_FILE +#undef MLK_FIPS202_HEADER_FILE +#undef MLK_FREE +#undef MLK_INTERNAL_API +#undef MLK_NAMESPACE +#undef MLK_NAMESPACE_K +#undef MLK_NAMESPACE_PREFIX +#undef MLK_NAMESPACE_PREFIX_K +#undef mlk_memcpy +#undef mlk_memset +/* mlkem/src/indcpa.h */ +#undef MLK_INDCPA_H +#undef mlk_gen_matrix +#undef mlk_indcpa_dec +#undef mlk_indcpa_enc +#undef mlk_indcpa_keypair_derand +/* mlkem/src/kem.h */ +#undef MLK_KEM_H +#undef mlk_kem_check_pk +#undef mlk_kem_check_sk +#undef mlk_kem_dec +#undef mlk_kem_enc +#undef mlk_kem_enc_derand +#undef mlk_kem_keypair +#undef mlk_kem_keypair_derand +/* mlkem/src/params.h */ +#undef MLKEM_DU +#undef MLKEM_DV +#undef MLKEM_ETA1 +#undef MLKEM_ETA2 +#undef MLKEM_INDCCA_CIPHERTEXTBYTES +#undef MLKEM_INDCCA_PUBLICKEYBYTES +#undef MLKEM_INDCCA_SECRETKEYBYTES +#undef MLKEM_INDCPA_BYTES +#undef MLKEM_INDCPA_MSGBYTES +#undef MLKEM_INDCPA_PUBLICKEYBYTES +#undef MLKEM_INDCPA_SECRETKEYBYTES +#undef MLKEM_K +#undef MLKEM_N +#undef MLKEM_POLYBYTES +#undef MLKEM_POLYCOMPRESSEDBYTES_D10 +#undef MLKEM_POLYCOMPRESSEDBYTES_D11 +#undef MLKEM_POLYCOMPRESSEDBYTES_D4 +#undef MLKEM_POLYCOMPRESSEDBYTES_D5 +#undef MLKEM_POLYCOMPRESSEDBYTES_DU 
+#undef MLKEM_POLYCOMPRESSEDBYTES_DV +#undef MLKEM_POLYVECBYTES +#undef MLKEM_POLYVECCOMPRESSEDBYTES_DU +#undef MLKEM_Q +#undef MLKEM_Q_HALF +#undef MLKEM_SSBYTES +#undef MLKEM_SYMBYTES +#undef MLKEM_UINT12_LIMIT +#undef MLK_PARAMS_H +/* mlkem/src/poly_k.h */ +#undef MLK_POLY_K_H +#undef mlk_poly_compress_du +#undef mlk_poly_compress_dv +#undef mlk_poly_decompress_du +#undef mlk_poly_decompress_dv +#undef mlk_poly_getnoise_eta1122_4x +#undef mlk_poly_getnoise_eta1_4x +#undef mlk_poly_getnoise_eta2 +#undef mlk_poly_getnoise_eta2_4x +#undef mlk_polymat +#undef mlk_polyvec +#undef mlk_polyvec_add +#undef mlk_polyvec_basemul_acc_montgomery_cached +#undef mlk_polyvec_compress_du +#undef mlk_polyvec_decompress_du +#undef mlk_polyvec_frombytes +#undef mlk_polyvec_invntt_tomont +#undef mlk_polyvec_mulcache +#undef mlk_polyvec_mulcache_compute +#undef mlk_polyvec_ntt +#undef mlk_polyvec_reduce +#undef mlk_polyvec_tobytes +#undef mlk_polyvec_tomont + +#if !defined(MLK_CONFIG_MONOBUILD_KEEP_SHARED_HEADERS) +/* + * Undefine macros from MLK_CONFIG_PARAMETER_SET-generic files + */ +/* mlkem/src/compress.h */ +#undef MLK_COMPRESS_H +#undef mlk_poly_compress_d10 +#undef mlk_poly_compress_d11 +#undef mlk_poly_compress_d4 +#undef mlk_poly_compress_d5 +#undef mlk_poly_decompress_d10 +#undef mlk_poly_decompress_d11 +#undef mlk_poly_decompress_d4 +#undef mlk_poly_decompress_d5 +#undef mlk_poly_frombytes +#undef mlk_poly_frommsg +#undef mlk_poly_tobytes +#undef mlk_poly_tomsg +/* mlkem/src/debug.h */ +#undef MLK_DEBUG_H +#undef mlk_assert +#undef mlk_assert_abs_bound +#undef mlk_assert_abs_bound_2d +#undef mlk_assert_bound +#undef mlk_assert_bound_2d +#undef mlk_debug_check_assert +#undef mlk_debug_check_bounds +/* mlkem/src/poly.h */ +#undef MLK_INVNTT_BOUND +#undef MLK_NTT_BOUND +#undef MLK_POLY_H +#undef mlk_poly_add +#undef mlk_poly_invntt_tomont +#undef mlk_poly_mulcache_compute +#undef mlk_poly_ntt +#undef mlk_poly_reduce +#undef mlk_poly_sub +#undef mlk_poly_tomont +/* 
mlkem/src/randombytes.h */ +#undef MLK_RANDOMBYTES_H +/* mlkem/src/sampling.h */ +#undef MLK_SAMPLING_H +#undef mlk_poly_cbd2 +#undef mlk_poly_cbd3 +#undef mlk_poly_rej_uniform +#undef mlk_poly_rej_uniform_x4 +/* mlkem/src/symmetric.h */ +#undef MLK_SYMMETRIC_H +#undef MLK_XOF_RATE +#undef mlk_hash_g +#undef mlk_hash_h +#undef mlk_hash_j +#undef mlk_prf_eta +#undef mlk_prf_eta1 +#undef mlk_prf_eta1_x4 +#undef mlk_prf_eta2 +#undef mlk_xof_absorb +#undef mlk_xof_ctx +#undef mlk_xof_init +#undef mlk_xof_release +#undef mlk_xof_squeezeblocks +#undef mlk_xof_x4_absorb +#undef mlk_xof_x4_ctx +#undef mlk_xof_x4_init +#undef mlk_xof_x4_release +#undef mlk_xof_x4_squeezeblocks +/* mlkem/src/sys.h */ +#undef MLK_ALIGN +#undef MLK_ALIGN_UP +#undef MLK_ALWAYS_INLINE +#undef MLK_CET_ENDBR +#undef MLK_CT_TESTING_DECLASSIFY +#undef MLK_CT_TESTING_SECRET +#undef MLK_DEFAULT_ALIGN +#undef MLK_HAVE_INLINE_ASM +#undef MLK_INLINE +#undef MLK_MUST_CHECK_RETURN_VALUE +#undef MLK_RESTRICT +#undef MLK_STATIC_TESTABLE +#undef MLK_SYS_AARCH64 +#undef MLK_SYS_AARCH64_EB +#undef MLK_SYS_APPLE +#undef MLK_SYS_ARMV81M_MVE +#undef MLK_SYS_BIG_ENDIAN +#undef MLK_SYS_H +#undef MLK_SYS_LINUX +#undef MLK_SYS_LITTLE_ENDIAN +#undef MLK_SYS_PPC64LE +#undef MLK_SYS_RISCV32 +#undef MLK_SYS_RISCV64 +#undef MLK_SYS_RISCV64_RVV +#undef MLK_SYS_WINDOWS +#undef MLK_SYS_X86_64 +#undef MLK_SYS_X86_64_AVX2 +/* mlkem/src/verify.h */ +#undef MLK_USE_ASM_VALUE_BARRIER +#undef MLK_VERIFY_H +#undef mlk_ct_opt_blocker_u64 +/* mlkem/src/cbmc.h */ +#undef MLK_CBMC_H +#undef __contract__ +#undef __loop__ + +#if !defined(MLK_CONFIG_FIPS202_CUSTOM_HEADER) +/* + * Undefine macros from FIPS-202 files + */ +/* mlkem/src/fips202/fips202.h */ +#undef FIPS202_X4_DEFAULT_IMPLEMENTATION +#undef MLK_FIPS202_FIPS202_H +#undef SHA3_256_HASHBYTES +#undef SHA3_256_RATE +#undef SHA3_384_RATE +#undef SHA3_512_HASHBYTES +#undef SHA3_512_RATE +#undef SHAKE128_RATE +#undef SHAKE256_RATE +#undef mlk_sha3_256 +#undef mlk_sha3_512 +#undef 
mlk_shake128_absorb_once +#undef mlk_shake128_init +#undef mlk_shake128_release +#undef mlk_shake128_squeezeblocks +#undef mlk_shake256 +/* mlkem/src/fips202/fips202x4.h */ +#undef MLK_FIPS202_FIPS202X4_H +#undef mlk_shake128x4_absorb_once +#undef mlk_shake128x4_init +#undef mlk_shake128x4_release +#undef mlk_shake128x4_squeezeblocks +#undef mlk_shake256x4 +/* mlkem/src/fips202/keccakf1600.h */ +#undef MLK_FIPS202_KECCAKF1600_H +#undef MLK_KECCAK_LANES +#undef MLK_KECCAK_WAY +#undef mlk_keccakf1600_extract_bytes +#undef mlk_keccakf1600_permute +#undef mlk_keccakf1600_xor_bytes +#undef mlk_keccakf1600x4_extract_bytes +#undef mlk_keccakf1600x4_permute +#undef mlk_keccakf1600x4_xor_bytes +#endif /* !MLK_CONFIG_FIPS202_CUSTOM_HEADER */ + +#if defined(MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202) +/* mlkem/src/fips202/native/api.h */ +#undef MLK_FIPS202_NATIVE_API_H +#undef MLK_NATIVE_FUNC_FALLBACK +#undef MLK_NATIVE_FUNC_SUCCESS +/* mlkem/src/fips202/native/auto.h */ +#undef MLK_FIPS202_NATIVE_AUTO_H +#if defined(MLK_SYS_AARCH64) +/* + * Undefine macros from native code (FIPS202, AArch64) + */ +/* mlkem/src/fips202/native/aarch64/auto.h */ +#undef MLK_FIPS202_NATIVE_AARCH64_AUTO_H +/* mlkem/src/fips202/native/aarch64/src/fips202_native_aarch64.h */ +#undef MLK_FIPS202_NATIVE_AARCH64_SRC_FIPS202_NATIVE_AARCH64_H +#undef mlk_keccak_f1600_x1_scalar_asm +#undef mlk_keccak_f1600_x1_v84a_asm +#undef mlk_keccak_f1600_x2_v84a_asm +#undef mlk_keccak_f1600_x4_v8a_scalar_hybrid_asm +#undef mlk_keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm +#undef mlk_keccakf1600_round_constants +/* mlkem/src/fips202/native/aarch64/x1_scalar.h */ +#undef MLK_FIPS202_AARCH64_NEED_X1_SCALAR +#undef MLK_FIPS202_NATIVE_AARCH64_X1_SCALAR_H +#undef MLK_USE_FIPS202_X1_NATIVE +/* mlkem/src/fips202/native/aarch64/x1_v84a.h */ +#undef MLK_FIPS202_AARCH64_NEED_X1_V84A +#undef MLK_FIPS202_NATIVE_AARCH64_X1_V84A_H +#undef MLK_USE_FIPS202_X1_NATIVE +/* mlkem/src/fips202/native/aarch64/x2_v84a.h */ +#undef 
MLK_FIPS202_AARCH64_NEED_X2_V84A +#undef MLK_FIPS202_NATIVE_AARCH64_X2_V84A_H +#undef MLK_USE_FIPS202_X4_NATIVE +/* mlkem/src/fips202/native/aarch64/x4_v8a_scalar.h */ +#undef MLK_FIPS202_AARCH64_NEED_X4_V8A_SCALAR_HYBRID +#undef MLK_FIPS202_NATIVE_AARCH64_X4_V8A_SCALAR_H +#undef MLK_USE_FIPS202_X4_NATIVE +/* mlkem/src/fips202/native/aarch64/x4_v8a_v84a_scalar.h */ +#undef MLK_FIPS202_AARCH64_NEED_X4_V8A_V84A_SCALAR_HYBRID +#undef MLK_FIPS202_NATIVE_AARCH64_X4_V8A_V84A_SCALAR_H +#undef MLK_USE_FIPS202_X4_NATIVE +#endif /* MLK_SYS_AARCH64 */ +#if defined(MLK_SYS_X86_64) +/* + * Undefine macros from native code (FIPS202, x86_64) + */ +/* mlkem/src/fips202/native/x86_64/keccak_f1600_x4_avx2.h */ +#undef MLK_FIPS202_NATIVE_X86_64_KECCAK_F1600_X4_AVX2_H +#undef MLK_FIPS202_X86_64_NEED_X4_AVX2 +#undef MLK_USE_FIPS202_X4_NATIVE +/* mlkem/src/fips202/native/x86_64/src/fips202_native_x86_64.h */ +#undef MLK_FIPS202_NATIVE_X86_64_SRC_FIPS202_NATIVE_X86_64_H +#undef mlk_keccak_f1600_x4_avx2 +#undef mlk_keccak_rho56 +#undef mlk_keccak_rho8 +#undef mlk_keccakf1600_round_constants +#endif /* MLK_SYS_X86_64 */ +#if defined(MLK_SYS_ARMV81M_MVE) +/* + * Undefine macros from native code (FIPS202, Armv8.1-M) + */ +/* mlkem/src/fips202/native/armv81m/mve.h */ +#undef MLK_FIPS202_ARMV81M_NEED_X4 +#undef MLK_FIPS202_NATIVE_ARMV81M +#undef MLK_FIPS202_NATIVE_ARMV81M_MVE_H +#undef MLK_USE_FIPS202_X4_EXTRACT_BYTES_NATIVE +#undef MLK_USE_FIPS202_X4_NATIVE +#undef MLK_USE_FIPS202_X4_XOR_BYTES_NATIVE +#undef mlk_keccak_f1600_x4_native_impl +#undef mlk_keccak_f1600_x4_state_extract_bytes +#undef mlk_keccak_f1600_x4_state_xor_bytes +/* mlkem/src/fips202/native/armv81m/src/fips202_native_armv81m.h */ +#undef MLK_FIPS202_NATIVE_ARMV81M_SRC_FIPS202_NATIVE_ARMV81M_H +#undef mlk_keccak_f1600_x4_mve_asm +#undef mlk_keccak_f1600_x4_state_extract_bytes_asm +#undef mlk_keccak_f1600_x4_state_xor_bytes_asm +#undef mlk_keccakf1600_round_constants +#endif /* MLK_SYS_ARMV81M_MVE */ +#endif /* 
MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202 */ +#if defined(MLK_CONFIG_USE_NATIVE_BACKEND_ARITH) +/* mlkem/src/native/api.h */ +#undef MLK_INVNTT_BOUND +#undef MLK_NATIVE_API_H +#undef MLK_NATIVE_FUNC_FALLBACK +#undef MLK_NATIVE_FUNC_SUCCESS +#undef MLK_NTT_BOUND +/* mlkem/src/native/meta.h */ +#undef MLK_NATIVE_META_H +#if defined(MLK_SYS_AARCH64) +/* + * Undefine macros from native code (Arith, AArch64) + */ +/* mlkem/src/native/aarch64/meta.h */ +#undef MLK_ARITH_BACKEND_AARCH64 +#undef MLK_NATIVE_AARCH64_META_H +#undef MLK_USE_NATIVE_INTT +#undef MLK_USE_NATIVE_NTT +#undef MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED +#undef MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE +#undef MLK_USE_NATIVE_POLY_REDUCE +#undef MLK_USE_NATIVE_POLY_TOBYTES +#undef MLK_USE_NATIVE_POLY_TOMONT +#undef MLK_USE_NATIVE_REJ_UNIFORM +/* mlkem/src/native/aarch64/src/arith_native_aarch64.h */ +#undef MLK_NATIVE_AARCH64_SRC_ARITH_NATIVE_AARCH64_H +#undef mlk_aarch64_invntt_zetas_layer12345 +#undef mlk_aarch64_invntt_zetas_layer67 +#undef mlk_aarch64_ntt_zetas_layer12345 +#undef mlk_aarch64_ntt_zetas_layer67 +#undef mlk_aarch64_zetas_mulcache_native +#undef mlk_aarch64_zetas_mulcache_twisted_native +#undef mlk_intt_asm +#undef mlk_ntt_asm +#undef mlk_poly_mulcache_compute_asm +#undef mlk_poly_reduce_asm +#undef mlk_poly_tobytes_asm +#undef mlk_poly_tomont_asm +#undef mlk_polyvec_basemul_acc_montgomery_cached_asm_k2 +#undef mlk_polyvec_basemul_acc_montgomery_cached_asm_k3 +#undef mlk_polyvec_basemul_acc_montgomery_cached_asm_k4 +#undef mlk_rej_uniform_asm +#undef mlk_rej_uniform_table +#endif /* MLK_SYS_AARCH64 */ +#if defined(MLK_SYS_X86_64) +/* + * Undefine macros from native code (Arith, X86_64) + */ +/* mlkem/src/native/x86_64/meta.h */ +#undef MLK_ARITH_BACKEND_X86_64_DEFAULT +#undef MLK_NATIVE_X86_64_META_H +#undef MLK_USE_NATIVE_INTT +#undef MLK_USE_NATIVE_NTT +#undef MLK_USE_NATIVE_NTT_CUSTOM_ORDER +#undef MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED +#undef 
MLK_USE_NATIVE_POLY_COMPRESS_D10 +#undef MLK_USE_NATIVE_POLY_COMPRESS_D11 +#undef MLK_USE_NATIVE_POLY_COMPRESS_D4 +#undef MLK_USE_NATIVE_POLY_COMPRESS_D5 +#undef MLK_USE_NATIVE_POLY_DECOMPRESS_D10 +#undef MLK_USE_NATIVE_POLY_DECOMPRESS_D11 +#undef MLK_USE_NATIVE_POLY_DECOMPRESS_D4 +#undef MLK_USE_NATIVE_POLY_DECOMPRESS_D5 +#undef MLK_USE_NATIVE_POLY_FROMBYTES +#undef MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE +#undef MLK_USE_NATIVE_POLY_REDUCE +#undef MLK_USE_NATIVE_POLY_TOBYTES +#undef MLK_USE_NATIVE_POLY_TOMONT +#undef MLK_USE_NATIVE_REJ_UNIFORM +/* mlkem/src/native/x86_64/src/arith_native_x86_64.h */ +#undef MLK_AVX2_REJ_UNIFORM_BUFLEN +#undef MLK_NATIVE_X86_64_SRC_ARITH_NATIVE_X86_64_H +#undef mlk_invntt_avx2 +#undef mlk_ntt_avx2 +#undef mlk_nttfrombytes_avx2 +#undef mlk_ntttobytes_avx2 +#undef mlk_nttunpack_avx2 +#undef mlk_poly_compress_d10_avx2 +#undef mlk_poly_compress_d11_avx2 +#undef mlk_poly_compress_d4_avx2 +#undef mlk_poly_compress_d5_avx2 +#undef mlk_poly_decompress_d10_avx2 +#undef mlk_poly_decompress_d11_avx2 +#undef mlk_poly_decompress_d4_avx2 +#undef mlk_poly_decompress_d5_avx2 +#undef mlk_poly_mulcache_compute_avx2 +#undef mlk_polyvec_basemul_acc_montgomery_cached_asm_k2 +#undef mlk_polyvec_basemul_acc_montgomery_cached_asm_k3 +#undef mlk_polyvec_basemul_acc_montgomery_cached_asm_k4 +#undef mlk_reduce_avx2 +#undef mlk_rej_uniform_asm +#undef mlk_rej_uniform_table +#undef mlk_tomont_avx2 +/* mlkem/src/native/x86_64/src/compress_consts.h */ +#undef MLK_NATIVE_X86_64_SRC_COMPRESS_CONSTS_H +#undef mlk_compress_d10_data +#undef mlk_compress_d11_data +#undef mlk_compress_d4_data +#undef mlk_compress_d5_data +#undef mlk_decompress_d10_data +#undef mlk_decompress_d11_data +#undef mlk_decompress_d4_data +#undef mlk_decompress_d5_data +/* mlkem/src/native/x86_64/src/consts.h */ +#undef MLK_AVX2_BACKEND_DATA_OFFSET_MULCACHE_TWIDDLES +#undef MLK_AVX2_BACKEND_DATA_OFFSET_REVIDXB +#undef MLK_AVX2_BACKEND_DATA_OFFSET_REVIDXD +#undef 
MLK_AVX2_BACKEND_DATA_OFFSET_ZETAS_EXP +#undef MLK_NATIVE_X86_64_SRC_CONSTS_H +#undef mlk_qdata +#endif /* MLK_SYS_X86_64 */ +#if defined(MLK_SYS_RISCV64) +/* + * Undefine macros from native code (Arith, RISC-V 64) + */ +/* mlkem/src/native/riscv64/meta.h */ +#undef MLK_ARITH_BACKEND_RISCV64 +#undef MLK_NATIVE_RISCV64_META_H +#undef MLK_USE_NATIVE_INTT +#undef MLK_USE_NATIVE_NTT +#undef MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED +#undef MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE +#undef MLK_USE_NATIVE_POLY_REDUCE +#undef MLK_USE_NATIVE_POLY_TOMONT +#undef MLK_USE_NATIVE_REJ_UNIFORM +/* mlkem/src/native/riscv64/src/arith_native_riscv64.h */ +#undef MLK_NATIVE_RISCV64_SRC_ARITH_NATIVE_RISCV64_H +#undef mlk_rv64v_poly_add +#undef mlk_rv64v_poly_basemul_mont_add_k2 +#undef mlk_rv64v_poly_basemul_mont_add_k3 +#undef mlk_rv64v_poly_basemul_mont_add_k4 +#undef mlk_rv64v_poly_invntt_tomont +#undef mlk_rv64v_poly_ntt +#undef mlk_rv64v_poly_reduce +#undef mlk_rv64v_poly_sub +#undef mlk_rv64v_poly_tomont +#undef mlk_rv64v_rej_uniform +/* mlkem/src/native/riscv64/src/rv64v_debug.h */ +#undef MLK_NATIVE_RISCV64_SRC_RV64V_DEBUG_H +#undef mlk_assert_abs_bound_int16m1 +#undef mlk_assert_abs_bound_int16m2 +#undef mlk_assert_bound_int16m1 +#undef mlk_assert_bound_int16m2 +#undef mlk_debug_check_bounds_int16m1 +#undef mlk_debug_check_bounds_int16m2 +#endif /* MLK_SYS_RISCV64 */ +#endif /* MLK_CONFIG_USE_NATIVE_BACKEND_ARITH */ +#endif /* !MLK_CONFIG_MONOBUILD_KEEP_SHARED_HEADERS */ diff --git a/mlkem_native/mlkem_native.h b/mlkem_native/mlkem_native.h new file mode 100644 index 0000000..302ca3f --- /dev/null +++ b/mlkem_native/mlkem_native.h @@ -0,0 +1,538 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [FIPS203] + * FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard + * National Institute of Standards and Technology + * 
https://csrc.nist.gov/pubs/fips/203/final + */ + +#ifndef MLK_H +#define MLK_H + +/* + * Public API for mlkem-native. + * + * This header defines the public API of a single build of mlkem-native. + * + * Make sure the configuration file is in the include path + * (this is "mlkem_native_config.h" by default, or MLK_CONFIG_FILE if defined). + * + * # Multi-level builds + * + * This header specifies a build of mlkem-native for a fixed security level. + * If you need multiple security levels, leave the security level unspecified + * in the configuration file and include this header multiple times, setting + * MLK_CONFIG_PARAMETER_SET accordingly for each, and #undef'ing the MLK_H + * guard to allow multiple inclusions. + * + * # Legacy configuration (deprecated) + * + * Instead of providing the config file used for the build, you can + * alternatively set the following configuration options prior to + * including this header. + * + * This method of configuration is deprecated. + * It will be removed in mlkem-native-v2. + * + * - MLK_CONFIG_API_PARAMETER_SET [required] + * + * The parameter set used for the build; 512, 768, or 1024. + * + * - MLK_CONFIG_API_NAMESPACE_PREFIX [required] + * + * The namespace prefix used for the build. + * + * NOTE: + * For a multi-level build, you must include the 512/768/1024 suffixes + * in MLK_CONFIG_API_NAMESPACE_PREFIX. + * + * - MLK_CONFIG_API_NO_SUPERCOP [optional] + * + * By default, this header will also expose the mlkem-native API in the + * SUPERCOP naming convention crypto_kem_xxx. If you don't want/need this, + * set MLK_CONFIG_API_NO_SUPERCOP. You must set this for a multi-level build. + * + * - MLK_CONFIG_API_CONSTANTS_ONLY [optional] + * + * If you don't want this header to expose any function declarations, + * but only constants for the sizes of key material, set + * MLK_CONFIG_API_CONSTANTS_ONLY. 
In this case, you don't need to set + * MLK_CONFIG_API_PARAMETER_SET or MLK_CONFIG_API_NAMESPACE_PREFIX, + * nor include a configuration. + * + * - MLK_CONFIG_API_QUALIFIER [optional] + * + * Qualifier to apply to external API. + * + ******************************************************************************/ + +/******************************* Key sizes ************************************/ + +/* Sizes of cryptographic material, per parameter set */ +/* See mlkem/common.h for the arithmetic expressions giving rise to these */ +/* check-magic: off */ +#define MLKEM512_SECRETKEYBYTES 1632 +#define MLKEM512_PUBLICKEYBYTES 800 +#define MLKEM512_CIPHERTEXTBYTES 768 + +#define MLKEM768_SECRETKEYBYTES 2400 +#define MLKEM768_PUBLICKEYBYTES 1184 +#define MLKEM768_CIPHERTEXTBYTES 1088 + +#define MLKEM1024_SECRETKEYBYTES 3168 +#define MLKEM1024_PUBLICKEYBYTES 1568 +#define MLKEM1024_CIPHERTEXTBYTES 1568 +/* check-magic: on */ + +/* Size of randomness coins in bytes (level-independent) */ +#define MLKEM_SYMBYTES 32 +#define MLKEM512_SYMBYTES MLKEM_SYMBYTES +#define MLKEM768_SYMBYTES MLKEM_SYMBYTES +#define MLKEM1024_SYMBYTES MLKEM_SYMBYTES +/* Size of shared secret in bytes (level-independent) */ +#define MLKEM_BYTES 32 +#define MLKEM512_BYTES MLKEM_BYTES +#define MLKEM768_BYTES MLKEM_BYTES +#define MLKEM1024_BYTES MLKEM_BYTES + +/* Sizes of cryptographic material, as a function of LVL=512,768,1024 */ +#define MLKEM_SECRETKEYBYTES_(LVL) MLKEM##LVL##_SECRETKEYBYTES +#define MLKEM_PUBLICKEYBYTES_(LVL) MLKEM##LVL##_PUBLICKEYBYTES +#define MLKEM_CIPHERTEXTBYTES_(LVL) MLKEM##LVL##_CIPHERTEXTBYTES +#define MLKEM_SECRETKEYBYTES(LVL) MLKEM_SECRETKEYBYTES_(LVL) +#define MLKEM_PUBLICKEYBYTES(LVL) MLKEM_PUBLICKEYBYTES_(LVL) +#define MLKEM_CIPHERTEXTBYTES(LVL) MLKEM_CIPHERTEXTBYTES_(LVL) + +/****************************** Error codes ***********************************/ + +/* Generic failure condition */ +#define MLK_ERR_FAIL -1 +/* An allocation failed. 
This can only happen if MLK_CONFIG_CUSTOM_ALLOC_FREE + * is defined and the provided MLK_CUSTOM_ALLOC can fail. */ +#define MLK_ERR_OUT_OF_MEMORY -2 +/* An rng failure occurred. Might be due to insufficient entropy or + * system misconfiguration. */ +#define MLK_ERR_RNG_FAIL -3 + +/****************************** Function API **********************************/ + +#define MLK_API_CONCAT_(x, y) x##y +#define MLK_API_CONCAT(x, y) MLK_API_CONCAT_(x, y) +#define MLK_API_CONCAT_UNDERSCORE(x, y) MLK_API_CONCAT(MLK_API_CONCAT(x, _), y) + +#if !defined(MLK_CONFIG_API_PARAMETER_SET) +/* Recommended configuration via same config file as used for the build. */ + +/* For now, we derive the legacy API configuration MLK_CONFIG_API_XXX from + * the config file. In mlkem-native-v2, this will be removed and we will + * exclusively work with MLK_CONFIG_XXX. */ + +/* You need to make sure the config file is in the include path. */ +#if defined(MLK_CONFIG_FILE) +#include MLK_CONFIG_FILE +#else +#include "mlkem_native_config.h" +#endif + +#define MLK_CONFIG_API_PARAMETER_SET MLK_CONFIG_PARAMETER_SET + +#if defined(MLK_CONFIG_MULTILEVEL_BUILD) +#define MLK_CONFIG_API_NAMESPACE_PREFIX \ + MLK_API_CONCAT(MLK_CONFIG_NAMESPACE_PREFIX, MLK_CONFIG_PARAMETER_SET) +#else +#define MLK_CONFIG_API_NAMESPACE_PREFIX MLK_CONFIG_NAMESPACE_PREFIX +#endif + +#if defined(MLK_CONFIG_NO_SUPERCOP) +#define MLK_CONFIG_API_NO_SUPERCOP +#endif + +#if defined(MLK_CONFIG_CONSTANTS_ONLY) +#define MLK_CONFIG_API_CONSTANTS_ONLY +#endif + +#if defined(MLK_CONFIG_EXTERNAL_API_QUALIFIER) +#define MLK_CONFIG_API_QUALIFIER MLK_CONFIG_EXTERNAL_API_QUALIFIER +#endif + +#else /* !MLK_CONFIG_API_PARAMETER_SET */ + +#define MLK_API_LEGACY_CONFIG + +#endif /* MLK_CONFIG_API_PARAMETER_SET */ + +#define MLK_API_NAMESPACE(sym) \ + MLK_API_CONCAT_UNDERSCORE(MLK_CONFIG_API_NAMESPACE_PREFIX, sym) + +#if defined(__GNUC__) || defined(__clang__) +#define MLK_API_MUST_CHECK_RETURN_VALUE __attribute__((warn_unused_result)) +#else +#define
MLK_API_MUST_CHECK_RETURN_VALUE +#endif + +#if defined(MLK_CONFIG_API_QUALIFIER) +#define MLK_API_QUALIFIER MLK_CONFIG_API_QUALIFIER +#else +#define MLK_API_QUALIFIER +#endif + +#if !defined(MLK_CONFIG_API_CONSTANTS_ONLY) + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" +{ +#endif + +/************************************************* + * Name: crypto_kem_keypair_derand + * + * Description: Generates public and private key + * for CCA-secure ML-KEM key encapsulation mechanism + * + * Arguments: - uint8_t pk[]: pointer to output public key, an array of + * length MLKEM{512,768,1024}_PUBLICKEYBYTES bytes. + * - uint8_t sk[]: pointer to output private key, an array of + * length MLKEM{512,768,1024}_SECRETKEYBYTES bytes. + * - const uint8_t *coins: pointer to input randomness, an array of + * 2*MLKEM_SYMBYTES uniformly random bytes. + * + * Returns: - 0: On success + * - MLK_ERR_FAIL: If MLK_CONFIG_KEYGEN_PCT is enabled and the + * PCT failed. + * - MLK_ERR_OUT_OF_MEMORY: If MLK_CONFIG_CUSTOM_ALLOC_FREE is + * used and an allocation via MLK_CUSTOM_ALLOC returned NULL. + * + * Specification: Implements @[FIPS203, Algorithm 16, ML-KEM.KeyGen_Internal] + * + **************************************************/ +MLK_API_QUALIFIER +MLK_API_MUST_CHECK_RETURN_VALUE +int MLK_API_NAMESPACE(keypair_derand)( + uint8_t pk[MLKEM_PUBLICKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)], + uint8_t sk[MLKEM_SECRETKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)], + const uint8_t coins[2 * MLKEM_SYMBYTES] +#ifdef MLK_CONFIG_CONTEXT_PARAMETER + , + MLK_CONFIG_CONTEXT_PARAMETER_TYPE context +#endif +); + + +#if !defined(MLK_CONFIG_NO_RANDOMIZED_API) +/************************************************* + * Name: crypto_kem_keypair + * + * Description: Generates public and private key + * for CCA-secure ML-KEM key encapsulation mechanism + * + * Arguments: - uint8_t *pk: pointer to output public key, an array of + * MLKEM{512,768,1024}_PUBLICKEYBYTES bytes.
+ * - uint8_t *sk: pointer to output private key, an array of + * MLKEM{512,768,1024}_SECRETKEYBYTES bytes. + * + * Returns: - 0: On success + * - MLK_ERR_FAIL: If MLK_CONFIG_KEYGEN_PCT is enabled and the + * PCT failed. + * - MLK_ERR_OUT_OF_MEMORY: If MLK_CONFIG_CUSTOM_ALLOC_FREE is + * used and an allocation via MLK_CUSTOM_ALLOC returned NULL. + * - MLK_ERR_RNG_FAIL: Random number generation failed. + * + * Specification: Implements @[FIPS203, Algorithm 19, ML-KEM.KeyGen] + * + **************************************************/ +MLK_API_QUALIFIER +MLK_API_MUST_CHECK_RETURN_VALUE +int MLK_API_NAMESPACE(keypair)( + uint8_t pk[MLKEM_PUBLICKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)], + uint8_t sk[MLKEM_SECRETKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)] +#ifdef MLK_CONFIG_CONTEXT_PARAMETER + , + MLK_CONFIG_CONTEXT_PARAMETER_TYPE context +#endif +); +#endif /* !MLK_CONFIG_NO_RANDOMIZED_API */ + +/************************************************* + * Name: crypto_kem_enc_derand + * + * Description: Generates cipher text and shared + * secret for given public key + * + * Arguments: - uint8_t *ct: pointer to output cipher text, an array of + * MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes. + * - uint8_t *ss: pointer to output shared secret, an array of + * MLKEM_BYTES bytes. + * - const uint8_t *pk: pointer to input public key, an array of + * MLKEM{512,768,1024}_PUBLICKEYBYTES bytes. + * - const uint8_t *coins: pointer to input randomness, an array of + * MLKEM_SYMBYTES bytes. + * + * Returns: - 0 on success + * - MLK_ERR_FAIL: If the 'modulus check' @[FIPS203, Section 7.2] + * for the public key fails. + * - MLK_ERR_OUT_OF_MEMORY: If MLK_CONFIG_CUSTOM_ALLOC_FREE is + * used and an allocation via MLK_CUSTOM_ALLOC returned NULL. 
+ * + * Specification: Implements @[FIPS203, Algorithm 17, ML-KEM.Encaps_Internal] + * + **************************************************/ +MLK_API_QUALIFIER +MLK_API_MUST_CHECK_RETURN_VALUE +int MLK_API_NAMESPACE(enc_derand)( + uint8_t ct[MLKEM_CIPHERTEXTBYTES(MLK_CONFIG_API_PARAMETER_SET)], + uint8_t ss[MLKEM_BYTES], + const uint8_t pk[MLKEM_PUBLICKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)], + const uint8_t coins[MLKEM_SYMBYTES] +#ifdef MLK_CONFIG_CONTEXT_PARAMETER + , + MLK_CONFIG_CONTEXT_PARAMETER_TYPE context +#endif +); + +#if !defined(MLK_CONFIG_NO_RANDOMIZED_API) +/************************************************* + * Name: crypto_kem_enc + * + * Description: Generates cipher text and shared + * secret for given public key + * + * Arguments: - uint8_t *ct: pointer to output cipher text, an array of + * MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes. + * - uint8_t *ss: pointer to output shared secret, an array of + * MLKEM_BYTES bytes. + * - const uint8_t *pk: pointer to input public key, an array of + * MLKEM{512,768,1024}_PUBLICKEYBYTES bytes. + * + * Returns: - 0 on success + * - MLK_ERR_FAIL: If the 'modulus check' @[FIPS203, Section 7.2] + * for the public key fails. + * - MLK_ERR_OUT_OF_MEMORY: If MLK_CONFIG_CUSTOM_ALLOC_FREE is + * used and an allocation via MLK_CUSTOM_ALLOC returned NULL. + * - MLK_ERR_RNG_FAIL: Random number generation failed. 
+ * + * Specification: Implements @[FIPS203, Algorithm 20, ML-KEM.Encaps] + * + **************************************************/ +MLK_API_QUALIFIER +MLK_API_MUST_CHECK_RETURN_VALUE +int MLK_API_NAMESPACE(enc)( + uint8_t ct[MLKEM_CIPHERTEXTBYTES(MLK_CONFIG_API_PARAMETER_SET)], + uint8_t ss[MLKEM_BYTES], + const uint8_t pk[MLKEM_PUBLICKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)] +#ifdef MLK_CONFIG_CONTEXT_PARAMETER + , + MLK_CONFIG_CONTEXT_PARAMETER_TYPE context +#endif +); +#endif /* !MLK_CONFIG_NO_RANDOMIZED_API */ + +/************************************************* + * Name: crypto_kem_dec + * + * Description: Generates shared secret for given + * cipher text and private key + * + * Arguments: - uint8_t *ss: pointer to output shared secret, an array of + * MLKEM_BYTES bytes. + * - const uint8_t *ct: pointer to input cipher text, an array of + * MLKEM{512,768,1024}_CIPHERTEXTBYTES bytes. + * - const uint8_t *sk: pointer to input private key, an array of + * MLKEM{512,768,1024}_SECRETKEYBYTES bytes. + * + * Returns: - 0 on success + * - MLK_ERR_FAIL: If the 'hash check' @[FIPS203, Section 7.3] + * for the secret key fails. + * - MLK_ERR_OUT_OF_MEMORY: If MLK_CONFIG_CUSTOM_ALLOC_FREE is + * used and an allocation via MLK_CUSTOM_ALLOC returned NULL. + * + * Specification: Implements @[FIPS203, Algorithm 21, ML-KEM.Decaps] + * + **************************************************/ +MLK_API_QUALIFIER +MLK_API_MUST_CHECK_RETURN_VALUE +int MLK_API_NAMESPACE(dec)( + uint8_t ss[MLKEM_BYTES], + const uint8_t ct[MLKEM_CIPHERTEXTBYTES(MLK_CONFIG_API_PARAMETER_SET)], + const uint8_t sk[MLKEM_SECRETKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)] +#ifdef MLK_CONFIG_CONTEXT_PARAMETER + , + MLK_CONFIG_CONTEXT_PARAMETER_TYPE context +#endif +); + + +/************************************************* + * Name: crypto_kem_check_pk + * + * Description: Implements modulus check mandated by FIPS 203, + * i.e., ensures that coefficients are in [0,q-1]. 
+ * + * Arguments: - const uint8_t *pk: pointer to input public key, an array of + * MLKEM{512,768,1024}_PUBLICKEYBYTES bytes. + * + * Returns: - 0 on success + * - MLK_ERR_FAIL: If the modulus check failed. + * - MLK_ERR_OUT_OF_MEMORY: If MLK_CONFIG_CUSTOM_ALLOC_FREE is + * used and an allocation via MLK_CUSTOM_ALLOC returned NULL. + * + * Specification: Implements @[FIPS203, Section 7.2, 'modulus check'] + * + **************************************************/ +MLK_API_QUALIFIER +MLK_API_MUST_CHECK_RETURN_VALUE +int MLK_API_NAMESPACE(check_pk)( + const uint8_t pk[MLKEM_PUBLICKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)] +#ifdef MLK_CONFIG_CONTEXT_PARAMETER + , + MLK_CONFIG_CONTEXT_PARAMETER_TYPE context +#endif +); + +/************************************************* + * Name: crypto_kem_check_sk + * + * Description: Implements public key hash check mandated by FIPS 203, + * i.e., ensures that + * sk[768π‘˜+32 ∢ 768π‘˜+64] = H(pk)= H(sk[384π‘˜ : 768π‘˜+32]) + * + * Arguments: - const uint8_t *sk: pointer to input private key, an array of + * MLKEM{512,768,1024}_SECRETKEYBYTES bytes. + * + * Returns: - 0 on success + * - MLK_ERR_FAIL: If the public key hash check failed. + * - MLK_ERR_OUT_OF_MEMORY: If MLK_CONFIG_CUSTOM_ALLOC_FREE is + * used and an allocation via MLK_CUSTOM_ALLOC returned NULL. 
+ * + * Specification: Implements @[FIPS203, Section 7.3, 'hash check'] + * + **************************************************/ +MLK_API_QUALIFIER +MLK_API_MUST_CHECK_RETURN_VALUE +int MLK_API_NAMESPACE(check_sk)( + const uint8_t sk[MLKEM_SECRETKEYBYTES(MLK_CONFIG_API_PARAMETER_SET)] +#ifdef MLK_CONFIG_CONTEXT_PARAMETER + , + MLK_CONFIG_CONTEXT_PARAMETER_TYPE context +#endif +); + +#ifdef __cplusplus +} +#endif + +/****************************** SUPERCOP API *********************************/ + +#if !defined(MLK_CONFIG_API_NO_SUPERCOP) +/* Export API in SUPERCOP naming scheme CRYPTO_xxx / crypto_kem_xxx */ +#define CRYPTO_SECRETKEYBYTES MLKEM_SECRETKEYBYTES(MLK_CONFIG_API_PARAMETER_SET) +#define CRYPTO_PUBLICKEYBYTES MLKEM_PUBLICKEYBYTES(MLK_CONFIG_API_PARAMETER_SET) +#define CRYPTO_CIPHERTEXTBYTES \ + MLKEM_CIPHERTEXTBYTES(MLK_CONFIG_API_PARAMETER_SET) +#define CRYPTO_SYMBYTES MLKEM_SYMBYTES +#define CRYPTO_BYTES MLKEM_BYTES + +#define crypto_kem_keypair_derand MLK_API_NAMESPACE(keypair_derand) +#define crypto_kem_keypair MLK_API_NAMESPACE(keypair) +#define crypto_kem_enc_derand MLK_API_NAMESPACE(enc_derand) +#define crypto_kem_enc MLK_API_NAMESPACE(enc) +#define crypto_kem_dec MLK_API_NAMESPACE(dec) +#define crypto_kem_check_pk MLK_API_NAMESPACE(check_pk) +#define crypto_kem_check_sk MLK_API_NAMESPACE(check_sk) + +#else /* !MLK_CONFIG_API_NO_SUPERCOP */ + +/* If the SUPERCOP API is not needed, we can undefine the various helper macros + * above. Otherwise, they are needed for lazy evaluation of crypto_kem_xxx. 
*/ +#if !defined(MLK_API_LEGACY_CONFIG) +#undef MLK_CONFIG_API_PARAMETER_SET +#undef MLK_CONFIG_API_NAMESPACE_PREFIX +#undef MLK_CONFIG_API_NO_SUPERCOP +#undef MLK_CONFIG_API_CONSTANTS_ONLY +#undef MLK_CONFIG_API_QUALIFIER +#endif /* !MLK_API_LEGACY_CONFIG */ + +#undef MLK_API_CONCAT +#undef MLK_API_CONCAT_ +#undef MLK_API_CONCAT_UNDERSCORE +#undef MLK_API_NAMESPACE +#undef MLK_API_MUST_CHECK_RETURN_VALUE +#undef MLK_API_QUALIFIER +#undef MLK_API_LEGACY_CONFIG + +#endif /* MLK_CONFIG_API_NO_SUPERCOP */ +#endif /* !MLK_CONFIG_API_CONSTANTS_ONLY */ + + +/***************************** Memory Usage **********************************/ + +/* + * By default mlkem-native performs all memory allocations on the stack. + * Alternatively, mlkem-native supports custom allocation of large structures + * through the `MLK_CONFIG_CUSTOM_ALLOC_FREE` configuration option. + * See mlkem_native_config.h for details. + * + * `MLK_TOTAL_ALLOC_{512,768,1024}_{KEYPAIR,ENCAPS,DECAPS}` indicates the + * maximum (accumulative) allocation via MLK_ALLOC for each parameter set and + * operation. Note that some stack allocation remains even when using custom + * allocators, so these values are lower than total stack usage with the default + * stack-only allocation. + * + * These constants may be used to implement custom allocations using a + * fixed-sized buffer and a simple allocator (e.g., bump allocator). 
+ */ +/* check-magic: off */ +#define MLK_TOTAL_ALLOC_512_KEYPAIR_NO_PCT 5824 +#define MLK_TOTAL_ALLOC_512_KEYPAIR_PCT 10048 +#define MLK_TOTAL_ALLOC_512_ENCAPS 8384 +#define MLK_TOTAL_ALLOC_512_DECAPS 9152 +#define MLK_TOTAL_ALLOC_768_KEYPAIR_NO_PCT 10176 +#define MLK_TOTAL_ALLOC_768_KEYPAIR_PCT 15552 +#define MLK_TOTAL_ALLOC_768_ENCAPS 13248 +#define MLK_TOTAL_ALLOC_768_DECAPS 14336 +#define MLK_TOTAL_ALLOC_1024_KEYPAIR_NO_PCT 15552 +#define MLK_TOTAL_ALLOC_1024_KEYPAIR_PCT 22400 +#define MLK_TOTAL_ALLOC_1024_ENCAPS 19136 +#define MLK_TOTAL_ALLOC_1024_DECAPS 20704 +/* check-magic: on */ + +/* + * MLK_TOTAL_ALLOC_*_KEYPAIR adapts based on MLK_CONFIG_KEYGEN_PCT. + * For legacy config, we don't know which options are used, so assume + * the worst case (PCT enabled). + */ +#if defined(MLK_API_LEGACY_CONFIG) || defined(MLK_CONFIG_KEYGEN_PCT) +#define MLK_TOTAL_ALLOC_512_KEYPAIR MLK_TOTAL_ALLOC_512_KEYPAIR_PCT +#define MLK_TOTAL_ALLOC_768_KEYPAIR MLK_TOTAL_ALLOC_768_KEYPAIR_PCT +#define MLK_TOTAL_ALLOC_1024_KEYPAIR MLK_TOTAL_ALLOC_1024_KEYPAIR_PCT +#else +#define MLK_TOTAL_ALLOC_512_KEYPAIR MLK_TOTAL_ALLOC_512_KEYPAIR_NO_PCT +#define MLK_TOTAL_ALLOC_768_KEYPAIR MLK_TOTAL_ALLOC_768_KEYPAIR_NO_PCT +#define MLK_TOTAL_ALLOC_1024_KEYPAIR MLK_TOTAL_ALLOC_1024_KEYPAIR_NO_PCT +#endif + +#define MLK_MAX3_(a, b, c) \ + ((a) > (b) ? ((a) > (c) ? (a) : (c)) : ((b) > (c) ? (b) : (c))) + +/* + * `MLK_TOTAL_ALLOC_{512,768,1024}` is the maximum across all operations for + * each parameter set. 
+ */ +#define MLK_TOTAL_ALLOC_512 \ + MLK_MAX3_(MLK_TOTAL_ALLOC_512_KEYPAIR, MLK_TOTAL_ALLOC_512_ENCAPS, \ + MLK_TOTAL_ALLOC_512_DECAPS) +#define MLK_TOTAL_ALLOC_768 \ + MLK_MAX3_(MLK_TOTAL_ALLOC_768_KEYPAIR, MLK_TOTAL_ALLOC_768_ENCAPS, \ + MLK_TOTAL_ALLOC_768_DECAPS) +#define MLK_TOTAL_ALLOC_1024 \ + MLK_MAX3_(MLK_TOTAL_ALLOC_1024_KEYPAIR, MLK_TOTAL_ALLOC_1024_ENCAPS, \ + MLK_TOTAL_ALLOC_1024_DECAPS) + +#endif /* !MLK_H */ diff --git a/mlkem_native/mlkem_native_config.h b/mlkem_native/mlkem_native_config.h new file mode 100644 index 0000000..19450f3 --- /dev/null +++ b/mlkem_native/mlkem_native_config.h @@ -0,0 +1,64 @@ +/* + * mlkem-native configuration for OnlyKey (NXP MK20DX256, Cortex-M4) + * ML-KEM-768 (FIPS 203), C-only portable backend + * + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +#ifndef MLK_CONFIG_H +#define MLK_CONFIG_H + +/* ML-KEM-768 (NIST Level 3) */ +#ifndef MLK_CONFIG_PARAMETER_SET +#define MLK_CONFIG_PARAMETER_SET 768 +#endif + +/* Namespace prefix for symbols */ +#if !defined(MLK_CONFIG_NAMESPACE_PREFIX) +#define MLK_CONFIG_NAMESPACE_PREFIX MLK_DEFAULT_NAMESPACE_PREFIX +#endif + +/* No native assembly backends β€” Cortex-M4 not supported by existing + * AArch64/x86_64/RVV/Helium backends. Pure portable C. */ + +/* Build-only options */ +#if defined(MLK_BUILD_INTERNAL) + +/* + * Custom randombytes wrapper + * + * OnlyKey's existing randombytes has signature: + * void randombytes(unsigned char *x, unsigned long long xlen) + * + * mlkem-native expects: + * int mlk_randombytes(uint8_t *out, size_t outlen) β€” returns 0 on success + * + * We bridge with a custom wrapper. Implement onlykey_mlkem_randombytes() + * in okcrypto.cpp using your preferred entropy source. 
+ */ +#define MLK_CONFIG_CUSTOM_RANDOMBYTES +#if !defined(__ASSEMBLER__) +#include +#include +#include "src/sys.h" + +extern int onlykey_mlkem_randombytes(uint8_t *out, size_t outlen); + +static MLK_INLINE int mlk_randombytes(uint8_t *out, size_t outlen) +{ + return onlykey_mlkem_randombytes(out, outlen); +} +#endif /* !__ASSEMBLER__ */ + +#endif /* MLK_BUILD_INTERNAL */ + +/* Default namespace */ +#if MLK_CONFIG_PARAMETER_SET == 512 +#define MLK_DEFAULT_NAMESPACE_PREFIX PQCP_MLKEM_NATIVE_MLKEM512 +#elif MLK_CONFIG_PARAMETER_SET == 768 +#define MLK_DEFAULT_NAMESPACE_PREFIX PQCP_MLKEM_NATIVE_MLKEM768 +#elif MLK_CONFIG_PARAMETER_SET == 1024 +#define MLK_DEFAULT_NAMESPACE_PREFIX PQCP_MLKEM_NATIVE_MLKEM1024 +#endif + +#endif /* !MLK_CONFIG_H */ diff --git a/mlkem_native/src/cbmc.h b/mlkem_native/src/cbmc.h new file mode 100644 index 0000000..80e1a36 --- /dev/null +++ b/mlkem_native/src/cbmc.h @@ -0,0 +1,174 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +#ifndef MLK_CBMC_H +#define MLK_CBMC_H +/*************************************************** + * Basic replacements for __CPROVER_XXX contracts + ***************************************************/ +#ifndef CBMC + +#define __contract__(x) +#define __loop__(x) + +#else /* !CBMC */ + + +#define __contract__(x) x +#define __loop__(x) x + +/* https://diffblue.github.io/cbmc/contracts-assigns.html */ +#define assigns(...) __CPROVER_assigns(__VA_ARGS__) + +/* https://diffblue.github.io/cbmc/contracts-requires-ensures.html */ +#define requires(...) __CPROVER_requires(__VA_ARGS__) +#define ensures(...) __CPROVER_ensures(__VA_ARGS__) +/* https://diffblue.github.io/cbmc/contracts-loops.html */ +#define invariant(...) __CPROVER_loop_invariant(__VA_ARGS__) +#define decreases(...) __CPROVER_decreases(__VA_ARGS__) +/* cassert to avoid confusion with in-built assert */ +#define cassert(x) __CPROVER_assert(x, "cbmc assertion failed") +#define assume(...) 
__CPROVER_assume(__VA_ARGS__) + +/*************************************************** + * Macros for "expression" forms that may appear + * _inside_ top-level contracts. + ***************************************************/ + +/* + * function return value - useful inside ensures + * https://diffblue.github.io/cbmc/contracts-functions.html + */ +#define return_value (__CPROVER_return_value) + +/* + * assigns l-value targets + * https://diffblue.github.io/cbmc/contracts-assigns.html + */ +#define object_whole(...) __CPROVER_object_whole(__VA_ARGS__) +#define memory_slice(...) __CPROVER_object_upto(__VA_ARGS__) + +/* + * Pointer-related predicates + * https://diffblue.github.io/cbmc/contracts-memory-predicates.html + */ +#define memory_no_alias(...) __CPROVER_is_fresh(__VA_ARGS__) +#define readable(...) __CPROVER_r_ok(__VA_ARGS__) +#define writeable(...) __CPROVER_w_ok(__VA_ARGS__) + +/* Maximum supported buffer size + * + * Larger buffers may be supported, but due to internal modeling constraints + * in CBMC, the proofs of memory- and type-safety won't be able to run. + * + * If you find yourself in need for a buffer size larger than this, + * please contact the maintainers, so we can prioritize work to relax + * this somewhat artificial bound. + */ +#define MLK_MAX_BUFFER_SIZE (SIZE_MAX >> 12) + +/* + * History variables + * https://diffblue.github.io/cbmc/contracts-history-variables.html + */ +#define old(...) __CPROVER_old(__VA_ARGS__) +#define loop_entry(...) __CPROVER_loop_entry(__VA_ARGS__) + +/* + * Quantifiers + * Note that the range on qvar is _exclusive_ between qvar_lb .. 
qvar_ub + * https://diffblue.github.io/cbmc/contracts-quantifiers.html + */ + +/* + * Prevent clang-format from corrupting CBMC's special ==> operator + */ +/* clang-format off */ +#define forall(qvar, qvar_lb, qvar_ub, predicate) \ + __CPROVER_forall \ + { \ + unsigned qvar; \ + ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> (predicate) \ + } + +#define exists(qvar, qvar_lb, qvar_ub, predicate) \ + __CPROVER_exists \ + { \ + unsigned qvar; \ + ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) && (predicate) \ + } +/* clang-format on */ + +/*************************************************** + * Convenience macros for common contract patterns + ***************************************************/ + +/* + * Boolean-value predidate that asserts that "all values of array_var are in + * range value_lb (inclusive) .. value_ub (exclusive)" + * Example: + * array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q) + * expands to + * __CPROVER_forall { int k; (0 <= k && k <= MLKEM_N-1) ==> ( + * 0 <= a->coeffs[k]) && a->coeffs[k] < MLKEM_Q)) } + */ + +/* + * Prevent clang-format from corrupting CBMC's special ==> operator + */ +/* clang-format off */ +#define CBMC_CONCAT_(left, right) left##right +#define CBMC_CONCAT(left, right) CBMC_CONCAT_(left, right) + +#define array_bound_core(qvar, qvar_lb, qvar_ub, array_var, \ + value_lb, value_ub) \ + __CPROVER_forall \ + { \ + unsigned qvar; \ + ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> \ + (((int)(value_lb) <= ((array_var)[(qvar)])) && \ + (((array_var)[(qvar)]) < (int)(value_ub))) \ + } + +#define array_bound(array_var, qvar_lb, qvar_ub, value_lb, value_ub) \ + array_bound_core(CBMC_CONCAT(_cbmc_idx, __COUNTER__), (qvar_lb), \ + (qvar_ub), (array_var), (value_lb), (value_ub)) + +#define array_unchanged_core(qvar, qvar_lb, qvar_ub, array_var) \ + __CPROVER_forall \ + { \ + unsigned qvar; \ + ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> \ + ((array_var)[(qvar)]) == (old(* (int16_t (*)[(qvar_ub)])(array_var)))[(qvar)] \ + } + +#define 
array_unchanged(array_var, N) \ + array_unchanged_core(CBMC_CONCAT(_cbmc_idx, __COUNTER__), 0, (N), (array_var)) + +#define array_unchanged_u64_core(qvar, qvar_lb, qvar_ub, array_var) \ + __CPROVER_forall \ + { \ + unsigned qvar; \ + ((qvar_lb) <= (qvar) && (qvar) < (qvar_ub)) ==> \ + ((array_var)[(qvar)]) == (old(* (uint64_t (*)[(qvar_ub)])(array_var)))[(qvar)] \ + } + +#define array_unchanged_u64(array_var, N) \ + array_unchanged_u64_core(CBMC_CONCAT(_cbmc_idx, __COUNTER__), 0, (N), (array_var)) +/* clang-format on */ + +/* Wrapper around array_bound operating on absolute values. + * + * The absolute value bound `k` is exclusive. + * + * Note that since the lower bound in array_bound is inclusive, we have to + * raise it by 1 here. + */ +#define array_abs_bound(arr, lb, ub, k) \ + array_bound((arr), (lb), (ub), -((int)(k)) + 1, (k)) + +#endif /* CBMC */ + +#endif /* !MLK_CBMC_H */ diff --git a/mlkem_native/src/common.h b/mlkem_native/src/common.h new file mode 100644 index 0000000..bc4e9ed --- /dev/null +++ b/mlkem_native/src/common.h @@ -0,0 +1,274 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ +#ifndef MLK_COMMON_H +#define MLK_COMMON_H + +#ifndef __ASSEMBLER__ +#include +#endif + +#define MLK_BUILD_INTERNAL + +#if defined(MLK_CONFIG_FILE) +#include MLK_CONFIG_FILE +#else +#include "mlkem_native_config.h" +#endif + +#include "params.h" +#include "sys.h" + +/* Internal and public API have external linkage by default, but + * this can be overwritten by the user, e.g. for single-CU builds. 
*/ +#if !defined(MLK_CONFIG_INTERNAL_API_QUALIFIER) +#define MLK_INTERNAL_API +#else +#define MLK_INTERNAL_API MLK_CONFIG_INTERNAL_API_QUALIFIER +#endif + +#if !defined(MLK_CONFIG_EXTERNAL_API_QUALIFIER) +#define MLK_EXTERNAL_API +#else +#define MLK_EXTERNAL_API MLK_CONFIG_EXTERNAL_API_QUALIFIER +#endif + +#define MLK_CONCAT_(x1, x2) x1##x2 +#define MLK_CONCAT(x1, x2) MLK_CONCAT_(x1, x2) + +#if (defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || \ + defined(MLK_CONFIG_MULTILEVEL_NO_SHARED)) +#define MLK_ADD_PARAM_SET(s) MLK_CONCAT(s, MLK_CONFIG_PARAMETER_SET) +#else +#define MLK_ADD_PARAM_SET(s) s +#endif + +#define MLK_NAMESPACE_PREFIX MLK_CONCAT(MLK_CONFIG_NAMESPACE_PREFIX, _) +#define MLK_NAMESPACE_PREFIX_K \ + MLK_CONCAT(MLK_ADD_PARAM_SET(MLK_CONFIG_NAMESPACE_PREFIX), _) + +/* Functions are prefixed by MLK_CONFIG_NAMESPACE_PREFIX. + * + * If multiple parameter sets are used, functions depending on the parameter + * set are additionally prefixed with 512/768/1024. See mlkem_native_config.h. + * + * Example: If MLK_CONFIG_NAMESPACE_PREFIX is mlkem, then + * MLK_NAMESPACE_K(enc) becomes mlkem512_enc/mlkem768_enc/mlkem1024_enc. + */ +#define MLK_NAMESPACE(s) MLK_CONCAT(MLK_NAMESPACE_PREFIX, s) +#define MLK_NAMESPACE_K(s) MLK_CONCAT(MLK_NAMESPACE_PREFIX_K, s) + +/* On Apple platforms, we need to emit leading underscore + * in front of assembly symbols. We thus introducee a separate + * namespace wrapper for ASM symbols. */ +#if !defined(__APPLE__) +#define MLK_ASM_NAMESPACE(sym) MLK_NAMESPACE(sym) +#else +#define MLK_ASM_NAMESPACE(sym) MLK_CONCAT(_, MLK_NAMESPACE(sym)) +#endif + +/* + * On X86_64 if control-flow protections (CET) are enabled (through + * -fcf-protection=), we add an endbr64 instruction at every global function + * label. 
See sys.h for more details + */ +#if defined(MLK_SYS_X86_64) +#define MLK_ASM_FN_SYMBOL(sym) MLK_ASM_NAMESPACE(sym) : MLK_CET_ENDBR +#elif defined(MLK_SYS_ARMV81M_MVE) +/* clang-format off */ +#define MLK_ASM_FN_SYMBOL(sym) \ + .type MLK_ASM_NAMESPACE(sym), %function; \ + MLK_ASM_NAMESPACE(sym) : +/* clang-format on */ +#else /* !MLK_SYS_X86_64 && MLK_SYS_ARMV81M_MVE */ +#define MLK_ASM_FN_SYMBOL(sym) MLK_ASM_NAMESPACE(sym) : +#endif /* !MLK_SYS_X86_64 && !MLK_SYS_ARMV81M_MVE */ + +/* + * Output the size of an assembly function. + */ +#if defined(__ELF__) +#define MLK_ASM_FN_SIZE(sym) \ + .size MLK_ASM_NAMESPACE(sym), .- MLK_ASM_NAMESPACE(sym) +#else +#define MLK_ASM_FN_SIZE(sym) +#endif + +/* We aim to simplify the user's life by supporting builds where + * all source files are included, even those that are not needed. + * Those files are appropriately guarded and will be empty when unneeded. + * The following is to avoid compilers complaining about this. */ +#define MLK_EMPTY_CU(s) extern int MLK_NAMESPACE_K(empty_cu_##s); + +/* MLK_CONFIG_NO_ASM takes precedence over MLK_USE_NATIVE_XXX */ +#if defined(MLK_CONFIG_NO_ASM) +#undef MLK_CONFIG_USE_NATIVE_BACKEND_ARITH +#undef MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202 +#endif + +#if defined(MLK_CONFIG_USE_NATIVE_BACKEND_ARITH) && \ + !defined(MLK_CONFIG_ARITH_BACKEND_FILE) +#error Bad configuration: MLK_CONFIG_USE_NATIVE_BACKEND_ARITH is set, but MLK_CONFIG_ARITH_BACKEND_FILE is not. +#endif + +#if defined(MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202) && \ + !defined(MLK_CONFIG_FIPS202_BACKEND_FILE) +#error Bad configuration: MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202 is set, but MLK_CONFIG_FIPS202_BACKEND_FILE is not. 
+#endif + +#if defined(MLK_CONFIG_NO_RANDOMIZED_API) && defined(MLK_CONFIG_KEYGEN_PCT) +#error Bad configuration: MLK_CONFIG_NO_RANDOMIZED_API is incompatible with MLK_CONFIG_KEYGEN_PCT as the current PCT implementation requires crypto_kem_enc() +#endif + +#if defined(MLK_CONFIG_USE_NATIVE_BACKEND_ARITH) +#include MLK_CONFIG_ARITH_BACKEND_FILE +/* Include to enforce consistency of API and implementation, + * and conduct sanity checks on the backend. + * + * Keep this _after_ the inclusion of the backend; otherwise, + * the sanity checks won't have an effect. */ +#if defined(MLK_CHECK_APIS) && !defined(__ASSEMBLER__) +#include "native/api.h" +#endif +#endif /* MLK_CONFIG_USE_NATIVE_BACKEND_ARITH */ + +#if defined(MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202) +#include MLK_CONFIG_FIPS202_BACKEND_FILE +/* Include to enforce consistency of API and implementation, + * and conduct sanity checks on the backend. + * + * Keep this _after_ the inclusion of the backend; otherwise, + * the sanity checks won't have an effect. 
*/ +#if defined(MLK_CHECK_APIS) && !defined(__ASSEMBLER__) +#include "fips202/native/api.h" +#endif +#endif /* MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202 */ + +#if !defined(MLK_CONFIG_FIPS202_CUSTOM_HEADER) +#define MLK_FIPS202_HEADER_FILE "fips202/fips202.h" +#else +#define MLK_FIPS202_HEADER_FILE MLK_CONFIG_FIPS202_CUSTOM_HEADER +#endif + +#if !defined(MLK_CONFIG_FIPS202X4_CUSTOM_HEADER) +#define MLK_FIPS202X4_HEADER_FILE "fips202/fips202x4.h" +#else +#define MLK_FIPS202X4_HEADER_FILE MLK_CONFIG_FIPS202X4_CUSTOM_HEADER +#endif + +/* Standard library function replacements */ +#if !defined(__ASSEMBLER__) +#if !defined(MLK_CONFIG_CUSTOM_MEMCPY) +#include +#define mlk_memcpy memcpy +#endif + +#if !defined(MLK_CONFIG_CUSTOM_MEMSET) +#include +#define mlk_memset memset +#endif + + +/* Allocation macros for large local structures + * + * MLK_ALLOC(v, T, N) declares T *v and attempts to point it to an T[N] + * MLK_FREE(v, T, N) zeroizes and frees the allocation + * + * Default implementation uses stack allocation. + * Can be overridden by setting the config option MLK_CONFIG_CUSTOM_ALLOC_FREE + * and defining MLK_CUSTOM_ALLOC and MLK_CUSTOM_FREE. + */ +#if defined(MLK_CONFIG_CUSTOM_ALLOC_FREE) != \ + (defined(MLK_CUSTOM_ALLOC) && defined(MLK_CUSTOM_FREE)) +#error Bad configuration: MLK_CONFIG_CUSTOM_ALLOC_FREE must be set together with MLK_CUSTOM_ALLOC and MLK_CUSTOM_FREE +#endif + +/* + * If the integration wants to provide a context parameter for use in + * platform-specific hooks, then it should define this parameter. + * + * The MLK_CONTEXT_PARAMETERS_n macros are intended to be used with macros + * defining the function names and expand to either pass or discard the context + * argument as required by the current build. If there is no context parameter + * requested then these are removed from the prototypes and from all calls. 
+ */ +#ifdef MLK_CONFIG_CONTEXT_PARAMETER +#define MLK_CONTEXT_PARAMETERS_0(context) (context) +#define MLK_CONTEXT_PARAMETERS_1(arg0, context) (arg0, context) +#define MLK_CONTEXT_PARAMETERS_2(arg0, arg1, context) (arg0, arg1, context) +#define MLK_CONTEXT_PARAMETERS_3(arg0, arg1, arg2, context) \ + (arg0, arg1, arg2, context) +#define MLK_CONTEXT_PARAMETERS_4(arg0, arg1, arg2, arg3, context) \ + (arg0, arg1, arg2, arg3, context) +#else /* MLK_CONFIG_CONTEXT_PARAMETER */ +#define MLK_CONTEXT_PARAMETERS_0(context) () +#define MLK_CONTEXT_PARAMETERS_1(arg0, context) (arg0) +#define MLK_CONTEXT_PARAMETERS_2(arg0, arg1, context) (arg0, arg1) +#define MLK_CONTEXT_PARAMETERS_3(arg0, arg1, arg2, context) (arg0, arg1, arg2) +#define MLK_CONTEXT_PARAMETERS_4(arg0, arg1, arg2, arg3, context) \ + (arg0, arg1, arg2, arg3) +#endif /* !MLK_CONFIG_CONTEXT_PARAMETER */ + +#if defined(MLK_CONFIG_CONTEXT_PARAMETER_TYPE) != \ + defined(MLK_CONFIG_CONTEXT_PARAMETER) +#error MLK_CONFIG_CONTEXT_PARAMETER_TYPE must be defined if and only if MLK_CONFIG_CONTEXT_PARAMETER is defined +#endif + +#if !defined(MLK_CONFIG_CUSTOM_ALLOC_FREE) +/* Default: stack allocation */ + +#define MLK_ALLOC(v, T, N, context) \ + MLK_ALIGN T mlk_alloc_##v[N]; \ + T *v = mlk_alloc_##v + +/* TODO: This leads to a circular dependency between common and verify.h + * It just works out before we're at the end of the file, but it's still + * prone to issues in the future. */ +#include "verify.h" +#define MLK_FREE(v, T, N, context) \ + do \ + { \ + mlk_zeroize(mlk_alloc_##v, sizeof(mlk_alloc_##v)); \ + (v) = NULL; \ + } while (0) + +#else /* !MLK_CONFIG_CUSTOM_ALLOC_FREE */ + +/* Custom allocation */ + +/* + * The indirection here is necessary to use MLK_CONTEXT_PARAMETERS_3 here. 
+ */ +#define MLK_APPLY(f, args) f args + +#define MLK_ALLOC(v, T, N, context) \ + MLK_APPLY(MLK_CUSTOM_ALLOC, MLK_CONTEXT_PARAMETERS_3(v, T, N, context)) + +#define MLK_FREE(v, T, N, context) \ + do \ + { \ + if (v != NULL) \ + { \ + mlk_zeroize(v, sizeof(T) * (N)); \ + MLK_APPLY(MLK_CUSTOM_FREE, MLK_CONTEXT_PARAMETERS_3(v, T, N, context)); \ + v = NULL; \ + } \ + } while (0) + +#endif /* MLK_CONFIG_CUSTOM_ALLOC_FREE */ + +/****************************** Error codes ***********************************/ + +/* Generic failure condition */ +#define MLK_ERR_FAIL -1 +/* An allocation failed. This can only happen if MLK_CONFIG_CUSTOM_ALLOC_FREE + * is defined and the provided MLK_CUSTOM_ALLOC can fail. */ +#define MLK_ERR_OUT_OF_MEMORY -2 +/* An rng failure occured. Might be due to insufficient entropy or + * system misconfiguration. */ +#define MLK_ERR_RNG_FAIL -3 + +#endif /* !__ASSEMBLER__ */ + +#endif /* !MLK_COMMON_H */ diff --git a/mlkem_native/src/compress.c b/mlkem_native/src/compress.c new file mode 100644 index 0000000..50da36d --- /dev/null +++ b/mlkem_native/src/compress.c @@ -0,0 +1,717 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [FIPS203] + * FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard + * National Institute of Standards and Technology + * https://csrc.nist.gov/pubs/fips/203/final + * + * - [REF] + * CRYSTALS-Kyber C reference implementation + * Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, StehlΓ© + * https://github.com/pq-crystals/kyber/tree/main/ref + */ + +#include "common.h" +#if !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) + + +#include "cbmc.h" +#include "compress.h" +#include "debug.h" +#include "verify.h" + +#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3) +/* Reference: `poly_compress()` in the reference implementation @[REF], + * for ML-KEM-{512,768}. 
+ * - In contrast to the reference implementation, we assume + * unsigned canonical coefficients here. + * The reference implementation works with coefficients + * in the range (-MLKEM_Q+1,...,MLKEM_Q-1). */ +MLK_STATIC_TESTABLE void mlk_poly_compress_d4_c( + uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], const mlk_poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_D4)) + requires(memory_no_alias(a, sizeof(mlk_poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_D4)) +) +{ + unsigned i; + mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 8; i++) + __loop__(invariant(i <= MLKEM_N / 8)) + { + unsigned j; + uint8_t t[8] = {0}; + for (j = 0; j < 8; j++) + __loop__( + invariant(i <= MLKEM_N / 8 && j <= 8) + invariant(array_bound(t, 0, j, 0, 16))) + { + t[j] = mlk_scalar_compress_d4(a->coeffs[8 * i + j]); + } + + /* All t[i] are 4-bit wide, so the truncations don't alter the value. */ + r[i * 4] = (uint8_t)(t[0] | (t[1] << 4)); + r[i * 4 + 1] = (uint8_t)(t[2] | (t[3] << 4)); + r[i * 4 + 2] = (uint8_t)(t[4] | (t[5] << 4)); + r[i * 4 + 3] = (uint8_t)(t[6] | (t[7] << 4)); + } +} + +MLK_INTERNAL_API +void mlk_poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], + const mlk_poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_D4)) + requires(memory_no_alias(a, sizeof(mlk_poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_D4)) +) +{ +#if defined(MLK_USE_NATIVE_POLY_COMPRESS_D4) + int ret; + mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + ret = mlk_poly_compress_d4_native(r, a->coeffs); + if (ret == MLK_NATIVE_FUNC_SUCCESS) + { + return; + } +#endif /* MLK_USE_NATIVE_POLY_COMPRESS_D4 */ + + mlk_poly_compress_d4_c(r, a); +} + +/* Reference: Embedded into `polyvec_compress()` in the + * reference implementation, for ML-KEM-{512,768}. 
+ * - In contrast to the reference implementation, we assume + * unsigned canonical coefficients here. + * The reference implementation works with coefficients + * in the range (-MLKEM_Q+1,...,MLKEM_Q-1). */ +MLK_STATIC_TESTABLE void mlk_poly_compress_d10_c( + uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], const mlk_poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_D10)) + requires(memory_no_alias(a, sizeof(mlk_poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_D10)) +) +{ + unsigned j; + mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + for (j = 0; j < MLKEM_N / 4; j++) + __loop__(invariant(j <= MLKEM_N / 4)) + { + unsigned k; + uint16_t t[4]; + for (k = 0; k < 4; k++) + __loop__( + invariant(k <= 4) + invariant(forall(r, 0, k, t[r] < (1u << 10)))) + { + t[k] = mlk_scalar_compress_d10(a->coeffs[4 * j + k]); + } + + /* + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 10-bit in size. 
+ */ + r[5 * j + 0] = (uint8_t)((t[0] >> 0) & 0xFF); + r[5 * j + 1] = (uint8_t)((t[0] >> 8) | ((t[1] << 2) & 0xFF)); + r[5 * j + 2] = (uint8_t)((t[1] >> 6) | ((t[2] << 4) & 0xFF)); + r[5 * j + 3] = (uint8_t)((t[2] >> 4) | ((t[3] << 6) & 0xFF)); + r[5 * j + 4] = (uint8_t)(t[3] >> 2); + } +} + +MLK_INTERNAL_API +void mlk_poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], + const mlk_poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_D10)) + requires(memory_no_alias(a, sizeof(mlk_poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_D10)) +) +{ +#if defined(MLK_USE_NATIVE_POLY_COMPRESS_D10) + int ret; + mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + ret = mlk_poly_compress_d10_native(r, a->coeffs); + if (ret == MLK_NATIVE_FUNC_SUCCESS) + { + return; + } +#endif /* MLK_USE_NATIVE_POLY_COMPRESS_D10 */ + + mlk_poly_compress_d10_c(r, a); +} + +/* Reference: `poly_decompress()` in the reference implementation @[REF], + * for ML-KEM-{512,768}. 
*/ +MLK_STATIC_TESTABLE void mlk_poly_decompress_d4_c( + mlk_poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_D4)) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) +) +{ + unsigned i; + for (i = 0; i < MLKEM_N / 2; i++) /* each input byte yields two coefficients */ + __loop__( + invariant(i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_Q))) + { + r->coeffs[2 * i + 0] = mlk_scalar_decompress_d4((a[i] >> 0) & 0xF); /* low nibble */ + r->coeffs[2 * i + 1] = mlk_scalar_decompress_d4((a[i] >> 4) & 0xF); /* high nibble */ + } + + mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} + +MLK_INTERNAL_API +void mlk_poly_decompress_d4(mlk_poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_D4)) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) +) +{ +#if defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D4) + int ret; + ret = mlk_poly_decompress_d4_native(r->coeffs, a); + if (ret == MLK_NATIVE_FUNC_SUCCESS) + { + mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q); + return; /* the native routine reported success */ + } +#endif /* MLK_USE_NATIVE_POLY_DECOMPRESS_D4 */ + + mlk_poly_decompress_d4_c(r, a); /* otherwise use the C implementation */ +} + +/* Reference: Embedded into `polyvec_decompress()` in the + * reference implementation, for ML-KEM-{512,768}.
*/ +MLK_STATIC_TESTABLE void mlk_poly_decompress_d10_c( + mlk_poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_D10)) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) +) +{ + unsigned j; + for (j = 0; j < MLKEM_N / 4; j++) /* 5 input bytes -> 4 coefficients per iteration */ + __loop__( + invariant(j <= MLKEM_N / 4) + invariant(array_bound(r->coeffs, 0, 4 * j, 0, MLKEM_Q))) + { + unsigned k; + uint16_t t[4]; + uint8_t const *base = &a[5 * j]; /* first of the 5 bytes holding this group of four 10-bit values */ + + t[0] = 0x3FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); + t[1] = 0x3FF & ((base[1] >> 2) | ((uint16_t)base[2] << 6)); + t[2] = 0x3FF & ((base[2] >> 4) | ((uint16_t)base[3] << 4)); + t[3] = 0x3FF & ((base[3] >> 6) | ((uint16_t)base[4] << 2)); + + for (k = 0; k < 4; k++) /* decompress the four 10-bit values into r */ + __loop__( + invariant(k <= 4) + invariant(array_bound(r->coeffs, 0, 4 * j + k, 0, MLKEM_Q))) + { + r->coeffs[4 * j + k] = mlk_scalar_decompress_d10(t[k]); + } + } + + mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} + +MLK_INTERNAL_API +void mlk_poly_decompress_d10(mlk_poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_D10)) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) +) +{ +#if defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D10) + int ret; + ret = mlk_poly_decompress_d10_native(r->coeffs, a); + if (ret == MLK_NATIVE_FUNC_SUCCESS) + { + mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q); + return; /* the native routine reported success */ + } +#endif /* MLK_USE_NATIVE_POLY_DECOMPRESS_D10 */ + + mlk_poly_decompress_d10_c(r, a); /* otherwise use the C implementation */ +} +#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 2 || MLKEM_K == 3 */ + +#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 4 +/* Reference: `poly_compress()` in the reference implementation @[REF], + * for ML-KEM-1024.
+ * - In contrast to the reference implementation, we assume + * unsigned canonical coefficients here. + * The reference implementation works with coefficients + * in the range (-MLKEM_Q+1,...,MLKEM_Q-1). */ +MLK_STATIC_TESTABLE void mlk_poly_compress_d5_c( + uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], const mlk_poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_D5)) + requires(memory_no_alias(a, sizeof(mlk_poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_D5)) +) +{ + unsigned i; + mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 8; i++) /* 8 coefficients -> 5 output bytes per iteration */ + __loop__(invariant(i <= MLKEM_N / 8)) + { + unsigned j; + uint8_t t[8] = {0}; + for (j = 0; j < 8; j++) /* compress each of the 8 coefficients to a 5-bit value */ + __loop__( + invariant(i <= MLKEM_N / 8 && j <= 8) + invariant(array_bound(t, 0, j, 0, 32))) + { + t[j] = mlk_scalar_compress_d5(a->coeffs[8 * i + j]); + } + + r[i * 5] = (uint8_t)(0xFF & ((t[0] >> 0) | (t[1] << 5))); + r[i * 5 + 1] = (uint8_t)(0xFF & ((t[1] >> 3) | (t[2] << 2) | (t[3] << 7))); + r[i * 5 + 2] = (uint8_t)(0xFF & ((t[3] >> 1) | (t[4] << 4))); + r[i * 5 + 3] = (uint8_t)(0xFF & ((t[4] >> 4) | (t[5] << 1) | (t[6] << 6))); + r[i * 5 + 4] = (uint8_t)(0xFF & ((t[6] >> 2) | (t[7] << 3))); + } +} + +MLK_INTERNAL_API +void mlk_poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], + const mlk_poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_D5)) + requires(memory_no_alias(a, sizeof(mlk_poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_D5)) +) +{ +#if defined(MLK_USE_NATIVE_POLY_COMPRESS_D5) + int ret; + mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + ret = mlk_poly_compress_d5_native(r, a->coeffs); + if (ret == MLK_NATIVE_FUNC_SUCCESS) + { + return; /* the native routine reported success */ + } +#endif /* MLK_USE_NATIVE_POLY_COMPRESS_D5 */ + + mlk_poly_compress_d5_c(r, a); /* otherwise use the C implementation */ +} + +/* Reference: Embedded into `polyvec_compress()` in the + * reference
implementation, for ML-KEM-1024. + * - In contrast to the reference implementation, we assume + * unsigned canonical coefficients here. + * The reference implementation works with coefficients + * in the range (-MLKEM_Q+1,...,MLKEM_Q-1). */ +MLK_STATIC_TESTABLE void mlk_poly_compress_d11_c( + uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], const mlk_poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_D11)) + requires(memory_no_alias(a, sizeof(mlk_poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_D11)) +) +{ + unsigned j; + mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (j = 0; j < MLKEM_N / 8; j++) /* 8 coefficients -> 11 output bytes per iteration */ + __loop__(invariant(j <= MLKEM_N / 8)) + { + unsigned k; + uint16_t t[8]; + for (k = 0; k < 8; k++) /* compress each of the 8 coefficients to an 11-bit value */ + __loop__( + invariant(k <= 8) + invariant(forall(r, 0, k, t[r] < (1u << 11)))) + { + t[k] = mlk_scalar_compress_d11(a->coeffs[8 * j + k]); + } + + /* + * Make all implicit truncation explicit. No data is being + * truncated for the LHS's since each t[i] is 11-bit in size.
+ */ + r[11 * j + 0] = (uint8_t)((t[0] >> 0) & 0xFF); + r[11 * j + 1] = (uint8_t)((t[0] >> 8) | ((t[1] << 3) & 0xFF)); + r[11 * j + 2] = (uint8_t)((t[1] >> 5) | ((t[2] << 6) & 0xFF)); + r[11 * j + 3] = (uint8_t)((t[2] >> 2) & 0xFF); + r[11 * j + 4] = (uint8_t)((t[2] >> 10) | ((t[3] << 1) & 0xFF)); + r[11 * j + 5] = (uint8_t)((t[3] >> 7) | ((t[4] << 4) & 0xFF)); + r[11 * j + 6] = (uint8_t)((t[4] >> 4) | ((t[5] << 7) & 0xFF)); + r[11 * j + 7] = (uint8_t)((t[5] >> 1) & 0xFF); + r[11 * j + 8] = (uint8_t)((t[5] >> 9) | ((t[6] << 2) & 0xFF)); + r[11 * j + 9] = (uint8_t)((t[6] >> 6) | ((t[7] << 5) & 0xFF)); + r[11 * j + 10] = (uint8_t)(t[7] >> 3); + } +} + +MLK_INTERNAL_API +void mlk_poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], + const mlk_poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_D11)) + requires(memory_no_alias(a, sizeof(mlk_poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_D11)) +) +{ +#if defined(MLK_USE_NATIVE_POLY_COMPRESS_D11) + int ret; + mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + ret = mlk_poly_compress_d11_native(r, a->coeffs); + if (ret == MLK_NATIVE_FUNC_SUCCESS) + { + return; /* the native routine reported success */ + } +#endif /* MLK_USE_NATIVE_POLY_COMPRESS_D11 */ + + mlk_poly_compress_d11_c(r, a); /* otherwise use the C implementation */ +} + +/* Reference: `poly_decompress()` in the reference implementation @[REF], + * for ML-KEM-1024.
*/ +MLK_STATIC_TESTABLE void mlk_poly_decompress_d5_c( + mlk_poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_D5)) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) +) +{ + unsigned i; + for (i = 0; i < MLKEM_N / 8; i++) + __loop__( + invariant(i <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) + { + unsigned j; + uint8_t t[8]; + const unsigned offset = i * 5; /* index of the first of the 5 input bytes for this group */ + /* + * Explicitly truncate to avoid warning about + * implicit truncation in CBMC and unwind loop for ease + * of proof. + */ + + /* + * Decompress 5 8-bit bytes (so 40 bits) into + * 8 5-bit values stored in t[] + */ + t[0] = 0x1F & (a[offset + 0] >> 0); + t[1] = 0x1F & ((a[offset + 0] >> 5) | (a[offset + 1] << 3)); + t[2] = 0x1F & (a[offset + 1] >> 2); + t[3] = 0x1F & ((a[offset + 1] >> 7) | (a[offset + 2] << 1)); + t[4] = 0x1F & ((a[offset + 2] >> 4) | (a[offset + 3] << 4)); + t[5] = 0x1F & (a[offset + 3] >> 1); + t[6] = 0x1F & ((a[offset + 3] >> 6) | (a[offset + 4] << 2)); + t[7] = 0x1F & (a[offset + 4] >> 3); + + /* decompress each 5-bit value and store it in the matching slot of r[] */ + for (j = 0; j < 8; j++) + __loop__( + invariant(j <= 8 && i <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) + { + r->coeffs[8 * i + j] = mlk_scalar_decompress_d5(t[j]); + } + } + + mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} + +MLK_INTERNAL_API +void mlk_poly_decompress_d5(mlk_poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_D5)) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) +) +{ +#if defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D5) + int ret; + ret = mlk_poly_decompress_d5_native(r->coeffs, a); + if (ret == MLK_NATIVE_FUNC_SUCCESS) + {
mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q); + return; /* the native routine reported success */ + } +#endif /* MLK_USE_NATIVE_POLY_DECOMPRESS_D5 */ + + mlk_poly_decompress_d5_c(r, a); /* otherwise use the C implementation */ +} + +/* Reference: Embedded into `polyvec_decompress()` in the + * reference implementation, for ML-KEM-1024. */ +MLK_STATIC_TESTABLE void mlk_poly_decompress_d11_c( + mlk_poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_D11)) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) +) +{ + unsigned j; + for (j = 0; j < MLKEM_N / 8; j++) /* 11 input bytes -> 8 coefficients per iteration */ + __loop__( + invariant(j <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, 8 * j, 0, MLKEM_Q))) + { + unsigned k; + uint16_t t[8]; + uint8_t const *base = &a[11 * j]; /* first of the 11 bytes holding this group of eight 11-bit values */ + t[0] = 0x7FF & ((base[0] >> 0) | ((uint16_t)base[1] << 8)); + t[1] = 0x7FF & ((base[1] >> 3) | ((uint16_t)base[2] << 5)); + t[2] = 0x7FF & ((base[2] >> 6) | ((uint16_t)base[3] << 2) | + ((uint16_t)base[4] << 10)); + t[3] = 0x7FF & ((base[4] >> 1) | ((uint16_t)base[5] << 7)); + t[4] = 0x7FF & ((base[5] >> 4) | ((uint16_t)base[6] << 4)); + t[5] = 0x7FF & ((base[6] >> 7) | ((uint16_t)base[7] << 1) | + ((uint16_t)base[8] << 9)); + t[6] = 0x7FF & ((base[8] >> 2) | ((uint16_t)base[9] << 6)); + t[7] = 0x7FF & ((base[9] >> 5) | ((uint16_t)base[10] << 3)); + + for (k = 0; k < 8; k++) /* decompress the eight 11-bit values into r */ + __loop__( + invariant(k <= 8) + invariant(array_bound(r->coeffs, 0, 8 * j + k, 0, MLKEM_Q))) + { + r->coeffs[8 * j + k] = mlk_scalar_decompress_d11(t[k]); + } + } + + mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} + +MLK_INTERNAL_API +void mlk_poly_decompress_d11(mlk_poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_D11)) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) +) +{ +#if
defined(MLK_USE_NATIVE_POLY_DECOMPRESS_D11) + int ret; + ret = mlk_poly_decompress_d11_native(r->coeffs, a); + if (ret == MLK_NATIVE_FUNC_SUCCESS) + { + mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q); + return; /* the native routine reported success */ + } +#endif /* MLK_USE_NATIVE_POLY_DECOMPRESS_D11 */ + + mlk_poly_decompress_d11_c(r, a); /* otherwise use the C implementation */ +} + +#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 4 */ + +/* Reference: `poly_tobytes()` in the reference implementation @[REF]. + * - In contrast to the reference implementation, we assume + * unsigned canonical coefficients here. + * The reference implementation works with coefficients + * in the range (-MLKEM_Q+1,...,MLKEM_Q-1). */ +MLK_STATIC_TESTABLE void mlk_poly_tobytes_c(uint8_t r[MLKEM_POLYBYTES], + const mlk_poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYBYTES)) + requires(memory_no_alias(a, sizeof(mlk_poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYBYTES)) +) +{ + unsigned i; + mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 2; i++) /* 2 coefficients -> 3 output bytes per iteration */ + __loop__(invariant(i <= MLKEM_N / 2)) + { + /* The conversion to uint16_t is safe since we assume that + * the coefficients of `a` are non-negative. */ + const uint16_t t0 = (uint16_t)a->coeffs[2 * i]; + const uint16_t t1 = (uint16_t)a->coeffs[2 * i + 1]; + /* + * t0 and t1 are both < MLKEM_Q, so contain at most 12 bits each of + * significant data, so these can be packed into 24 bits or exactly + * 3 bytes, as follows. + */ + + /* Least significant bits 0 - 7 of t0. */ + r[3 * i + 0] = (uint8_t)(t0 & 0xFF); + + /* + * Most significant bits 8 - 11 of t0 become the least significant + * nibble of the second byte. The least significant 4 bits + * of t1 become the upper nibble of the second byte. + * + * The conversion to uint8_t does not alter the value. + */ + r[3 * i + 1] = (uint8_t)((t0 >> 8) | ((t1 << 4) & 0xF0)); + + /* Bits 4 - 11 of t1 become the third byte.
The conversion to uint8_t + * does not alter the value because t1 is 12-bit wide. */ + r[3 * i + 2] = (uint8_t)(t1 >> 4); + } +} + +MLK_INTERNAL_API +void mlk_poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const mlk_poly *a) +{ +#if defined(MLK_USE_NATIVE_POLY_TOBYTES) + int ret; + mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + ret = mlk_poly_tobytes_native(r, a->coeffs); + if (ret == MLK_NATIVE_FUNC_SUCCESS) + { + return; /* the native routine reported success */ + } +#endif /* MLK_USE_NATIVE_POLY_TOBYTES */ + + mlk_poly_tobytes_c(r, a); /* otherwise use the C implementation */ +} + +/* Reference: `poly_frombytes()` in the reference implementation @[REF]. */ +MLK_STATIC_TESTABLE void mlk_poly_frombytes_c(mlk_poly *r, + const uint8_t a[MLKEM_POLYBYTES]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYBYTES)) + requires(memory_no_alias(r, sizeof(mlk_poly))) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_UINT12_LIMIT)) +) +{ + unsigned i; + for (i = 0; i < MLKEM_N / 2; i++) /* 3 input bytes -> 2 coefficients per iteration */ + __loop__( + invariant(i <= MLKEM_N / 2) + invariant(array_bound(r->coeffs, 0, 2 * i, 0, MLKEM_UINT12_LIMIT))) + { + const uint8_t t0 = a[3 * i + 0]; + const uint8_t t1 = a[3 * i + 1]; + const uint8_t t2 = a[3 * i + 2]; + r->coeffs[2 * i + 0] = (int16_t)(t0 | ((t1 << 8) & 0xFFF)); /* t0 plus the low nibble of t1 as bits 8 - 11 */ + r->coeffs[2 * i + 1] = (int16_t)((t1 >> 4) | (t2 << 4)); /* high nibble of t1 plus t2 as bits 4 - 11 */ + } + + /* Note that the coefficients are not necessarily canonical: they are only bounded by MLKEM_UINT12_LIMIT, not by MLKEM_Q */ + mlk_assert_bound(r, MLKEM_N, 0, MLKEM_UINT12_LIMIT); +} + +MLK_INTERNAL_API +void mlk_poly_frombytes(mlk_poly *r, const uint8_t a[MLKEM_POLYBYTES]) +{ +#if defined(MLK_USE_NATIVE_POLY_FROMBYTES) + int ret; + ret = mlk_poly_frombytes_native(r->coeffs, a); + if (ret == MLK_NATIVE_FUNC_SUCCESS) + { + return; /* the native routine reported success */ + } +#endif /* MLK_USE_NATIVE_POLY_FROMBYTES */ + + mlk_poly_frombytes_c(r, a); /* otherwise use the C implementation */ +} + +/* Reference: `poly_frommsg()` in the reference implementation @[REF]. + * - We use a value barrier around the bit-selection mask to + * reduce the risk of compiler-introduced branches.
+ * The reference implementation contains the expression + * `(msg[i] >> j) & 1` which the compiler can reason must + * be either 0 or 1. */ +MLK_INTERNAL_API +void mlk_poly_frommsg(mlk_poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) +{ + unsigned i; +#if (MLKEM_INDCPA_MSGBYTES != MLKEM_N / 8) +#error "MLKEM_INDCPA_MSGBYTES must be equal to MLKEM_N/8 bytes!" +#endif + + for (i = 0; i < MLKEM_N / 8; i++) /* one message byte -> 8 coefficients per iteration */ + __loop__( + invariant(i <= MLKEM_N / 8) + invariant(array_bound(r->coeffs, 0, 8 * i, 0, MLKEM_Q))) + { + unsigned j; + for (j = 0; j < 8; j++) /* bit j of msg[i] determines coefficient 8 * i + j */ + __loop__( + invariant(i < MLKEM_N / 8 && j <= 8) + invariant(array_bound(r->coeffs, 0, 8 * i + j, 0, MLKEM_Q))) + { + /* mlk_ct_sel_int16(MLKEM_Q_HALF, 0, b) is `Decompress_1(b != 0)` + * as per @[FIPS203, Eq (4.8)]. */ + + /* Prevent the compiler from recognizing this as a bit selection */ + uint8_t mask = mlk_value_barrier_u8((uint8_t)(1u << j)); + r->coeffs[8 * i + j] = mlk_ct_sel_int16(MLKEM_Q_HALF, 0, msg[i] & mask); + } + } + mlk_assert_abs_bound(r, MLKEM_N, MLKEM_Q); +} + +/* Reference: `poly_tomsg()` in the reference implementation @[REF]. + * - In contrast to the reference implementation, we assume + * unsigned canonical coefficients here. + * The reference implementation works with coefficients + * in the range (-MLKEM_Q+1,...,MLKEM_Q-1).
+ */ +MLK_INTERNAL_API +void mlk_poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const mlk_poly *a) +{ + unsigned i; + mlk_assert_bound(a, MLKEM_N, 0, MLKEM_Q); + + for (i = 0; i < MLKEM_N / 8; i++) /* 8 coefficients -> one message byte per iteration */ + __loop__(invariant(i <= MLKEM_N / 8)) + { + unsigned j; + msg[i] = 0; + for (j = 0; j < 8; j++) + __loop__( + invariant(i <= MLKEM_N / 8 && j <= 8)) + { + uint32_t t = mlk_scalar_compress_d1(a->coeffs[8 * i + j]); + msg[i] |= (uint8_t)(t << j); /* coefficient 8 * i + j contributes bit j of msg[i] */ + } + } +} + +#else /* !MLK_CONFIG_MULTILEVEL_NO_SHARED */ + +MLK_EMPTY_CU(compress) + +#endif /* MLK_CONFIG_MULTILEVEL_NO_SHARED */ diff --git a/mlkem_native/src/compress.h b/mlkem_native/src/compress.h new file mode 100644 index 0000000..b16b088 --- /dev/null +++ b/mlkem_native/src/compress.h @@ -0,0 +1,688 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [FIPS203] + * FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard + * National Institute of Standards and Technology + * https://csrc.nist.gov/pubs/fips/203/final + * + * - [REF] + * CRYSTALS-Kyber C reference implementation + * Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, Stehlé + * https://github.com/pq-crystals/kyber/tree/main/ref + */ + +#ifndef MLK_COMPRESS_H +#define MLK_COMPRESS_H + + +#include "cbmc.h" +#include "common.h" +#include "debug.h" +#include "poly.h" +#include "verify.h" + +/************************************************************ + * Name: mlk_scalar_compress_d1 + * + * Description: Computes round(u * 2 / q) % 2 + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + * + * Specification: Compress_1 from @[FIPS203, Eq (4.7)]. + * + ************************************************************/ + +/* + * The multiplication in this routine will exceed UINT32_MAX + * and wrap around for large values of u. This is expected and required.
+ */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif + +/* Reference: Part of poly_tomsg() in the reference implementation @[REF]. */ +static MLK_INLINE uint8_t mlk_scalar_compress_d1(int16_t u) +__contract__( + requires(0 <= u && u <= MLKEM_Q - 1) + ensures(return_value < 2) + ensures(return_value == (((uint32_t)u * 2 + MLKEM_Q / 2) / MLKEM_Q) % 2) ) +{ + /* Compute as follows: + * ``` + * round(u * 2 / MLKEM_Q) + * = round(u * 2 * (2^31 / MLKEM_Q) / 2^31) + * ~= round(u * 2 * round(2^31 / MLKEM_Q) / 2^31) + * ``` + */ + /* check-magic: 1290168 == 2*round(2^31 / MLKEM_Q) */ + uint32_t d0 = (uint32_t)u * 1290168; + /* Unsigned shifting by 31 positions leaves only the top bit, so the result is 0 or 1 and is not altered by the conversion to uint8_t. */ + return (uint8_t)((d0 + ((uint32_t)1u << 30)) >> 31); /* round(d0/2^31) */ +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: mlk_scalar_compress_d4 + * + * Description: Computes round(u * 16 / q) % 16 + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + * + * Specification: Compress_4 from @[FIPS203, Eq (4.7)]. + * + ************************************************************/ +/* + * The multiplication in this routine will exceed UINT32_MAX + * and wrap around for large values of u. This is expected and required. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif + +/* Reference: Embedded into `poly_compress()` in the + * reference implementation @[REF].
*/ +static MLK_INLINE uint8_t mlk_scalar_compress_d4(int16_t u) +__contract__( + requires(0 <= u && u <= MLKEM_Q - 1) + ensures(return_value < 16) + ensures(return_value == (((uint32_t)u * 16 + MLKEM_Q / 2) / MLKEM_Q) % 16)) +{ + /* Compute as follows: + * ``` + * round(u * 16 / MLKEM_Q) + * = round(u * 16 * (2^28 / MLKEM_Q) / 2^28) + * ~= round(u * 16 * round(2^28 / MLKEM_Q) / 2^28) + * ``` + */ + /* check-magic: 1290160 == 16 * round(2^28 / MLKEM_Q) */ + uint32_t d0 = (uint32_t)u * 1290160; + /* The return value is < 16, so not altered by the conversion to uint8_t. */ + return (uint8_t)((d0 + ((uint32_t)1u << 27)) >> 28); /* round(d0/2^28) */ +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: mlk_scalar_decompress_d4 + * + * Description: Computes round(u * q / 16) + * + * Arguments: - u: Unsigned canonical modulus modulo 16 + * to be decompressed. + * + * Specification: Decompress_4 from @[FIPS203, Eq (4.8)]. + * + ************************************************************/ + +/* Reference: Embedded into `poly_decompress()` in the + * reference implementation @[REF]. */ +static MLK_INLINE int16_t mlk_scalar_decompress_d4(uint8_t u) +__contract__( + requires(0 <= u && u < 16) + ensures(0 <= return_value && return_value <= (MLKEM_Q - 1)) +) +{ + /* The return value is in 0..MLKEM_Q-1, hence not altered by the + * conversion to int16_t. */ + return (int16_t)((((uint32_t)u * MLKEM_Q) + 8) >> 4); /* round(u * MLKEM_Q / 2^4) */ +} + +/************************************************************ + * Name: mlk_scalar_compress_d5 + * + * Description: Computes round(u * 32 / q) % 32 + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + * + * Specification: Compress_5 from @[FIPS203, Eq (4.7)]. + * + ************************************************************/ +/* + * The multiplication in this routine will exceed UINT32_MAX + * and wrap around for large values of u. This is expected and required.
+ */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif + +/* Reference: Embedded into `poly_compress()` in the + * reference implementation @[REF]. */ +static MLK_INLINE uint8_t mlk_scalar_compress_d5(int16_t u) +__contract__( + requires(0 <= u && u <= MLKEM_Q - 1) + ensures(return_value < 32) + ensures(return_value == (((uint32_t)u * 32 + MLKEM_Q / 2) / MLKEM_Q) % 32) ) +{ + /* Compute as follows: + * ``` + * round(u * 32 / MLKEM_Q) + * = round(u * 32 * (2^27 / MLKEM_Q) / 2^27) + * ~= round(u * 32 * round(2^27 / MLKEM_Q) / 2^27) + * ``` + */ + /* check-magic: 1290176 == 2^5 * round(2^27 / MLKEM_Q) */ + uint32_t d0 = (uint32_t)u * 1290176; + /* The return value is < 32, so not altered by the conversion to uint8_t. */ + return (uint8_t)((d0 + ((uint32_t)1u << 26)) >> 27); /* round(d0/2^27) */ +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: mlk_scalar_decompress_d5 + * + * Description: Computes round(u * q / 32) + * + * Arguments: - u: Unsigned canonical modulus modulo 32 + * to be decompressed. + * + * Specification: Decompress_5 from @[FIPS203, Eq (4.8)]. + * + ************************************************************/ + +/* Reference: Embedded into `poly_decompress()` in the + * reference implementation @[REF]. */ +static MLK_INLINE int16_t mlk_scalar_decompress_d5(uint8_t u) +__contract__( + requires(0 <= u && u < 32) + ensures(0 <= return_value && return_value <= MLKEM_Q - 1) +) +{ + /* The return value is in 0..MLKEM_Q-1, hence not altered by the + * conversion to int16_t. */ + return (int16_t)((((uint32_t)u * MLKEM_Q) + 16) >> 5); /* round(u * MLKEM_Q / 2^5) */ +} + +/************************************************************ + * Name: mlk_scalar_compress_d10 + * + * Description: Computes round(u * 2**10 / q) % 2**10 + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + * + * Specification: Compress_10 from @[FIPS203, Eq (4.7)].
+ * + ************************************************************/ +/* + * The product computed in this routine exceeds UINT32_MAX for large + * values of u, so it is carried out in uint64_t, where it does not wrap. + * NOTE(review): the unsigned-overflow check is nevertheless disabled below; + * confirm whether that is still needed for this 64-bit computation. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif + +/* Reference: Embedded into `polyvec_compress()` in the + * reference implementation @[REF]. */ +static MLK_INLINE uint16_t mlk_scalar_compress_d10(int16_t u) +__contract__( + requires(0 <= u && u <= MLKEM_Q - 1) + ensures(return_value < (1u << 10)) + ensures(return_value == (((uint32_t)u * (1u << 10) + MLKEM_Q / 2) / MLKEM_Q) % (1 << 10))) +{ + /* Compute as follows: + * ``` + * round(u * 1024 / MLKEM_Q) + * = round(u * 1024 * (2^33 / MLKEM_Q) / 2^33) + * ~= round(u * 1024 * round(2^33 / MLKEM_Q) / 2^33) + * ``` + */ + /* check-magic: 2642263040 == 2^10 * round(2^33 / MLKEM_Q) */ + uint64_t d0 = (uint64_t)u * 2642263040; + d0 = (d0 + ((uint64_t)1u << 32)) >> 33; /* round(d0/2^33) */ + return (d0 & 0x3FF); /* reduce modulo 2^10 */ +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: mlk_scalar_decompress_d10 + * + * Description: Computes round(u * q / 1024) + * + * Arguments: - u: Unsigned canonical modulus modulo 1024 + * to be decompressed. + * + * Specification: Decompress_10 from @[FIPS203, Eq (4.8)]. + * + ************************************************************/ + +/* Reference: Embedded into `polyvec_decompress()` in the + * reference implementation @[REF]. */ +static MLK_INLINE int16_t mlk_scalar_decompress_d10(uint16_t u) +__contract__( + requires(0 <= u && u < 1024) + ensures(0 <= return_value && return_value <= (MLKEM_Q - 1)) +) +{ + /* The return value is in 0..MLKEM_Q-1, hence not altered by the + * conversion to int16_t.
*/ + return (int16_t)((((uint32_t)u * MLKEM_Q) + 512) >> 10); /* round(u * MLKEM_Q / 2^10) */ +} + +/************************************************************ + * Name: mlk_scalar_compress_d11 + * + * Description: Computes round(u * 2**11 / q) % 2**11 + * + * Arguments: - u: Unsigned canonical modulus modulo q + * to be compressed. + * + * Specification: Compress_11 from @[FIPS203, Eq (4.7)]. + * + ************************************************************/ +/* + * The product computed in this routine exceeds UINT32_MAX for large + * values of u, so it is carried out in uint64_t, where it does not wrap. + * NOTE(review): the unsigned-overflow check is nevertheless disabled below; + * confirm whether that is still needed for this 64-bit computation. + */ +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "unsigned-overflow" +#endif + +/* Reference: Embedded into `polyvec_compress()` in the + * reference implementation @[REF]. */ +static MLK_INLINE uint16_t mlk_scalar_compress_d11(int16_t u) +__contract__( + requires(0 <= u && u <= MLKEM_Q - 1) + ensures(return_value < (1u << 11)) + ensures(return_value == (((uint32_t)u * (1u << 11) + MLKEM_Q / 2) / MLKEM_Q) % (1 << 11))) +{ + /* Compute as follows: + * ``` + * round(u * 2048 / MLKEM_Q) + * = round(u * 2048 * (2^33 / MLKEM_Q) / 2^33) + * ~= round(u * 2048 * round(2^33 / MLKEM_Q) / 2^33) + * ``` + */ + /* check-magic: 5284526080 == 2^11 * round(2^33 / MLKEM_Q) */ + uint64_t d0 = (uint64_t)u * 5284526080; + d0 = (d0 + ((uint64_t)1u << 32)) >> 33; /* round(d0/2^33) */ + return (d0 & 0x7FF); /* reduce modulo 2^11 */ +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************************ + * Name: mlk_scalar_decompress_d11 + * + * Description: Computes round(u * q / 2048) + * + * Arguments: - u: Unsigned canonical modulus modulo 2048 + * to be decompressed. + * + * Specification: Decompress_11 from @[FIPS203, Eq (4.8)]. + * + ************************************************************/ + +/* Reference: Embedded into `polyvec_decompress()` in the + * reference implementation @[REF].
*/ +static MLK_INLINE int16_t mlk_scalar_decompress_d11(uint16_t u) +__contract__( + requires(0 <= u && u < 2048) + ensures(0 <= return_value && return_value <= (MLKEM_Q - 1)) +) +{ + /* The return value is in 0..MLKEM_Q-1, hence not altered by the + * conversion to int16_t. */ + return (int16_t)((((uint32_t)u * MLKEM_Q) + 1024) >> 11); /* round(u * MLKEM_Q / 2^11) */ +} + +#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || (MLKEM_K == 2 || MLKEM_K == 3) +#define mlk_poly_compress_d4 MLK_NAMESPACE(poly_compress_d4) +/************************************************* + * Name: mlk_poly_compress_d4 + * + * Description: Compression (4 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) + * - const mlk_poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. + * + * Specification: Implements `ByteEncode_4 (Compress_4 (a))`: + * - ByteEncode_d: @[FIPS203, Algorithm 5], + * - Compress_d: @[FIPS203, Eq (4.7)] + * Extended to vectors as per + * @[FIPS203, 2.4.8 Applying Algorithms to Arrays] + * - `ByteEncode_{d_v} (Compress_{d_v} (v))` appears in + * @[FIPS203, Algorithm 14 (K-PKE.Encrypt), L23], + * where `d_v=4` for ML-KEM-{512,768} @[FIPS203, Table 2]. + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_compress_d4(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D4], + const mlk_poly *a); + +#define mlk_poly_compress_d10 MLK_NAMESPACE(poly_compress_d10) +/************************************************* + * Name: mlk_poly_compress_d10 + * + * Description: Compression (10 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * - const mlk_poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1].
+ * + * Specification: Implements `ByteEncode_10 (Compress_10 (a))`: + * - ByteEncode_d: @[FIPS203, Algorithm 5], + * - Compress_d: @[FIPS203, Eq (4.7)] + * Extended to vectors as per + * @[FIPS203, 2.4.8 Applying Algorithms to Arrays] + * - `ByteEncode_{d_u} (Compress_{d_u} (u))` appears in + * @[FIPS203, Algorithm 14 (K-PKE.Encrypt), L22], + * where `d_u=10` for ML-KEM-{512,768} @[FIPS203, Table 2]. + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_compress_d10(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D10], + const mlk_poly *a); + +#define mlk_poly_decompress_d4 MLK_NAMESPACE(poly_decompress_d4) +/************************************************* + * Name: mlk_poly_decompress_d4 + * + * Description: De-serialization and subsequent decompression (4 bits) of a + * polynomial; approximate inverse of mlk_poly_compress_d4 + * + * Arguments: - mlk_poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D4 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + * Specification: Implements `Decompress_4 (ByteDecode_4 (a))`: + * - ByteDecode_d: @[FIPS203, Algorithm 6], + * - Decompress_d: @[FIPS203, Eq (4.8)] + * Extended to vectors as per + * @[FIPS203, 2.4.8 Applying Algorithms to Arrays] + * - `Decompress_{d_v} (ByteDecode_{d_v} (v))` appears in + * @[FIPS203, Algorithm 15 (K-PKE.Decrypt), L4], + * where `d_v=4` for ML-KEM-{512,768} @[FIPS203, Table 2].
+ * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_decompress_d4(mlk_poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D4]); + +#define mlk_poly_decompress_d10 MLK_NAMESPACE(poly_decompress_d10) +/************************************************* + * Name: mlk_poly_decompress_d10 + * + * Description: De-serialization and subsequent decompression (10 bits) of a + * polynomial; approximate inverse of mlk_poly_compress_d10 + * + * Arguments: - mlk_poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D10 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + * Specification: Implements `Decompress_10 (ByteDecode_10 (a))`: + * - ByteDecode_d: @[FIPS203, Algorithm 6], + * - Decompress_d: @[FIPS203, Eq (4.8)] + * Extended to vectors as per + * @[FIPS203, 2.4.8 Applying Algorithms to Arrays] + * - `Decompress_{d_u} (ByteDecode_{d_u} (u))` appears in + * @[FIPS203, Algorithm 15 (K-PKE.Decrypt), L3], + * where `d_u=10` for ML-KEM-{512,768} @[FIPS203, Table 2]. + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_decompress_d10(mlk_poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D10]); +#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 2 || MLKEM_K == 3 */ + +#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_K == 4 +#define mlk_poly_compress_d5 MLK_NAMESPACE(poly_compress_d5) +/************************************************* + * Name: mlk_poly_compress_d5 + * + * Description: Compression (5 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) + * - const mlk_poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ * + * Specification: Implements `ByteEncode_5 (Compress_5 (a))`: + * - ByteEncode_d: @[FIPS203, Algorithm 5], + * - Compress_d: @[FIPS203, Eq (4.7)] + * Extended to vectors as per + * @[FIPS203, 2.4.8 Applying Algorithms to Arrays] + * - `ByteEncode_{d_v} (Compress_{d_v} (v))` appears in + * @[FIPS203, Algorithm 14 (K-PKE.Encrypt), L23], + * where `d_v=5` for ML-KEM-1024 @[FIPS203, Table 2]. + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_compress_d5(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D5], + const mlk_poly *a); + +#define mlk_poly_compress_d11 MLK_NAMESPACE(poly_compress_d11) +/************************************************* + * Name: mlk_poly_compress_d11 + * + * Description: Compression (11 bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * - const mlk_poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. + * + * Specification: `ByteEncode_11 (Compress_11 (a))`: + * - ByteEncode_d: @[FIPS203, Algorithm 5], + * - Compress_d: @[FIPS203, Eq (4.7)] + * Extended to vectors as per + * @[FIPS203, 2.4.8 Applying Algorithms to Arrays] + * - `ByteEncode_{d_u} (Compress_{d_u} (u))` appears in + * @[FIPS203, Algorithm 14 (K-PKE.Encrypt), L22], + * where `d_u=11` for ML-KEM-1024 @[FIPS203, Table 2]. 
+ * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_compress_d11(uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_D11], + const mlk_poly *a); + +#define mlk_poly_decompress_d5 MLK_NAMESPACE(poly_decompress_d5) +/************************************************* + * Name: mlk_poly_decompress_d5 + * + * Description: De-serialization and subsequent decompression (dv bits) of a + * polynomial; approximate inverse of poly_compress + * + * Arguments: - mlk_poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D5 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + * Specification: Implements `Decompress_5 (ByteDecode_5 (a))`: + * - ByteDecode_d: @[FIPS203, Algorithm 6], + * - Decompress_d: @[FIPS203, Eq (4.8)] + * Extended to vectors as per + * @[FIPS203, 2.4.8 Applying Algorithms to Arrays] + * - `Decompress_{d_v} (ByteDecode_{d_v} (v))` appears in + * @[FIPS203, Algorithm 15 (K-PKE.Decrypt), L4], + * where `d_v=5` for ML-KEM-1024 @[FIPS203, Table 2]. + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_decompress_d5(mlk_poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D5]); + +#define mlk_poly_decompress_d11 MLK_NAMESPACE(poly_decompress_d11) +/************************************************* + * Name: mlk_poly_decompress_d11 + * + * Description: De-serialization and subsequent decompression (11 bits) of a + * polynomial; approximate inverse of mlk_poly_compress_d11 + * + * Arguments: - mlk_poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_D11 bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). 
+ * + * Specification: Implements `Decompress_11 (ByteDecode_11 (a))`: + * - ByteDecode_d: @[FIPS203, Algorithm 6], + * - Decompress_d: @[FIPS203, Eq (4.8)] + * Extended to vectors as per + * @[FIPS203, 2.4.8 Applying Algorithms to Arrays] + * - `Decompress_{d_u} (ByteDecode_{d_u} (u))` appears in + * @[FIPS203, Algorithm 15 (K-PKE.Decrypt), L3], + * where `d_u=11` for ML-KEM-1024 @[FIPS203, Table 2]. + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_decompress_d11(mlk_poly *r, + const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_D11]); +#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_K == 4 */ + +#define mlk_poly_tobytes MLK_NAMESPACE(poly_tobytes) +/************************************************* + * Name: mlk_poly_tobytes + * + * Description: Serialization of a polynomial. + * Signed coefficients are converted to + * unsigned form before serialization. + * + * Arguments: INPUT: + * - a: const pointer to input polynomial, + * with each coefficient in the range [0,1,..,Q-1] + * OUTPUT + * - r: pointer to output byte array + * (of MLKEM_POLYBYTES bytes) + * + * Specification: Implements ByteEncode_12 @[FIPS203, Algorithm 5]. + * Extended to vectors as per + * @[FIPS203, 2.4.8 Applying Algorithms to Arrays] + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_tobytes(uint8_t r[MLKEM_POLYBYTES], const mlk_poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYBYTES)) + requires(memory_no_alias(a, sizeof(mlk_poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYBYTES)) +); + + +#define mlk_poly_frombytes MLK_NAMESPACE(poly_frombytes) +/************************************************* + * Name: mlk_poly_frombytes + * + * Description: De-serialization of a polynomial. 
+ * + * Arguments: INPUT + * - a: pointer to input byte array + * (of MLKEM_POLYBYTES bytes) + * OUTPUT + * - r: pointer to output polynomial, with + * each coefficient unsigned and in the range + * 0 .. 4095 + * + * Specification: Implements ByteDecode_12 @[FIPS203, Algorithm 6]. + * Extended to vectors as per + * @[FIPS203, 2.4.8 Applying Algorithms to Arrays] + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_frombytes(mlk_poly *r, const uint8_t a[MLKEM_POLYBYTES]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYBYTES)) + requires(memory_no_alias(r, sizeof(mlk_poly))) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_UINT12_LIMIT)) +); + + +#define mlk_poly_frommsg MLK_NAMESPACE(poly_frommsg) +/************************************************* + * Name: mlk_poly_frommsg + * + * Description: Convert 32-byte message to polynomial + * + * Arguments: - mlk_poly *r: pointer to output polynomial + * - const uint8_t *msg: pointer to input message + * + * Specification: Implements `Decompress_1 (ByteDecode_1 (a))`: + * - ByteDecode_d: @[FIPS203, Algorithm 6], + * - Decompress_d: @[FIPS203, Eq (4.8)] + * Extended to vectors as per + * @[FIPS203, 2.4.8 Applying Algorithms to Arrays] + * - `Decompress_1 (ByteDecode_1 (w))` appears in + * @[FIPS203, Algorithm 14 (K-PKE.Encrypt), L20].
+ * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_frommsg(mlk_poly *r, const uint8_t msg[MLKEM_INDCPA_MSGBYTES]) +__contract__( + requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES)) + requires(memory_no_alias(r, sizeof(mlk_poly))) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) +); + +#define mlk_poly_tomsg MLK_NAMESPACE(poly_tomsg) +/************************************************* + * Name: mlk_poly_tomsg + * + * Description: Convert polynomial to 32-byte message + * + * Arguments: - uint8_t *msg: pointer to output message + * - const mlk_poly *r: pointer to input polynomial + * Coefficients must be unsigned canonical + * + * Specification: Implements `ByteEncode_1 (Compress_1 (a))`: + * - ByteEncode_d: @[FIPS203, Algorithm 5], + * - Compress_d: @[FIPS203, Eq (4.7)] + * Extended to vectors as per + * @[FIPS203, 2.4.8 Applying Algorithms to Arrays] + * - `ByteEncode_1 (Compress_1 (w))` appears in + * @[FIPS203, Algorithm 15 (K-PKE.Decrypt), L7]. + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_tomsg(uint8_t msg[MLKEM_INDCPA_MSGBYTES], const mlk_poly *r) +__contract__( + requires(memory_no_alias(msg, MLKEM_INDCPA_MSGBYTES)) + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(msg, MLKEM_INDCPA_MSGBYTES)) +); + +#endif /* !MLK_COMPRESS_H */ diff --git a/mlkem_native/src/debug.c b/mlkem_native/src/debug.c new file mode 100644 index 0000000..386f526 --- /dev/null +++ b/mlkem_native/src/debug.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* NOTE: You can remove this file unless you compile with MLKEM_DEBUG.
*/ + +#include "common.h" + +#if !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) && defined(MLKEM_DEBUG) + + +#include <stdio.h> +#include <stdlib.h> +#include "debug.h" + +#define MLK_DEBUG_ERROR_HEADER "[ERROR:%s:%04d] " + +/* Debug assertion backend: if val is zero, print the failing file/line to + * stderr and terminate the process via exit(1); otherwise do nothing. */ +void mlk_debug_check_assert(const char *file, int line, const int val) +{ + if (val == 0) + { + fprintf(stderr, MLK_DEBUG_ERROR_HEADER "Assertion failed (value %d)\n", + file, line, val); + exit(1); + } +} + +/* Check that each of the len values ptr[i] satisfies + * lower_bound_exclusive < ptr[i] < upper_bound_exclusive. + * Every offending index is reported to stderr before the process is + * terminated via exit(1), so all violations show up in a single run. */ +void mlk_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive) +{ + int err = 0; + unsigned i; + for (i = 0; i < len; i++) + { + int16_t val = ptr[i]; + if (!(val > lower_bound_exclusive && val < upper_bound_exclusive)) + { + fprintf( + stderr, + MLK_DEBUG_ERROR_HEADER + "Bounds assertion failed: Index %u, value %d out of bounds (%d,%d)\n", + file, line, i, (int)val, lower_bound_exclusive, + upper_bound_exclusive); + err = 1; + } + } + + /* Exit only after the whole array has been scanned. */ + if (err == 1) + { + exit(1); + } +} + +#else /* !MLK_CONFIG_MULTILEVEL_NO_SHARED && MLKEM_DEBUG */ + +MLK_EMPTY_CU(debug) + +#endif /* !(!MLK_CONFIG_MULTILEVEL_NO_SHARED && MLKEM_DEBUG) */ + +/* To facilitate single-compilation-unit (SCU) builds, undefine all macros. + * Don't modify by hand -- this is auto-generated by scripts/autogen. */ +#undef MLK_DEBUG_ERROR_HEADER diff --git a/mlkem_native/src/debug.h b/mlkem_native/src/debug.h new file mode 100644 index 0000000..47c864b --- /dev/null +++ b/mlkem_native/src/debug.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ +#ifndef MLK_DEBUG_H +#define MLK_DEBUG_H +#include "common.h" + +#if defined(MLKEM_DEBUG) + +/************************************************* + * Name: mlk_debug_check_assert + * + * Description: Check that a debug assertion holds. + * + * Prints an error message to stderr and calls + * exit(1) if not.
+ * + * Arguments: - file: filename + * - line: line number + * - val: Value asserted to be non-zero + **************************************************/ +#define mlk_debug_check_assert MLK_NAMESPACE(mlkem_debug_assert) +void mlk_debug_check_assert(const char *file, int line, const int val); + +/************************************************* + * Name: mlk_debug_check_bounds + * + * Description: Check whether values in an array of int16_t + * are within specified bounds. + * + * Prints an error message to stderr and calls + * exit(1) if not. + * + * Arguments: - file: filename + * - line: line number + * - ptr: Base of array to be checked + * - len: Number of int16_t in ptr + * - lower_bound_exclusive: Exclusive lower bound + * - upper_bound_exclusive: Exclusive upper bound + **************************************************/ +#define mlk_debug_check_bounds MLK_NAMESPACE(mlkem_debug_check_bounds) +void mlk_debug_check_bounds(const char *file, int line, const int16_t *ptr, + unsigned len, int lower_bound_exclusive, + int upper_bound_exclusive); + +/* Check assertion, calling exit() upon failure + * + * val: Value that's asserted to be non-zero + */ +#define mlk_assert(val) mlk_debug_check_assert(__FILE__, __LINE__, (val)) + +/* Check bounds in array of int16_t's + * ptr: Base of int16_t array; will be explicitly cast to int16_t*, + * so you may pass a byte-compatible type such as mlk_poly or mlk_polyvec. 
+ * len: Number of int16_t in array + * value_lb: Inclusive lower value bound + * value_ub: Exclusive upper value bound + * + * mlk_debug_check_bounds() takes an exclusive lower bound, hence the + * inclusive value_lb is passed on as value_lb - 1. */ +#define mlk_assert_bound(ptr, len, value_lb, value_ub) \ + mlk_debug_check_bounds(__FILE__, __LINE__, (const int16_t *)(ptr), (len), \ + (value_lb) - 1, (value_ub)) + +/* Check absolute bounds in array of int16_t's + * ptr: Base of array, expression of type int16_t* + * len: Number of int16_t in array + * value_abs_bd: Exclusive absolute upper bound, + * i.e. every element x must satisfy + * -value_abs_bd < x < value_abs_bd */ +#define mlk_assert_abs_bound(ptr, len, value_abs_bd) \ + mlk_assert_bound((ptr), (len), (-(value_abs_bd) + 1), (value_abs_bd)) + +/* Version of bounds assertions for 2-dimensional arrays: + * the len0 x len1 array is checked as one flat array of + * len0 * len1 elements. */ +#define mlk_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + mlk_assert_bound((ptr), ((len0) * (len1)), (value_lb), (value_ub)) + +#define mlk_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + mlk_assert_abs_bound((ptr), ((len0) * (len1)), (value_abs_bd)) + +/* When running CBMC, convert debug assertions into proof obligations */ +#elif defined(CBMC) +#include "cbmc.h" + +#define mlk_assert(val) cassert(val) + +#define mlk_assert_bound(ptr, len, value_lb, value_ub) \ + cassert(array_bound(((int16_t *)(ptr)), 0, (len), (value_lb), (value_ub))) + +#define mlk_assert_abs_bound(ptr, len, value_abs_bd) \ + cassert(array_abs_bound(((int16_t *)(ptr)), 0, (len), (value_abs_bd))) + +/* Because of https://github.com/diffblue/cbmc/issues/8570, we can't + * just use a single flattened array_bound(...) here.
*/ +#define mlk_assert_bound_2d(ptr, M, N, value_lb, value_ub) \ + cassert(forall(kN, 0, (M), \ + array_bound(&((int16_t (*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_lb), (value_ub)))) + +#define mlk_assert_abs_bound_2d(ptr, M, N, value_abs_bd) \ + cassert(forall(kN, 0, (M), \ + array_abs_bound(&((int16_t (*)[(N)])(ptr))[kN][0], 0, (N), \ + (value_abs_bd)))) + +#else /* !MLKEM_DEBUG && CBMC */ + +#define mlk_assert(val) \ + do \ + { \ + } while (0) +#define mlk_assert_bound(ptr, len, value_lb, value_ub) \ + do \ + { \ + } while (0) +#define mlk_assert_abs_bound(ptr, len, value_abs_bd) \ + do \ + { \ + } while (0) + +#define mlk_assert_bound_2d(ptr, len0, len1, value_lb, value_ub) \ + do \ + { \ + } while (0) + +#define mlk_assert_abs_bound_2d(ptr, len0, len1, value_abs_bd) \ + do \ + { \ + } while (0) + + +#endif /* !MLKEM_DEBUG && !CBMC */ +#endif /* !MLK_DEBUG_H */ diff --git a/mlkem_native/src/fips202/fips202.c b/mlkem_native/src/fips202/fips202.c new file mode 100644 index 0000000..4751efb --- /dev/null +++ b/mlkem_native/src/fips202/fips202.c @@ -0,0 +1,251 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [FIPS203] + * FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard + * National Institute of Standards and Technology + * https://csrc.nist.gov/pubs/fips/203/final + * + * - [mupq] + * Common files for pqm4, pqm3, pqriscv + * Kannwischer, Petri, Rijneveld, Schwabe, Stoffelen + * https://github.com/mupq/mupq + * + * - [supercop] + * SUPERCOP benchmarking framework + * Daniel J. 
Bernstein + * http://bench.cr.yp.to/supercop.html + * + * - [tweetfips] + * 'tweetfips202' FIPS202 implementation + * Van Assche, Bernstein, Schwabe + * https://keccak.team/2015/tweetfips202.html + */ + +/* Based on the CC0 implementation from @[mupq] and the public domain + * implementation @[supercop, crypto_hash/keccakc512/simple/] + * by Ronny Van Keer, and the public domain @[tweetfips] implementation. */ + +#include "../common.h" +#if !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) + + +#include "../verify.h" +#include "fips202.h" +#include "keccakf1600.h" + +/************************************************* + * Name: mlk_keccak_absorb_once + * + * Description: Absorb step of Keccak; + * non-incremental, starts by zeroing the state. + * + * WARNING: Must only be called once. + * + * Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak + * state + * - unsigned r: rate in bytes (e.g., 168 for SHAKE128) + * - const uint8_t *m: pointer to input to be absorbed into s + * - size_t mlen: length of input in bytes + * - uint8_t p: domain-separation byte for different + * Keccak-derived functions + **************************************************/ +static void mlk_keccak_absorb_once(uint64_t *s, unsigned r, const uint8_t *m, + size_t mlen, uint8_t p) +__contract__( + requires(mlen <= MLK_MAX_BUFFER_SIZE) + requires(r <= sizeof(uint64_t) * MLK_KECCAK_LANES) + requires(memory_no_alias(s, sizeof(uint64_t) * MLK_KECCAK_LANES)) + requires(memory_no_alias(m, mlen)) + assigns(memory_slice(s, sizeof(uint64_t) * MLK_KECCAK_LANES))) +{ + /* Initialize state: clear all 25 64-bit lanes */ + size_t i; + for (i = 0; i < 25; ++i) + __loop__(invariant(i <= 25)) + { + s[i] = 0; + } + + /* Absorb every complete r-byte block: XOR it into the state, then + * apply the permutation. */ + while (mlen >= r) + __loop__( + assigns(mlen, m, memory_slice(s, sizeof(uint64_t) * MLK_KECCAK_LANES)) + invariant(mlen <= loop_entry(mlen)) + invariant(m == loop_entry(m) + (loop_entry(mlen) - mlen))) + { + mlk_keccakf1600_xor_bytes(s, m, 0, r); + mlk_keccakf1600_permute(s); + mlen -= r; + m += r; + } + + /* At this
+ point, mlen < r, so the truncations to unsigned are safe below. */ + + /* XOR in the trailing partial block, if any. */ + if (mlen > 0) + { + mlk_keccakf1600_xor_bytes(s, m, 0, (unsigned int)mlen); + } + + /* Padding: the domain-separation byte p goes at offset mlen and the + * final padding bit (128) into the last byte of the block, at offset + * r - 1. When both offsets coincide, they are merged into one byte. */ + if (mlen == r - 1) + { + p |= 128; + mlk_keccakf1600_xor_bytes(s, &p, (unsigned int)mlen, 1); + } + else + { + mlk_keccakf1600_xor_bytes(s, &p, (unsigned int)mlen, 1); + p = 128; + mlk_keccakf1600_xor_bytes(s, &p, r - 1, 1); + } +} + +/************************************************* + * Name: mlk_keccak_squeezeblocks + * + * Description: block-level Keccak squeeze + * + * Arguments: - uint8_t *h: pointer to output bytes + * - size_t nblocks: number of blocks to be squeezed + * - uint64_t *s: pointer to input/output state + * - unsigned r: rate in bytes (e.g., 168 for SHAKE128) + **************************************************/ +static void mlk_keccak_squeezeblocks(uint8_t *h, size_t nblocks, uint64_t *s, + unsigned r) +__contract__( + requires(r <= sizeof(uint64_t) * MLK_KECCAK_LANES) + requires(nblocks <= 8 /* somewhat arbitrary bound */) + requires(memory_no_alias(s, sizeof(uint64_t) * MLK_KECCAK_LANES)) + requires(memory_no_alias(h, nblocks * r)) + assigns(memory_slice(s, sizeof(uint64_t) * MLK_KECCAK_LANES)) + assigns(memory_slice(h, nblocks * r))) +{ + /* Each iteration permutes the state and emits one full r-byte block. */ + while (nblocks > 0) + __loop__( + assigns(h, nblocks, + memory_slice(s, sizeof(uint64_t) * MLK_KECCAK_LANES), + memory_slice(h, nblocks * r)) + invariant(nblocks <= loop_entry(nblocks) && + h == loop_entry(h) + r * (loop_entry(nblocks) - nblocks))) + { + mlk_keccakf1600_permute(s); + mlk_keccakf1600_extract_bytes(s, h, 0, r); + h += r; + nblocks--; + } +} + +/************************************************* + * Name: mlk_keccak_squeeze_once + * + * Description: Keccak squeeze; can be called on byte-level + * + * WARNING: This must only be called once.
+ * + * Arguments: - uint8_t *h: pointer to output bytes + * - size_t outlen: number of bytes to be squeezed + * - uint64_t *s: pointer to Keccak state + * - unsigned r: rate in bytes (e.g., 168 for SHAKE128) + **************************************************/ +static void mlk_keccak_squeeze_once(uint8_t *h, size_t outlen, uint64_t *s, + unsigned r) +__contract__( + requires(outlen <= MLK_MAX_BUFFER_SIZE) + requires(r <= sizeof(uint64_t) * MLK_KECCAK_LANES) + requires(memory_no_alias(s, sizeof(uint64_t) * MLK_KECCAK_LANES)) + requires(memory_no_alias(h, outlen)) + assigns(memory_slice(s, sizeof(uint64_t) * MLK_KECCAK_LANES)) + assigns(memory_slice(h, outlen))) +{ + size_t len; + while (outlen > 0) + __loop__( + assigns(len, h, outlen, + memory_slice(s, sizeof(uint64_t) * MLK_KECCAK_LANES), + memory_slice(h, outlen)) + invariant(outlen <= loop_entry(outlen) && + h == loop_entry(h) + (loop_entry(outlen) - outlen))) + { + mlk_keccakf1600_permute(s); + + /* Extract at most one rate-sized block; the last one may be shorter. */ + if (outlen < r) + { + len = outlen; + } + else + { + len = r; + } + mlk_keccakf1600_extract_bytes(s, h, 0, (unsigned int)len); + h += len; + outlen -= len; + } +} + +void mlk_shake128_absorb_once(mlk_shake128ctx *state, const uint8_t *input, + size_t inlen) +{ + mlk_keccak_absorb_once(state->ctx, SHAKE128_RATE, input, inlen, 0x1F); +} + +void mlk_shake128_squeezeblocks(uint8_t *output, size_t nblocks, + mlk_shake128ctx *state) +{ + mlk_keccak_squeezeblocks(output, nblocks, state->ctx, SHAKE128_RATE); +} + +/* Nothing to set up: mlk_keccak_absorb_once() clears the state itself. */ +void mlk_shake128_init(mlk_shake128ctx *state) { (void)state; } +void mlk_shake128_release(mlk_shake128ctx *state) +{ + /* Specification: Partially implements + * @[FIPS203, Section 3.3, Destruction of intermediate values] */ + mlk_zeroize(state, sizeof(mlk_shake128ctx)); +} + +/* SHAKE256 uses the same context layout as SHAKE128. */ +typedef mlk_shake128ctx mlk_shake256ctx; +void mlk_shake256(uint8_t *output, size_t outlen, const uint8_t *input, + size_t inlen) +{ + mlk_shake256ctx state; + /* Absorb input */ + mlk_keccak_absorb_once(state.ctx, SHAKE256_RATE, input,
inlen, 0x1F); + /* Squeeze output */ + mlk_keccak_squeeze_once(output, outlen, state.ctx, SHAKE256_RATE); + /* Specification: Partially implements + * @[FIPS203, Section 3.3, Destruction of intermediate values] */ + mlk_zeroize(&state, sizeof(state)); +} + +void mlk_sha3_256(uint8_t *output, const uint8_t *input, size_t inlen) +{ + uint64_t ctx[25]; + /* Absorb input (0x06 is passed as the domain-separation byte) */ + mlk_keccak_absorb_once(ctx, SHA3_256_RATE, input, inlen, 0x06); + /* Squeeze output: 32 bytes, the SHA3-256 digest length */ + mlk_keccak_squeeze_once(output, 32, ctx, SHA3_256_RATE); + /* Specification: Partially implements + * @[FIPS203, Section 3.3, Destruction of intermediate values] */ + mlk_zeroize(ctx, sizeof(ctx)); +} + +void mlk_sha3_512(uint8_t *output, const uint8_t *input, size_t inlen) +{ + uint64_t ctx[25]; + /* Absorb input (0x06 is passed as the domain-separation byte) */ + mlk_keccak_absorb_once(ctx, SHA3_512_RATE, input, inlen, 0x06); + /* Squeeze output: 64 bytes, the SHA3-512 digest length */ + mlk_keccak_squeeze_once(output, 64, ctx, SHA3_512_RATE); + /* Specification: Partially implements + * @[FIPS203, Section 3.3, Destruction of intermediate values] */ + mlk_zeroize(ctx, sizeof(ctx)); +} + +#else /* !MLK_CONFIG_MULTILEVEL_NO_SHARED */ + +MLK_EMPTY_CU(fips202) + +#endif /* MLK_CONFIG_MULTILEVEL_NO_SHARED */ diff --git a/mlkem_native/src/fips202/fips202.h b/mlkem_native/src/fips202/fips202.h new file mode 100644 index 0000000..9ebc158 --- /dev/null +++ b/mlkem_native/src/fips202/fips202.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ +#ifndef MLK_FIPS202_FIPS202_H +#define MLK_FIPS202_FIPS202_H + +#include "../cbmc.h" +#include "../common.h" + +/* Sponge rates in bytes */ +#define SHAKE128_RATE 168 +#define SHAKE256_RATE 136 +#define SHA3_256_RATE 136 +#define SHA3_384_RATE 104 +#define SHA3_512_RATE 72 + +/* Context for non-incremental API */ +typedef struct +{ + uint64_t ctx[25]; +} MLK_ALIGN mlk_shake128ctx; + +#define mlk_shake128_absorb_once MLK_NAMESPACE(shake128_absorb_once) +/************************************************* + *
Name: mlk_shake128_absorb_once + * + * Description: One-shot absorb step of the SHAKE128 XOF. + * + * For call-sites (in mlkem-native): + * - This function MUST ONLY be called straight after + * mlk_shake128_init(). + * - This function MUST ONLY be called once. + * + * Consequently, for providers of custom FIPS202 code + * to be used with mlkem-native: + * - You may assume that the input context is + * freshly initialized via mlk_shake128_init(). + * - You may assume that this function is + * called exactly once. + * + * Arguments: - mlk_shake128ctx *state: pointer to SHAKE128 context + * - const uint8_t *input: pointer to input to be absorbed into + * the state + * - size_t inlen: length of input in bytes + **************************************************/ +void mlk_shake128_absorb_once(mlk_shake128ctx *state, const uint8_t *input, + size_t inlen) +__contract__( + requires(inlen <= MLK_MAX_BUFFER_SIZE) + requires(memory_no_alias(state, sizeof(mlk_shake128ctx))) + requires(memory_no_alias(input, inlen)) + assigns(memory_slice(state, sizeof(mlk_shake128ctx))) +); + +#define mlk_shake128_squeezeblocks MLK_NAMESPACE(shake128_squeezeblocks) +/************************************************* + * Name: mlk_shake128_squeezeblocks + * + * Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of + * SHAKE128_RATE bytes each. Modifies the state. Can be called + * multiple times to keep squeezing, i.e., is incremental. 
+ * + * Arguments: - uint8_t *output: pointer to output blocks + * - size_t nblocks: number of blocks to be squeezed (written + * to output) + * - mlk_shake128ctx *state: pointer to in/output Keccak state + **************************************************/ +void mlk_shake128_squeezeblocks(uint8_t *output, size_t nblocks, + mlk_shake128ctx *state) +__contract__( + requires(nblocks <= 8 /* somewhat arbitrary bound */) + requires(memory_no_alias(state, sizeof(mlk_shake128ctx))) + requires(memory_no_alias(output, nblocks * SHAKE128_RATE)) + assigns(memory_slice(output, nblocks * SHAKE128_RATE), memory_slice(state, sizeof(mlk_shake128ctx))) +); + +#define mlk_shake128_init MLK_NAMESPACE(shake128_init) +void mlk_shake128_init(mlk_shake128ctx *state); + +#define mlk_shake128_release MLK_NAMESPACE(shake128_release) +void mlk_shake128_release(mlk_shake128ctx *state); + +/* One-stop SHAKE256 call. Aliasing between input and + * output is not permitted */ +#define mlk_shake256 MLK_NAMESPACE(shake256) +/************************************************* + * Name: mlk_shake256 + * + * Description: SHAKE256 XOF with non-incremental API + * + * Arguments: - uint8_t *output: pointer to output + * - size_t outlen: requested output length in bytes + * - const uint8_t *input: pointer to input + * - size_t inlen: length of input in bytes + **************************************************/ +void mlk_shake256(uint8_t *output, size_t outlen, const uint8_t *input, + size_t inlen) +__contract__( + requires(inlen <= MLK_MAX_BUFFER_SIZE) + requires(outlen <= MLK_MAX_BUFFER_SIZE) + requires(memory_no_alias(input, inlen)) + requires(memory_no_alias(output, outlen)) + assigns(memory_slice(output, outlen)) +); + +/* One-stop SHA3_256 call. 
Aliasing between input and + * output is not permitted */ +#define SHA3_256_HASHBYTES 32 +#define mlk_sha3_256 MLK_NAMESPACE(sha3_256) +/************************************************* + * Name: mlk_sha3_256 + * + * Description: SHA3-256 with non-incremental API + * + * Arguments: - uint8_t *output: pointer to output + * - const uint8_t *input: pointer to input + * - size_t inlen: length of input in bytes + **************************************************/ +void mlk_sha3_256(uint8_t *output, const uint8_t *input, size_t inlen) +__contract__( + requires(inlen <= MLK_MAX_BUFFER_SIZE) + requires(memory_no_alias(input, inlen)) + requires(memory_no_alias(output, SHA3_256_HASHBYTES)) + assigns(memory_slice(output, SHA3_256_HASHBYTES)) +); + +/* One-stop SHA3_512 call. Aliasing between input and + * output is not permitted */ +#define SHA3_512_HASHBYTES 64 +#define mlk_sha3_512 MLK_NAMESPACE(sha3_512) +/************************************************* + * Name: mlk_sha3_512 + * + * Description: SHA3-512 with non-incremental API + * + * Arguments: - uint8_t *output: pointer to output + * - const uint8_t *input: pointer to input + * - size_t inlen: length of input in bytes + **************************************************/ +void mlk_sha3_512(uint8_t *output, const uint8_t *input, size_t inlen) +__contract__( + requires(inlen <= MLK_MAX_BUFFER_SIZE) + requires(memory_no_alias(input, inlen)) + requires(memory_no_alias(output, SHA3_512_HASHBYTES)) + assigns(memory_slice(output, SHA3_512_HASHBYTES)) +); + +#if !defined(MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202) || \ + !defined(MLK_USE_FIPS202_X4_NATIVE) +/* If you provide your own FIPS-202 implementation where the x4- + * Keccak-f1600-x4 implementation falls back to 4-fold Keccak-f1600, + * set this to gain a small speedup. 
*/ +#define FIPS202_X4_DEFAULT_IMPLEMENTATION +#endif /* !MLK_CONFIG_USE_NATIVE_BACKEND_FIPS202 || !MLK_USE_FIPS202_X4_NATIVE \ + */ + + +#endif /* !MLK_FIPS202_FIPS202_H */ diff --git a/mlkem_native/src/fips202/fips202x4.c b/mlkem_native/src/fips202/fips202x4.c new file mode 100644 index 0000000..95f8848 --- /dev/null +++ b/mlkem_native/src/fips202/fips202x4.c @@ -0,0 +1,208 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [FIPS203] + * FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard + * National Institute of Standards and Technology + * https://csrc.nist.gov/pubs/fips/203/final + */ + +#include "../common.h" +#if !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) + +#include "../verify.h" +#include "fips202.h" +#include "fips202x4.h" +#include "keccakf1600.h" + +/* The SHAKE256x4 context reuses the SHAKE128x4 context layout. */ +typedef mlk_shake128x4ctx mlk_shake256x4_ctx; + +/* Non-incremental absorb step for four Keccak instances at once: + * absorbs the four inputs in0..in3 (each inlen bytes) into the state s, + * using rate r (in bytes) and domain-separation byte p. + * + * NOTE: Unlike mlk_keccak_absorb_once() in fips202.c, this function does + * not zero the state; it XORs into whatever s holds on entry. */ +static void mlk_keccak_absorb_once_x4(uint64_t *s, unsigned r, + const uint8_t *in0, const uint8_t *in1, + const uint8_t *in2, const uint8_t *in3, + size_t inlen, uint8_t p) +__contract__( + requires(inlen <= MLK_MAX_BUFFER_SIZE) + requires(memory_no_alias(s, sizeof(uint64_t) * MLK_KECCAK_LANES * MLK_KECCAK_WAY)) + requires(r <= sizeof(uint64_t) * MLK_KECCAK_LANES) + requires(memory_no_alias(in0, inlen)) + requires(memory_no_alias(in1, inlen)) + requires(memory_no_alias(in2, inlen)) + requires(memory_no_alias(in3, inlen)) + assigns(memory_slice(s, sizeof(uint64_t) * MLK_KECCAK_LANES * MLK_KECCAK_WAY))) +{ + /* Absorb every complete r-byte block of all four inputs. */ + while (inlen >= r) + __loop__( + assigns(inlen, in0, in1, in2, in3, memory_slice(s, sizeof(uint64_t) * MLK_KECCAK_LANES * MLK_KECCAK_WAY)) + invariant(inlen <= loop_entry(inlen)) + invariant(in0 == loop_entry(in0) + (loop_entry(inlen) - inlen)) + invariant(in1 == loop_entry(in1) + (loop_entry(inlen) - inlen)) + invariant(in2 == loop_entry(in2) + (loop_entry(inlen) - inlen)) + invariant(in3 == loop_entry(in3) + (loop_entry(inlen) -
inlen))) + { + mlk_keccakf1600x4_xor_bytes(s, in0, in1, in2, in3, 0, r); + mlk_keccakf1600x4_permute(s); + + in0 += r; + in1 += r; + in2 += r; + in3 += r; + inlen -= r; + } + + /* At this point, inlen < r, so the truncations to unsigned are safe below. */ + + /* XOR in the trailing partial blocks, if any. */ + if (inlen > 0) + { + mlk_keccakf1600x4_xor_bytes(s, in0, in1, in2, in3, 0, (unsigned int)inlen); + } + + /* Padding: the domain-separation byte p goes at offset inlen and the + * final padding bit (128) into the last byte of the block, at offset + * r - 1. When both offsets coincide, they are merged into one byte. + * The same byte is used for all four instances. */ + if (inlen == r - 1) + { + p |= 128; + mlk_keccakf1600x4_xor_bytes(s, &p, &p, &p, &p, (unsigned int)inlen, 1); + } + else + { + mlk_keccakf1600x4_xor_bytes(s, &p, &p, &p, &p, (unsigned int)inlen, 1); + p = 128; + mlk_keccakf1600x4_xor_bytes(s, &p, &p, &p, &p, r - 1, 1); + } +} + +/* Block-level squeeze for four Keccak instances at once: each iteration + * permutes the state s and writes one full r-byte block to each of + * out0..out3, for nblocks blocks in total. */ +static void mlk_keccak_squeezeblocks_x4(uint8_t *out0, uint8_t *out1, + uint8_t *out2, uint8_t *out3, + size_t nblocks, uint64_t *s, unsigned r) +__contract__( + requires(r <= sizeof(uint64_t) * MLK_KECCAK_LANES) + requires(r == SHAKE128_RATE || r == SHAKE256_RATE) + requires(nblocks <= (MLK_MAX_BUFFER_SIZE / SHAKE256_RATE)) + requires(memory_no_alias(s, sizeof(uint64_t) * MLK_KECCAK_LANES * MLK_KECCAK_WAY)) + requires(memory_no_alias(out0, nblocks * r)) + requires(memory_no_alias(out1, nblocks * r)) + requires(memory_no_alias(out2, nblocks * r)) + requires(memory_no_alias(out3, nblocks * r)) + assigns(memory_slice(s, sizeof(uint64_t) * MLK_KECCAK_LANES * MLK_KECCAK_WAY)) + assigns(memory_slice(out0, nblocks * r)) + assigns(memory_slice(out1, nblocks * r)) + assigns(memory_slice(out2, nblocks * r)) + assigns(memory_slice(out3, nblocks * r))) +{ + while (nblocks > 0) + __loop__( + assigns(out0, out1, out2, out3, nblocks, + memory_slice(s, sizeof(uint64_t) * MLK_KECCAK_LANES * MLK_KECCAK_WAY), + memory_slice(out0, nblocks * r), + memory_slice(out1, nblocks * r), + memory_slice(out2, nblocks * r), + memory_slice(out3, nblocks * r)) + invariant(nblocks <= loop_entry(nblocks) && + out0 == loop_entry(out0) + r * (loop_entry(nblocks) - nblocks) && + out1 == loop_entry(out1) + r * (loop_entry(nblocks) - nblocks) && + out2 ==
loop_entry(out2) + r * (loop_entry(nblocks) - nblocks) && + out3 == loop_entry(out3) + r * (loop_entry(nblocks) - nblocks))) + { + mlk_keccakf1600x4_permute(s); + mlk_keccakf1600x4_extract_bytes(s, out0, out1, out2, out3, 0, r); + + out0 += r; + out1 += r; + out2 += r; + out3 += r; + nblocks--; + } +} + +void mlk_shake128x4_absorb_once(mlk_shake128x4ctx *state, const uint8_t *in0, + const uint8_t *in1, const uint8_t *in2, + const uint8_t *in3, size_t inlen) +{ + /* Clear the state: mlk_keccak_absorb_once_x4() XORs the input into it + * without zeroing it first. */ + mlk_memset(state, 0, sizeof(mlk_shake128x4ctx)); + mlk_keccak_absorb_once_x4(state->ctx, SHAKE128_RATE, in0, in1, in2, in3, + inlen, 0x1F); +} + +void mlk_shake128x4_squeezeblocks(uint8_t *out0, uint8_t *out1, uint8_t *out2, + uint8_t *out3, size_t nblocks, + mlk_shake128x4ctx *state) +{ + mlk_keccak_squeezeblocks_x4(out0, out1, out2, out3, nblocks, state->ctx, + SHAKE128_RATE); +} + +/* Nothing to set up: the state is cleared in mlk_shake128x4_absorb_once(). */ +void mlk_shake128x4_init(mlk_shake128x4ctx *state) { (void)state; } +void mlk_shake128x4_release(mlk_shake128x4ctx *state) +{ + /* Specification: Partially implements + * @[FIPS203, Section 3.3, Destruction of intermediate values] */ + mlk_zeroize(state, sizeof(mlk_shake128x4ctx)); +} + +/* File-local SHAKE256 counterpart of mlk_shake128x4_absorb_once(). */ +static void mlk_shake256x4_absorb_once(mlk_shake256x4_ctx *state, + const uint8_t *in0, const uint8_t *in1, + const uint8_t *in2, const uint8_t *in3, + size_t inlen) +{ + /* mlk_shake256x4_ctx is a typedef of mlk_shake128x4ctx, so this clears + * the whole context. */ + mlk_memset(state, 0, sizeof(mlk_shake128x4ctx)); + mlk_keccak_absorb_once_x4(state->ctx, SHAKE256_RATE, in0, in1, in2, in3, + inlen, 0x1F); +} + +/* File-local SHAKE256 counterpart of mlk_shake128x4_squeezeblocks(). */ +static void mlk_shake256x4_squeezeblocks(uint8_t *out0, uint8_t *out1, + uint8_t *out2, uint8_t *out3, + size_t nblocks, + mlk_shake256x4_ctx *state) +{ + mlk_keccak_squeezeblocks_x4(out0, out1, out2, out3, nblocks, state->ctx, + SHAKE256_RATE); +} + +/* One-shot SHAKE256 on four inputs at once: absorbs the four inlen-byte + * inputs in0..in3 and writes outlen output bytes to each of out0..out3. */ +void mlk_shake256x4(uint8_t *out0, uint8_t *out1, uint8_t *out2, uint8_t *out3, + size_t outlen, uint8_t *in0, uint8_t *in1, uint8_t *in2, + uint8_t *in3, size_t inlen) +{ + mlk_shake256x4_ctx statex; + size_t nblocks = outlen / SHAKE256_RATE; + uint8_t tmp0[SHAKE256_RATE]; + uint8_t
tmp1[SHAKE256_RATE]; + uint8_t tmp2[SHAKE256_RATE]; + uint8_t tmp3[SHAKE256_RATE]; + + mlk_shake256x4_absorb_once(&statex, in0, in1, in2, in3, inlen); + mlk_shake256x4_squeezeblocks(out0, out1, out2, out3, nblocks, &statex); + + out0 += nblocks * SHAKE256_RATE; + out1 += nblocks * SHAKE256_RATE; + out2 += nblocks * SHAKE256_RATE; + out3 += nblocks * SHAKE256_RATE; + + outlen -= nblocks * SHAKE256_RATE; + + if (outlen) + { + mlk_shake256x4_squeezeblocks(tmp0, tmp1, tmp2, tmp3, 1, &statex); + mlk_memcpy(out0, tmp0, outlen); + mlk_memcpy(out1, tmp1, outlen); + mlk_memcpy(out2, tmp2, outlen); + mlk_memcpy(out3, tmp3, outlen); + } + + /* Specification: Partially implements + * @[FIPS203, Section 3.3, Destruction of intermediate values] */ + mlk_zeroize(&statex, sizeof(statex)); + mlk_zeroize(tmp0, sizeof(tmp0)); + mlk_zeroize(tmp1, sizeof(tmp1)); + mlk_zeroize(tmp2, sizeof(tmp2)); + mlk_zeroize(tmp3, sizeof(tmp3)); +} + +#else /* !MLK_CONFIG_MULTILEVEL_NO_SHARED */ + +MLK_EMPTY_CU(fips202x4) + +#endif /* MLK_CONFIG_MULTILEVEL_NO_SHARED */ diff --git a/mlkem_native/src/fips202/fips202x4.h b/mlkem_native/src/fips202/fips202x4.h new file mode 100644 index 0000000..1f6a8be --- /dev/null +++ b/mlkem_native/src/fips202/fips202x4.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ +#ifndef MLK_FIPS202_FIPS202X4_H +#define MLK_FIPS202_FIPS202X4_H + + +#include "../cbmc.h" +#include "../common.h" + +#include "fips202.h" +#include "keccakf1600.h" + +/* Context for non-incremental API */ +typedef struct +{ + uint64_t ctx[MLK_KECCAK_LANES * MLK_KECCAK_WAY]; +} MLK_ALIGN mlk_shake128x4ctx; + +#define mlk_shake128x4_absorb_once MLK_NAMESPACE(shake128x4_absorb_once) +void mlk_shake128x4_absorb_once(mlk_shake128x4ctx *state, const uint8_t *in0, + const uint8_t *in1, const uint8_t *in2, + const uint8_t *in3, size_t inlen) +__contract__( + requires(inlen <= MLK_MAX_BUFFER_SIZE) + 
requires(memory_no_alias(state, sizeof(mlk_shake128x4ctx))) + requires(memory_no_alias(in0, inlen)) + requires(memory_no_alias(in1, inlen)) + requires(memory_no_alias(in2, inlen)) + requires(memory_no_alias(in3, inlen)) + assigns(memory_slice(state, sizeof(mlk_shake128x4ctx))) +); + +#define mlk_shake128x4_squeezeblocks MLK_NAMESPACE(shake128x4_squeezeblocks) +void mlk_shake128x4_squeezeblocks(uint8_t *out0, uint8_t *out1, uint8_t *out2, + uint8_t *out3, size_t nblocks, + mlk_shake128x4ctx *state) +__contract__( + requires(nblocks <= 8 /* somewhat arbitrary bound */) + requires(memory_no_alias(state, sizeof(mlk_shake128x4ctx))) + requires(memory_no_alias(out0, nblocks * SHAKE128_RATE)) + requires(memory_no_alias(out1, nblocks * SHAKE128_RATE)) + requires(memory_no_alias(out2, nblocks * SHAKE128_RATE)) + requires(memory_no_alias(out3, nblocks * SHAKE128_RATE)) + assigns(memory_slice(out0, nblocks * SHAKE128_RATE), + memory_slice(out1, nblocks * SHAKE128_RATE), + memory_slice(out2, nblocks * SHAKE128_RATE), + memory_slice(out3, nblocks * SHAKE128_RATE), + memory_slice(state, sizeof(mlk_shake128x4ctx))) +); + +#define mlk_shake128x4_init MLK_NAMESPACE(shake128x4_init) +void mlk_shake128x4_init(mlk_shake128x4ctx *state); + +#define mlk_shake128x4_release MLK_NAMESPACE(shake128x4_release) +void mlk_shake128x4_release(mlk_shake128x4ctx *state); + +#define mlk_shake256x4 MLK_NAMESPACE(shake256x4) +void mlk_shake256x4(uint8_t *out0, uint8_t *out1, uint8_t *out2, uint8_t *out3, + size_t outlen, uint8_t *in0, uint8_t *in1, uint8_t *in2, + uint8_t *in3, size_t inlen) +__contract__( + requires(inlen <= MLK_MAX_BUFFER_SIZE) + requires(outlen <= MLK_MAX_BUFFER_SIZE) + requires(memory_no_alias(in0, inlen)) + requires(memory_no_alias(in1, inlen)) + requires(memory_no_alias(in2, inlen)) + requires(memory_no_alias(in3, inlen)) + requires(memory_no_alias(out0, outlen)) + requires(memory_no_alias(out1, outlen)) + requires(memory_no_alias(out2, outlen)) + requires(memory_no_alias(out3, 
outlen)) + assigns(memory_slice(out0, outlen)) + assigns(memory_slice(out1, outlen)) + assigns(memory_slice(out2, outlen)) + assigns(memory_slice(out3, outlen)) +); + +#endif /* !MLK_FIPS202_FIPS202X4_H */ diff --git a/mlkem_native/src/fips202/keccakf1600.c b/mlkem_native/src/fips202/keccakf1600.c new file mode 100644 index 0000000..cf423e3 --- /dev/null +++ b/mlkem_native/src/fips202/keccakf1600.c @@ -0,0 +1,463 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [mupq] + * Common files for pqm4, pqm3, pqriscv + * Kannwischer, Petri, Rijneveld, Schwabe, Stoffelen + * https://github.com/mupq/mupq + * + * - [supercop] + * SUPERCOP benchmarking framework + * Daniel J. Bernstein + * http://bench.cr.yp.to/supercop.html + * + * - [tweetfips] + * 'tweetfips202' FIPS202 implementation + * Van Assche, Bernstein, Schwabe + * https://keccak.team/2015/tweetfips202.html + */ + +/* Based on the CC0 implementation from @[mupq] and the public domain + * implementation @[supercop, crypto_hash/keccakc512/simple/] + * by Ronny Van Keer, and the public domain @[tweetfips] implementation. 
*/ + + +#include "keccakf1600.h" +#if !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) + +#define MLK_KECCAK_NROUNDS 24 +#define MLK_KECCAK_ROL(a, offset) ((a << offset) ^ (a >> (64 - offset))) + +void mlk_keccakf1600_extract_bytes(uint64_t *state, unsigned char *data, + unsigned offset, unsigned length) +{ + unsigned i; +#if defined(MLK_SYS_LITTLE_ENDIAN) + uint8_t *state_ptr = (uint8_t *)state + offset; + for (i = 0; i < length; i++) + __loop__(invariant(i <= length)) + { + data[i] = state_ptr[i]; + } +#else /* MLK_SYS_LITTLE_ENDIAN */ + /* Portable version */ + for (i = 0; i < length; i++) + __loop__(invariant(i <= length)) + { + data[i] = (state[(offset + i) >> 3] >> (8 * ((offset + i) & 0x07))) & 0xFF; + } +#endif /* !MLK_SYS_LITTLE_ENDIAN */ +} + +void mlk_keccakf1600_xor_bytes(uint64_t *state, const unsigned char *data, + unsigned offset, unsigned length) +{ + unsigned i; +#if defined(MLK_SYS_LITTLE_ENDIAN) + uint8_t *state_ptr = (uint8_t *)state + offset; + for (i = 0; i < length; i++) + __loop__(invariant(i <= length)) + { + state_ptr[i] ^= data[i]; + } +#else /* MLK_SYS_LITTLE_ENDIAN */ + /* Portable version */ + for (i = 0; i < length; i++) + __loop__(invariant(i <= length)) + { + state[(offset + i) >> 3] ^= (uint64_t)data[i] + << (8 * ((offset + i) & 0x07)); + } +#endif /* !MLK_SYS_LITTLE_ENDIAN */ +} + +static void mlk_keccakf1600x4_extract_bytes_c(uint64_t *state, + unsigned char *data0, + unsigned char *data1, + unsigned char *data2, + unsigned char *data3, + unsigned offset, unsigned length) +{ + mlk_keccakf1600_extract_bytes(state + MLK_KECCAK_LANES * 0, data0, offset, + length); + mlk_keccakf1600_extract_bytes(state + MLK_KECCAK_LANES * 1, data1, offset, + length); + mlk_keccakf1600_extract_bytes(state + MLK_KECCAK_LANES * 2, data2, offset, + length); + mlk_keccakf1600_extract_bytes(state + MLK_KECCAK_LANES * 3, data3, offset, + length); +} + +void mlk_keccakf1600x4_extract_bytes(uint64_t *state, unsigned char *data0, + unsigned char *data1, unsigned 
char *data2, + unsigned char *data3, unsigned offset, + unsigned length) +{ +#if defined(MLK_USE_FIPS202_X4_EXTRACT_BYTES_NATIVE) + if (mlk_keccakf1600_extract_bytes_x4_native(state, data0, data1, data2, data3, + offset, length) == + MLK_NATIVE_FUNC_SUCCESS) + { + return; + } +#endif /* MLK_USE_FIPS202_X4_EXTRACT_BYTES_NATIVE */ + mlk_keccakf1600x4_extract_bytes_c(state, data0, data1, data2, data3, offset, + length); +} + +static void mlk_keccakf1600x4_xor_bytes_c(uint64_t *state, + const unsigned char *data0, + const unsigned char *data1, + const unsigned char *data2, + const unsigned char *data3, + unsigned offset, unsigned length) +{ + mlk_keccakf1600_xor_bytes(state + MLK_KECCAK_LANES * 0, data0, offset, + length); + mlk_keccakf1600_xor_bytes(state + MLK_KECCAK_LANES * 1, data1, offset, + length); + mlk_keccakf1600_xor_bytes(state + MLK_KECCAK_LANES * 2, data2, offset, + length); + mlk_keccakf1600_xor_bytes(state + MLK_KECCAK_LANES * 3, data3, offset, + length); +} + +void mlk_keccakf1600x4_xor_bytes(uint64_t *state, const unsigned char *data0, + const unsigned char *data1, + const unsigned char *data2, + const unsigned char *data3, unsigned offset, + unsigned length) +{ +#if defined(MLK_USE_FIPS202_X4_XOR_BYTES_NATIVE) + if (mlk_keccakf1600_xor_bytes_x4_native(state, data0, data1, data2, data3, + offset, + length) == MLK_NATIVE_FUNC_SUCCESS) + { + return; + } +#endif /* MLK_USE_FIPS202_X4_XOR_BYTES_NATIVE */ + mlk_keccakf1600x4_xor_bytes_c(state, data0, data1, data2, data3, offset, + length); +} + +void mlk_keccakf1600x4_permute(uint64_t *state) +{ +#if defined(MLK_USE_FIPS202_X4_NATIVE) + if (mlk_keccak_f1600_x4_native(state) == MLK_NATIVE_FUNC_SUCCESS) + { + return; + } +#endif /* MLK_USE_FIPS202_X4_NATIVE */ + mlk_keccakf1600_permute(state + MLK_KECCAK_LANES * 0); + mlk_keccakf1600_permute(state + MLK_KECCAK_LANES * 1); + mlk_keccakf1600_permute(state + MLK_KECCAK_LANES * 2); + mlk_keccakf1600_permute(state + MLK_KECCAK_LANES * 3); +} + +static const 
uint64_t mlk_KeccakF_RoundConstants[MLK_KECCAK_NROUNDS] = { + (uint64_t)0x0000000000000001ULL, (uint64_t)0x0000000000008082ULL, + (uint64_t)0x800000000000808aULL, (uint64_t)0x8000000080008000ULL, + (uint64_t)0x000000000000808bULL, (uint64_t)0x0000000080000001ULL, + (uint64_t)0x8000000080008081ULL, (uint64_t)0x8000000000008009ULL, + (uint64_t)0x000000000000008aULL, (uint64_t)0x0000000000000088ULL, + (uint64_t)0x0000000080008009ULL, (uint64_t)0x000000008000000aULL, + (uint64_t)0x000000008000808bULL, (uint64_t)0x800000000000008bULL, + (uint64_t)0x8000000000008089ULL, (uint64_t)0x8000000000008003ULL, + (uint64_t)0x8000000000008002ULL, (uint64_t)0x8000000000000080ULL, + (uint64_t)0x000000000000800aULL, (uint64_t)0x800000008000000aULL, + (uint64_t)0x8000000080008081ULL, (uint64_t)0x8000000000008080ULL, + (uint64_t)0x0000000080000001ULL, (uint64_t)0x8000000080008008ULL}; + +MLK_STATIC_TESTABLE +void mlk_keccakf1600_permute_c(uint64_t *state) +{ + unsigned round; + + uint64_t Aba, Abe, Abi, Abo, Abu; + uint64_t Aga, Age, Agi, Ago, Agu; + uint64_t Aka, Ake, Aki, Ako, Aku; + uint64_t Ama, Ame, Ami, Amo, Amu; + uint64_t Asa, Ase, Asi, Aso, Asu; + uint64_t BCa, BCe, BCi, BCo, BCu; + uint64_t Da, De, Di, Do, Du; + uint64_t Eba, Ebe, Ebi, Ebo, Ebu; + uint64_t Ega, Ege, Egi, Ego, Egu; + uint64_t Eka, Eke, Eki, Eko, Eku; + uint64_t Ema, Eme, Emi, Emo, Emu; + uint64_t Esa, Ese, Esi, Eso, Esu; + + /* copyFromState(A, state) */ + Aba = state[0]; + Abe = state[1]; + Abi = state[2]; + Abo = state[3]; + Abu = state[4]; + Aga = state[5]; + Age = state[6]; + Agi = state[7]; + Ago = state[8]; + Agu = state[9]; + Aka = state[10]; + Ake = state[11]; + Aki = state[12]; + Ako = state[13]; + Aku = state[14]; + Ama = state[15]; + Ame = state[16]; + Ami = state[17]; + Amo = state[18]; + Amu = state[19]; + Asa = state[20]; + Ase = state[21]; + Asi = state[22]; + Aso = state[23]; + Asu = state[24]; + + for (round = 0; round < MLK_KECCAK_NROUNDS; round += 2) + __loop__(invariant(round <= 
MLK_KECCAK_NROUNDS && round % 2 == 0)) + { + /* prepareTheta */ + BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa; + BCe = Abe ^ Age ^ Ake ^ Ame ^ Ase; + BCi = Abi ^ Agi ^ Aki ^ Ami ^ Asi; + BCo = Abo ^ Ago ^ Ako ^ Amo ^ Aso; + BCu = Abu ^ Agu ^ Aku ^ Amu ^ Asu; + + /* thetaRhoPiChiIotaPrepareTheta(round, A, E) */ + Da = BCu ^ MLK_KECCAK_ROL(BCe, 1); + De = BCa ^ MLK_KECCAK_ROL(BCi, 1); + Di = BCe ^ MLK_KECCAK_ROL(BCo, 1); + Do = BCi ^ MLK_KECCAK_ROL(BCu, 1); + Du = BCo ^ MLK_KECCAK_ROL(BCa, 1); + + Aba ^= Da; + BCa = Aba; + Age ^= De; + BCe = MLK_KECCAK_ROL(Age, 44); + Aki ^= Di; + BCi = MLK_KECCAK_ROL(Aki, 43); + Amo ^= Do; + BCo = MLK_KECCAK_ROL(Amo, 21); + Asu ^= Du; + BCu = MLK_KECCAK_ROL(Asu, 14); + Eba = BCa ^ ((~BCe) & BCi); + Eba ^= (uint64_t)mlk_KeccakF_RoundConstants[round]; + Ebe = BCe ^ ((~BCi) & BCo); + Ebi = BCi ^ ((~BCo) & BCu); + Ebo = BCo ^ ((~BCu) & BCa); + Ebu = BCu ^ ((~BCa) & BCe); + + Abo ^= Do; + BCa = MLK_KECCAK_ROL(Abo, 28); + Agu ^= Du; + BCe = MLK_KECCAK_ROL(Agu, 20); + Aka ^= Da; + BCi = MLK_KECCAK_ROL(Aka, 3); + Ame ^= De; + BCo = MLK_KECCAK_ROL(Ame, 45); + Asi ^= Di; + BCu = MLK_KECCAK_ROL(Asi, 61); + Ega = BCa ^ ((~BCe) & BCi); + Ege = BCe ^ ((~BCi) & BCo); + Egi = BCi ^ ((~BCo) & BCu); + Ego = BCo ^ ((~BCu) & BCa); + Egu = BCu ^ ((~BCa) & BCe); + + Abe ^= De; + BCa = MLK_KECCAK_ROL(Abe, 1); + Agi ^= Di; + BCe = MLK_KECCAK_ROL(Agi, 6); + Ako ^= Do; + BCi = MLK_KECCAK_ROL(Ako, 25); + Amu ^= Du; + BCo = MLK_KECCAK_ROL(Amu, 8); + Asa ^= Da; + BCu = MLK_KECCAK_ROL(Asa, 18); + Eka = BCa ^ ((~BCe) & BCi); + Eke = BCe ^ ((~BCi) & BCo); + Eki = BCi ^ ((~BCo) & BCu); + Eko = BCo ^ ((~BCu) & BCa); + Eku = BCu ^ ((~BCa) & BCe); + + Abu ^= Du; + BCa = MLK_KECCAK_ROL(Abu, 27); + Aga ^= Da; + BCe = MLK_KECCAK_ROL(Aga, 36); + Ake ^= De; + BCi = MLK_KECCAK_ROL(Ake, 10); + Ami ^= Di; + BCo = MLK_KECCAK_ROL(Ami, 15); + Aso ^= Do; + BCu = MLK_KECCAK_ROL(Aso, 56); + Ema = BCa ^ ((~BCe) & BCi); + Eme = BCe ^ ((~BCi) & BCo); + Emi = BCi ^ ((~BCo) & BCu); + Emo = BCo ^ 
((~BCu) & BCa); + Emu = BCu ^ ((~BCa) & BCe); + + Abi ^= Di; + BCa = MLK_KECCAK_ROL(Abi, 62); + Ago ^= Do; + BCe = MLK_KECCAK_ROL(Ago, 55); + Aku ^= Du; + BCi = MLK_KECCAK_ROL(Aku, 39); + Ama ^= Da; + BCo = MLK_KECCAK_ROL(Ama, 41); + Ase ^= De; + BCu = MLK_KECCAK_ROL(Ase, 2); + Esa = BCa ^ ((~BCe) & BCi); + Ese = BCe ^ ((~BCi) & BCo); + Esi = BCi ^ ((~BCo) & BCu); + Eso = BCo ^ ((~BCu) & BCa); + Esu = BCu ^ ((~BCa) & BCe); + + /* prepareTheta */ + BCa = Eba ^ Ega ^ Eka ^ Ema ^ Esa; + BCe = Ebe ^ Ege ^ Eke ^ Eme ^ Ese; + BCi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi; + BCo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso; + BCu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu; + + /* thetaRhoPiChiIotaPrepareTheta(round+1, E, A) */ + Da = BCu ^ MLK_KECCAK_ROL(BCe, 1); + De = BCa ^ MLK_KECCAK_ROL(BCi, 1); + Di = BCe ^ MLK_KECCAK_ROL(BCo, 1); + Do = BCi ^ MLK_KECCAK_ROL(BCu, 1); + Du = BCo ^ MLK_KECCAK_ROL(BCa, 1); + + Eba ^= Da; + BCa = Eba; + Ege ^= De; + BCe = MLK_KECCAK_ROL(Ege, 44); + Eki ^= Di; + BCi = MLK_KECCAK_ROL(Eki, 43); + Emo ^= Do; + BCo = MLK_KECCAK_ROL(Emo, 21); + Esu ^= Du; + BCu = MLK_KECCAK_ROL(Esu, 14); + Aba = BCa ^ ((~BCe) & BCi); + Aba ^= (uint64_t)mlk_KeccakF_RoundConstants[round + 1]; + Abe = BCe ^ ((~BCi) & BCo); + Abi = BCi ^ ((~BCo) & BCu); + Abo = BCo ^ ((~BCu) & BCa); + Abu = BCu ^ ((~BCa) & BCe); + + Ebo ^= Do; + BCa = MLK_KECCAK_ROL(Ebo, 28); + Egu ^= Du; + BCe = MLK_KECCAK_ROL(Egu, 20); + Eka ^= Da; + BCi = MLK_KECCAK_ROL(Eka, 3); + Eme ^= De; + BCo = MLK_KECCAK_ROL(Eme, 45); + Esi ^= Di; + BCu = MLK_KECCAK_ROL(Esi, 61); + Aga = BCa ^ ((~BCe) & BCi); + Age = BCe ^ ((~BCi) & BCo); + Agi = BCi ^ ((~BCo) & BCu); + Ago = BCo ^ ((~BCu) & BCa); + Agu = BCu ^ ((~BCa) & BCe); + + Ebe ^= De; + BCa = MLK_KECCAK_ROL(Ebe, 1); + Egi ^= Di; + BCe = MLK_KECCAK_ROL(Egi, 6); + Eko ^= Do; + BCi = MLK_KECCAK_ROL(Eko, 25); + Emu ^= Du; + BCo = MLK_KECCAK_ROL(Emu, 8); + Esa ^= Da; + BCu = MLK_KECCAK_ROL(Esa, 18); + Aka = BCa ^ ((~BCe) & BCi); + Ake = BCe ^ ((~BCi) & BCo); + Aki = BCi ^ ((~BCo) & BCu); + 
Ako = BCo ^ ((~BCu) & BCa); + Aku = BCu ^ ((~BCa) & BCe); + + Ebu ^= Du; + BCa = MLK_KECCAK_ROL(Ebu, 27); + Ega ^= Da; + BCe = MLK_KECCAK_ROL(Ega, 36); + Eke ^= De; + BCi = MLK_KECCAK_ROL(Eke, 10); + Emi ^= Di; + BCo = MLK_KECCAK_ROL(Emi, 15); + Eso ^= Do; + BCu = MLK_KECCAK_ROL(Eso, 56); + Ama = BCa ^ ((~BCe) & BCi); + Ame = BCe ^ ((~BCi) & BCo); + Ami = BCi ^ ((~BCo) & BCu); + Amo = BCo ^ ((~BCu) & BCa); + Amu = BCu ^ ((~BCa) & BCe); + + Ebi ^= Di; + BCa = MLK_KECCAK_ROL(Ebi, 62); + Ego ^= Do; + BCe = MLK_KECCAK_ROL(Ego, 55); + Eku ^= Du; + BCi = MLK_KECCAK_ROL(Eku, 39); + Ema ^= Da; + BCo = MLK_KECCAK_ROL(Ema, 41); + Ese ^= De; + BCu = MLK_KECCAK_ROL(Ese, 2); + Asa = BCa ^ ((~BCe) & BCi); + Ase = BCe ^ ((~BCi) & BCo); + Asi = BCi ^ ((~BCo) & BCu); + Aso = BCo ^ ((~BCu) & BCa); + Asu = BCu ^ ((~BCa) & BCe); + } + + /* copyToState(state, A) */ + state[0] = Aba; + state[1] = Abe; + state[2] = Abi; + state[3] = Abo; + state[4] = Abu; + state[5] = Aga; + state[6] = Age; + state[7] = Agi; + state[8] = Ago; + state[9] = Agu; + state[10] = Aka; + state[11] = Ake; + state[12] = Aki; + state[13] = Ako; + state[14] = Aku; + state[15] = Ama; + state[16] = Ame; + state[17] = Ami; + state[18] = Amo; + state[19] = Amu; + state[20] = Asa; + state[21] = Ase; + state[22] = Asi; + state[23] = Aso; + state[24] = Asu; +} + +void mlk_keccakf1600_permute(uint64_t *state) +{ +#if defined(MLK_USE_FIPS202_X1_NATIVE) + if (mlk_keccak_f1600_x1_native(state) == MLK_NATIVE_FUNC_SUCCESS) + { + return; + } +#endif /* MLK_USE_FIPS202_X1_NATIVE */ + mlk_keccakf1600_permute_c(state); +} + +#else /* !MLK_CONFIG_MULTILEVEL_NO_SHARED */ + +MLK_EMPTY_CU(keccakf1600) + +#endif /* MLK_CONFIG_MULTILEVEL_NO_SHARED */ + +/* To facilitate single-compilation-unit (SCU) builds, undefine all macros. + * Don't modify by hand -- this is auto-generated by scripts/autogen. 
*/ +#undef MLK_KECCAK_NROUNDS +#undef MLK_KECCAK_ROL diff --git a/mlkem_native/src/fips202/keccakf1600.h b/mlkem_native/src/fips202/keccakf1600.h new file mode 100644 index 0000000..c26c36a --- /dev/null +++ b/mlkem_native/src/fips202/keccakf1600.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ +#ifndef MLK_FIPS202_KECCAKF1600_H +#define MLK_FIPS202_KECCAKF1600_H +#include "../cbmc.h" +#include "../common.h" + +#define MLK_KECCAK_LANES 25 +#define MLK_KECCAK_WAY 4 + +/* + * WARNING: + * The contents of this structure, including the placement + * and interleaving of Keccak lanes, are IMPLEMENTATION-DEFINED. + * The struct is only exposed here to allow its construction on the stack. + */ + +#define mlk_keccakf1600_extract_bytes MLK_NAMESPACE(keccakf1600_extract_bytes) +void mlk_keccakf1600_extract_bytes(uint64_t *state, unsigned char *data, + unsigned offset, unsigned length) +__contract__( + requires(0 <= offset && offset <= MLK_KECCAK_LANES * sizeof(uint64_t) && + 0 <= length && length <= MLK_KECCAK_LANES * sizeof(uint64_t) - offset) + requires(memory_no_alias(state, sizeof(uint64_t) * MLK_KECCAK_LANES)) + requires(memory_no_alias(data, length)) + assigns(memory_slice(data, length)) +); + +#define mlk_keccakf1600_xor_bytes MLK_NAMESPACE(keccakf1600_xor_bytes) +void mlk_keccakf1600_xor_bytes(uint64_t *state, const unsigned char *data, + unsigned offset, unsigned length) +__contract__( + requires(0 <= offset && offset <= MLK_KECCAK_LANES * sizeof(uint64_t) && + 0 <= length && length <= MLK_KECCAK_LANES * sizeof(uint64_t) - offset) + requires(memory_no_alias(state, sizeof(uint64_t) * MLK_KECCAK_LANES)) + requires(memory_no_alias(data, length)) + assigns(memory_slice(state, sizeof(uint64_t) * MLK_KECCAK_LANES)) +); + +#define mlk_keccakf1600x4_extract_bytes \ + MLK_NAMESPACE(keccakf1600x4_extract_bytes) +void mlk_keccakf1600x4_extract_bytes(uint64_t *state, unsigned char *data0, + 
unsigned char *data1, unsigned char *data2, + unsigned char *data3, unsigned offset, + unsigned length) +__contract__( + requires(0 <= offset && offset <= MLK_KECCAK_LANES * sizeof(uint64_t) && + 0 <= length && length <= MLK_KECCAK_LANES * sizeof(uint64_t) - offset) + requires(memory_no_alias(state, sizeof(uint64_t) * MLK_KECCAK_LANES * MLK_KECCAK_WAY)) + requires(memory_no_alias(data0, length)) + requires(memory_no_alias(data1, length)) + requires(memory_no_alias(data2, length)) + requires(memory_no_alias(data3, length)) + assigns(memory_slice(data0, length)) + assigns(memory_slice(data1, length)) + assigns(memory_slice(data2, length)) + assigns(memory_slice(data3, length)) +); + +#define mlk_keccakf1600x4_xor_bytes MLK_NAMESPACE(keccakf1600x4_xor_bytes) +void mlk_keccakf1600x4_xor_bytes(uint64_t *state, const unsigned char *data0, + const unsigned char *data1, + const unsigned char *data2, + const unsigned char *data3, unsigned offset, + unsigned length) +__contract__( + requires(0 <= offset && offset <= MLK_KECCAK_LANES * sizeof(uint64_t) && + 0 <= length && length <= MLK_KECCAK_LANES * sizeof(uint64_t) - offset) + requires(memory_no_alias(state, sizeof(uint64_t) * MLK_KECCAK_LANES * MLK_KECCAK_WAY)) + requires(memory_no_alias(data0, length)) + /* Case 1: all input buffers are distinct; Case 2: All input buffers are the same */ + requires((data0 == data1 && + data0 == data2 && + data0 == data3) || + (memory_no_alias(data1, length) && + memory_no_alias(data2, length) && + memory_no_alias(data3, length))) + assigns(memory_slice(state, sizeof(uint64_t) * MLK_KECCAK_LANES * MLK_KECCAK_WAY)) +); + + +#define mlk_keccakf1600x4_permute MLK_NAMESPACE(keccakf1600x4_permute) +void mlk_keccakf1600x4_permute(uint64_t *state) +__contract__( + requires(memory_no_alias(state, sizeof(uint64_t) * MLK_KECCAK_LANES * MLK_KECCAK_WAY)) + assigns(memory_slice(state, sizeof(uint64_t) * MLK_KECCAK_LANES * MLK_KECCAK_WAY)) +); + +#define mlk_keccakf1600_permute 
MLK_NAMESPACE(keccakf1600_permute) +void mlk_keccakf1600_permute(uint64_t *state) +__contract__( + requires(memory_no_alias(state, sizeof(uint64_t) * MLK_KECCAK_LANES)) + assigns(memory_slice(state, sizeof(uint64_t) * MLK_KECCAK_LANES)) +); + +#endif /* !MLK_FIPS202_KECCAKF1600_H */ diff --git a/mlkem_native/src/indcpa.c b/mlkem_native/src/indcpa.c new file mode 100644 index 0000000..d3dc364 --- /dev/null +++ b/mlkem_native/src/indcpa.c @@ -0,0 +1,652 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [FIPS203] + * FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard + * National Institute of Standards and Technology + * https://csrc.nist.gov/pubs/fips/203/final + * + * - [REF] + * CRYSTALS-Kyber C reference implementation + * Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, Stehlé + * https://github.com/pq-crystals/kyber/tree/main/ref + */ + +#include "indcpa.h" + +#include "debug.h" +#include "randombytes.h" +#include "sampling.h" +#include "symmetric.h" + +/* Parameter set namespacing + * This is to facilitate building multiple instances + * of mlkem-native (e.g. with varying parameter sets) + * within a single compilation unit. 
*/ +#define mlk_pack_pk MLK_ADD_PARAM_SET(mlk_pack_pk) +#define mlk_unpack_pk MLK_ADD_PARAM_SET(mlk_unpack_pk) +#define mlk_pack_sk MLK_ADD_PARAM_SET(mlk_pack_sk) +#define mlk_unpack_sk MLK_ADD_PARAM_SET(mlk_unpack_sk) +#define mlk_pack_ciphertext MLK_ADD_PARAM_SET(mlk_pack_ciphertext) +#define mlk_unpack_ciphertext MLK_ADD_PARAM_SET(mlk_unpack_ciphertext) +#define mlk_matvec_mul MLK_ADD_PARAM_SET(mlk_matvec_mul) +#define mlk_polyvec_permute_bitrev_to_custom \ + MLK_ADD_PARAM_SET(mlk_polyvec_permute_bitrev_to_custom) +#define mlk_polymat_permute_bitrev_to_custom \ + MLK_ADD_PARAM_SET(mlk_polymat_permute_bitrev_to_custom) +#define mlk_keypair_getnoise MLK_ADD_PARAM_SET(mlk_keypair_getnoise) +/* End of parameter set namespacing */ + +/************************************************* + * Name: mlk_pack_pk + * + * Description: Serialize the public key as concatenation of the + * serialized vector of polynomials pk + * and the public seed used to generate the matrix A. + * + * Arguments: uint8_t *r: pointer to the output serialized public key + * mlk_polyvec pk: pointer to the input public-key mlk_polyvec. + * Must have coefficients within [0,..,q-1]. + * const uint8_t *seed: pointer to the input public seed + * + * Specification: + * Implements @[FIPS203, Algorithm 13 (K-PKE.KeyGen), L19] + * + **************************************************/ +static void mlk_pack_pk(uint8_t r[MLKEM_INDCPA_PUBLICKEYBYTES], + const mlk_polyvec *pk, + const uint8_t seed[MLKEM_SYMBYTES]) +{ + mlk_assert_bound_2d(pk->vec, MLKEM_K, MLKEM_N, 0, MLKEM_Q); + mlk_polyvec_tobytes(r, pk); + mlk_memcpy(r + MLKEM_POLYVECBYTES, seed, MLKEM_SYMBYTES); +} + +/************************************************* + * Name: mlk_unpack_pk + * + * Description: De-serialize public key from a byte array; + * approximate inverse of mlk_pack_pk + * + * Arguments: - mlk_polyvec pk: pointer to output public-key polynomial + * vector Coefficients will be normalized to [0,..,q-1]. 
+ * - uint8_t *seed: pointer to output seed to generate matrix A + * - const uint8_t *packedpk: pointer to input serialized public + * key. + * + * Specification: + * Implements @[FIPS203, Algorithm 14 (K-PKE.Encrypt), L2-3] + * + **************************************************/ +static void mlk_unpack_pk(mlk_polyvec *pk, uint8_t seed[MLKEM_SYMBYTES], + const uint8_t packedpk[MLKEM_INDCPA_PUBLICKEYBYTES]) +{ + mlk_polyvec_frombytes(pk, packedpk); + mlk_memcpy(seed, packedpk + MLKEM_POLYVECBYTES, MLKEM_SYMBYTES); + + /* NOTE: If a modulus check was conducted on the PK, we know at this + * point that the coefficients of `pk` are unsigned canonical. The + * specifications and proofs, however, do _not_ assume this, and instead + * work with the easily provable bound by MLKEM_UINT12_LIMIT. */ +} + +/************************************************* + * Name: mlk_pack_sk + * + * Description: Serialize the secret key + * + * Arguments: - uint8_t *r: pointer to output serialized secret key + * - mlk_polyvec sk: pointer to input vector of polynomials + * (secret key) + * + * Specification: + * Implements @[FIPS203, Algorithm 13 (K-PKE.KeyGen), L20] + * + **************************************************/ +static void mlk_pack_sk(uint8_t r[MLKEM_INDCPA_SECRETKEYBYTES], + const mlk_polyvec *sk) +{ + mlk_assert_bound_2d(sk->vec, MLKEM_K, MLKEM_N, 0, MLKEM_Q); + mlk_polyvec_tobytes(r, sk); +} + +/************************************************* + * Name: mlk_unpack_sk + * + * Description: De-serialize the secret key; inverse of mlk_pack_sk + * + * Arguments: - mlk_polyvec sk: pointer to output vector of polynomials + * (secret key) + * - const uint8_t *packedsk: pointer to input serialized secret + * key + * + * Specification: + * Implements @[FIPS203, Algorithm 15 (K-PKE.Decrypt), L5] + * + **************************************************/ +static void mlk_unpack_sk(mlk_polyvec *sk, + const uint8_t packedsk[MLKEM_INDCPA_SECRETKEYBYTES]) +{ + mlk_polyvec_frombytes(sk, 
packedsk); +} + +/************************************************* + * Name: mlk_pack_ciphertext + * + * Description: Serialize the ciphertext as concatenation of the + * compressed and serialized vector of polynomials b + * and the compressed and serialized polynomial v + * + * Arguments: uint8_t *r: pointer to the output serialized ciphertext + * const mlk_polyvec *b: pointer to the input vector of polynomials b + * mlk_poly *v: pointer to the input polynomial v + * + * Specification: + * Implements @[FIPS203, Algorithm 14 (K-PKE.Encrypt), L22-23] + * + **************************************************/ +static void mlk_pack_ciphertext(uint8_t r[MLKEM_INDCPA_BYTES], + const mlk_polyvec *b, mlk_poly *v) +{ + mlk_polyvec_compress_du(r, b); + mlk_poly_compress_dv(r + MLKEM_POLYVECCOMPRESSEDBYTES_DU, v); +} + +/************************************************* + * Name: mlk_unpack_ciphertext + * + * Description: De-serialize and decompress ciphertext from a byte array; + * approximate inverse of mlk_pack_ciphertext + * + * Arguments: - mlk_polyvec *b: pointer to the output vector of polynomials b + * - mlk_poly *v: pointer to the output polynomial v + * - const uint8_t *c: pointer to the input serialized ciphertext + * + * Specification: + * Implements @[FIPS203, Algorithm 15 (K-PKE.Decrypt), L1-4] + * + **************************************************/ +static void mlk_unpack_ciphertext(mlk_polyvec *b, mlk_poly *v, + const uint8_t c[MLKEM_INDCPA_BYTES]) +{ + mlk_polyvec_decompress_du(b, c); + mlk_poly_decompress_dv(v, c + MLKEM_POLYVECCOMPRESSEDBYTES_DU); +} + +/* Helper function to ensure that the polynomial entries in the output + * of gen_matrix use the standard (bitreversed) ordering of coefficients. + * No-op unless a native backend with a custom ordering is used. + * + * We don't inline this into gen_matrix to avoid having to split the CBMC + * proof for gen_matrix based on MLK_USE_NATIVE_NTT_CUSTOM_ORDER. 
*/ +static void mlk_polyvec_permute_bitrev_to_custom(mlk_polyvec *v) +__contract__( + /* We don't specify that this should be a permutation, but only + * that it does not change the bound established at the end of mlk_gen_matrix. */ + requires(memory_no_alias(v, sizeof(mlk_polyvec))) + requires(forall(x, 0, MLKEM_K, + array_bound(v->vec[x].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) + assigns(memory_slice(v, sizeof(mlk_polyvec))) + ensures(forall(x, 0, MLKEM_K, + array_bound(v->vec[x].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))) +{ +#if defined(MLK_USE_NATIVE_NTT_CUSTOM_ORDER) + unsigned i; + for (i = 0; i < MLKEM_K; i++) + __loop__( + assigns(i, memory_slice(v, sizeof(mlk_polyvec))) + invariant(i <= MLKEM_K) + invariant(forall(x, 0, MLKEM_K, + array_bound(v->vec[x].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))) + { + mlk_poly_permute_bitrev_to_custom(v->vec[i].coeffs); + } +#else /* MLK_USE_NATIVE_NTT_CUSTOM_ORDER */ + /* Nothing to do */ + (void)v; +#endif /* !MLK_USE_NATIVE_NTT_CUSTOM_ORDER */ +} + +static void mlk_polymat_permute_bitrev_to_custom(mlk_polymat *a) +__contract__( + /* We don't specify that this should be a permutation, but only + * that it does not change the bound established at the end of mlk_gen_matrix. */ + requires(memory_no_alias(a, sizeof(mlk_polymat))) + requires(forall(x, 0, MLKEM_K, forall(y, 0, MLKEM_K, + array_bound(a->vec[x].vec[y].coeffs, 0, MLKEM_N, 0, MLKEM_Q)))) + assigns(memory_slice(a, sizeof(mlk_polymat))) + ensures(forall(x, 0, MLKEM_K, forall(y, 0, MLKEM_K, + array_bound(a->vec[x].vec[y].coeffs, 0, MLKEM_N, 0, MLKEM_Q))))) +{ + unsigned i; + for (i = 0; i < MLKEM_K; i++) + __loop__( + assigns(i, memory_slice(a, sizeof(mlk_polymat))) + invariant(i <= MLKEM_K) + invariant(forall(x, 0, MLKEM_K, forall(y, 0, MLKEM_K, + array_bound(a->vec[x].vec[y].coeffs, 0, MLKEM_N, 0, MLKEM_Q))))) + { + mlk_polyvec_permute_bitrev_to_custom(&a->vec[i]); + } +} + +/* Reference: `gen_matrix()` in the reference implementation @[REF]. 
+ * - We use a special subroutine to generate 4 polynomials + * at a time, to be able to leverage batched Keccak-f1600 + * implementations. The reference implementation generates + * one matrix entry at a time. + * + * Not static for benchmarking */ +MLK_INTERNAL_API +void mlk_gen_matrix(mlk_polymat *a, const uint8_t seed[MLKEM_SYMBYTES], + int transposed) +{ + unsigned i, j; + MLK_ALIGN uint8_t seed_ext[4][MLK_ALIGN_UP(MLKEM_SYMBYTES + 2)]; + + for (j = 0; j < 4; j++) + { + mlk_memcpy(seed_ext[j], seed, MLKEM_SYMBYTES); + } + +#if !defined(MLK_CONFIG_SERIAL_FIPS202_ONLY) + /* Sample 4 matrix entries at a time. */ + for (i = 0; i < (MLKEM_K * MLKEM_K / 4) * 4; i += 4) + { + for (j = 0; j < 4; j++) + { + uint8_t x, y; + /* MLKEM_K <= 4, so the values fit in uint8_t. */ + x = (uint8_t)((i + j) / MLKEM_K); + y = (uint8_t)((i + j) % MLKEM_K); + if (transposed) + { + seed_ext[j][MLKEM_SYMBYTES + 0] = x; + seed_ext[j][MLKEM_SYMBYTES + 1] = y; + } + else + { + seed_ext[j][MLKEM_SYMBYTES + 0] = y; + seed_ext[j][MLKEM_SYMBYTES + 1] = x; + } + } + + mlk_poly_rej_uniform_x4(&a->vec[i / MLKEM_K].vec[i % MLKEM_K], + &a->vec[(i + 1) / MLKEM_K].vec[(i + 1) % MLKEM_K], + &a->vec[(i + 2) / MLKEM_K].vec[(i + 2) % MLKEM_K], + &a->vec[(i + 3) / MLKEM_K].vec[(i + 3) % MLKEM_K], + seed_ext); + } +#else /* !MLK_CONFIG_SERIAL_FIPS202_ONLY */ + /* When using serial FIPS202, sample all entries individually. */ + i = 0; +#endif /* MLK_CONFIG_SERIAL_FIPS202_ONLY */ + + /* For MLKEM_K == 3, sample the last entry individually. + * When MLK_CONFIG_SERIAL_FIPS202_ONLY is set, sample all entries + * individually. */ + for (; i < MLKEM_K * MLKEM_K; i++) + { + uint8_t x, y; + /* MLKEM_K <= 4, so the values fit in uint8_t. 
*/ + x = (uint8_t)(i / MLKEM_K); + y = (uint8_t)(i % MLKEM_K); + + if (transposed) + { + seed_ext[0][MLKEM_SYMBYTES + 0] = x; + seed_ext[0][MLKEM_SYMBYTES + 1] = y; + } + else + { + seed_ext[0][MLKEM_SYMBYTES + 0] = y; + seed_ext[0][MLKEM_SYMBYTES + 1] = x; + } + + mlk_poly_rej_uniform(&a->vec[i / MLKEM_K].vec[i % MLKEM_K], seed_ext[0]); + } + + mlk_assert(i == MLKEM_K * MLKEM_K); + + /* + * The public matrix is generated in NTT domain. If the native backend + * uses a custom order in NTT domain, permute A accordingly. + */ + mlk_polymat_permute_bitrev_to_custom(a); + + /* Specification: Partially implements + * @[FIPS203, Section 3.3, Destruction of intermediate values] */ + mlk_zeroize(seed_ext, sizeof(seed_ext)); +} + +/************************************************* + * Name: mlk_matvec_mul + * + * Description: Computes matrix-vector product in NTT domain, + * via Montgomery multiplication. + * + * Arguments: - mlk_polyvec out: Pointer to output polynomial vector + * - mlk_polymat a: Input matrix. Must be in NTT domain + * and have coefficients of absolute value < 4096. + * - mlk_polyvec v: Input polynomial vector. Must be in NTT + * domain. + * - mlk_polyvec vc: Mulcache for v, computed via + * mlk_polyvec_mulcache_compute(). 
+ *
+ * Specification: Implements @[FIPS203, Section 2.4.7, Eq (2.12), (2.13)]
+ *
+ **************************************************/
+static void mlk_matvec_mul(mlk_polyvec *out, const mlk_polymat *a,
+                           const mlk_polyvec *v, const mlk_polyvec_mulcache *vc)
+__contract__(
+  requires(memory_no_alias(out, sizeof(mlk_polyvec)))
+  requires(memory_no_alias(a, sizeof(mlk_polymat)))
+  requires(memory_no_alias(v, sizeof(mlk_polyvec)))
+  requires(memory_no_alias(vc, sizeof(mlk_polyvec_mulcache)))
+  requires(forall(k0, 0, MLKEM_K,
+    forall(k1, 0, MLKEM_K,
+      array_bound(a->vec[k0].vec[k1].coeffs, 0, MLKEM_N, 0, MLKEM_UINT12_LIMIT))))
+  assigns(memory_slice(out, sizeof(mlk_polyvec))))
+{
+  unsigned i;
+  for (i = 0; i < MLKEM_K; i++)
+  __loop__(
+    assigns(i, memory_slice(out, sizeof(mlk_polyvec)))
+    invariant(i <= MLKEM_K))
+  {
+    mlk_polyvec_basemul_acc_montgomery_cached(&out->vec[i], &a->vec[i], v, vc); /* one output polynomial per matrix row */
+  }
+}
+
+/*************************************************
+ * Name:        mlk_keypair_getnoise
+ *
+ * Description: Computes and fills the pv and e polyvec
+ *              structures needed by mlk_indcpa_keypair_derand()
+ *
+ * Arguments:   - pv: Pointer to output polynomial vector
+ *              - e: Pointer to output polynomial vector
+ *              - seed: seed bytes for sampling
+ *
+ * Specification: Implements @[FIPS203, Algorithm 13 (K-PKE.KeyGen)].
+ *                steps 8 - 15
+ **************************************************/
+static void mlk_keypair_getnoise(mlk_polyvec *pv, mlk_polyvec *e,
+                                 const uint8_t seed[MLKEM_SYMBYTES])
+__contract__(
+  requires(memory_no_alias(pv, sizeof(mlk_polyvec)))
+  requires(memory_no_alias(e, sizeof(mlk_polyvec)))
+  requires(memory_no_alias(seed, MLKEM_SYMBYTES))
+  assigns(memory_slice(pv, sizeof(mlk_polyvec)))
+  assigns(memory_slice(e, sizeof(mlk_polyvec)))
+  ensures(forall(k0, 0, MLKEM_K, array_abs_bound(pv->vec[k0].coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)))
+  ensures(forall(k1, 0, MLKEM_K, array_abs_bound(e->vec[k1].coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)))
+)
+{
+#if MLKEM_K == 2
+  mlk_poly_getnoise_eta1_4x(&pv->vec[0], &pv->vec[1], &e->vec[0], &e->vec[1],
+                            seed, 0, 1, 2, 3); /* one batched call fills both pv and e */
+#elif MLKEM_K == 3
+  /*
+   * Only the first three output buffers are needed.
+   */
+  mlk_poly_getnoise_eta1_4x(&pv->vec[0], &pv->vec[1], &pv->vec[2], NULL, seed,
+                            0, 1, 2, 0xFF /* irrelevant */);
+  /* Same here: the fourth output is NULL, so its last argument is a dummy. */
+  mlk_poly_getnoise_eta1_4x(&e->vec[0], &e->vec[1], &e->vec[2], NULL, seed, 3,
+                            4, 5, 0xFF /* irrelevant */);
+#elif MLKEM_K == 4
+  mlk_poly_getnoise_eta1_4x(&pv->vec[0], &pv->vec[1], &pv->vec[2], &pv->vec[3],
+                            seed, 0, 1, 2, 3);
+  mlk_poly_getnoise_eta1_4x(&e->vec[0], &e->vec[1], &e->vec[2], &e->vec[3],
+                            seed, 4, 5, 6, 7);
+#endif /* MLKEM_K == 4 */
+}
+
+
+/* Reference: `indcpa_keypair_derand()` in the reference implementation @[REF].
+ *            - We use x4-batched versions of `poly_getnoise` to leverage
+ *              x4-batched Keccak-f1600.
+ *            - We use a different implementation of `gen_matrix()` which
+ *              uses x4-batched Keccak-f1600 (see `mlk_gen_matrix()` above).
+ *            - We use a mulcache to speed up matrix-vector multiplication.
+ *            - We include buffer zeroization.
+ */
+MLK_INTERNAL_API
+int mlk_indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
+                              const uint8_t coins[MLKEM_SYMBYTES],
+                              MLK_CONFIG_CONTEXT_PARAMETER_TYPE context)
+{
+  int ret = 0;
+  const uint8_t *publicseed;
+  const uint8_t *noiseseed;
+  MLK_ALLOC(buf, uint8_t, 2 * MLKEM_SYMBYTES, context);
+  MLK_ALLOC(coins_with_domain_separator, uint8_t, MLKEM_SYMBYTES + 1, context);
+  MLK_ALLOC(a, mlk_polymat, 1, context);
+  MLK_ALLOC(e, mlk_polyvec, 1, context);
+  MLK_ALLOC(pkpv, mlk_polyvec, 1, context);
+  MLK_ALLOC(skpv, mlk_polyvec, 1, context);
+  MLK_ALLOC(skpv_cache, mlk_polyvec_mulcache, 1, context);
+
+  if (buf == NULL || coins_with_domain_separator == NULL || a == NULL ||
+      e == NULL || pkpv == NULL || skpv == NULL || skpv_cache == NULL)
+  {
+    ret = MLK_ERR_OUT_OF_MEMORY;
+    goto cleanup;
+  }
+
+  publicseed = buf; /* first MLKEM_SYMBYTES bytes of the mlk_hash_g() output below */
+  noiseseed = buf + MLKEM_SYMBYTES; /* second MLKEM_SYMBYTES bytes of that output */
+
+  /* Concatenate coins with MLKEM_K for domain separation of security levels */
+  mlk_memcpy(coins_with_domain_separator, coins, MLKEM_SYMBYTES);
+  coins_with_domain_separator[MLKEM_SYMBYTES] = MLKEM_K;
+
+  mlk_hash_g(buf, coins_with_domain_separator, MLKEM_SYMBYTES + 1);
+
+  /*
+   * Declassify the public seed.
+   * Required to use it in conditional-branches in rejection sampling.
+   * This is needed because all output of randombytes is marked as secret
+   * (=undefined)
+   */
+  MLK_CT_TESTING_DECLASSIFY(publicseed, MLKEM_SYMBYTES);
+
+  mlk_gen_matrix(a, publicseed, 0 /* no transpose */);
+
+  mlk_keypair_getnoise(skpv, e, noiseseed);
+
+  mlk_polyvec_ntt(skpv);
+  mlk_polyvec_ntt(e);
+
+  mlk_polyvec_mulcache_compute(skpv_cache, skpv);
+  mlk_matvec_mul(pkpv, a, skpv, skpv_cache); /* pkpv = a * skpv */
+  mlk_polyvec_tomont(pkpv);
+
+  mlk_polyvec_add(pkpv, e); /* pkpv = a * skpv + e */
+  mlk_polyvec_reduce(pkpv);
+  mlk_polyvec_reduce(skpv);
+
+  mlk_pack_sk(sk, skpv);
+  mlk_pack_pk(pk, pkpv, publicseed);
+
+cleanup:
+  /* Specification: Partially implements
+   * @[FIPS203, Section 3.3, Destruction of intermediate values] */
+  MLK_FREE(skpv_cache, mlk_polyvec_mulcache, 1, context);
+  MLK_FREE(skpv, mlk_polyvec, 1, context);
+  MLK_FREE(pkpv, mlk_polyvec, 1, context);
+  MLK_FREE(e, mlk_polyvec, 1, context);
+  MLK_FREE(a, mlk_polymat, 1, context);
+  MLK_FREE(coins_with_domain_separator, uint8_t, MLKEM_SYMBYTES + 1, context);
+  MLK_FREE(buf, uint8_t, 2 * MLKEM_SYMBYTES, context);
+  return ret; /* 0, or MLK_ERR_OUT_OF_MEMORY if any allocation failed */
+}
+
+/* Reference: `indcpa_enc()` in the reference implementation @[REF].
+ *            - We use x4-batched versions of `poly_getnoise` to leverage
+ *              x4-batched Keccak-f1600.
+ *            - We use a different implementation of `gen_matrix()` which
+ *              uses x4-batched Keccak-f1600 (see `mlk_gen_matrix()` above).
+ *            - We use a mulcache to speed up matrix-vector multiplication.
+ *            - We include buffer zeroization.
+ */
+MLK_INTERNAL_API
+int mlk_indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
+                   const uint8_t m[MLKEM_INDCPA_MSGBYTES],
+                   const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
+                   const uint8_t coins[MLKEM_SYMBYTES],
+                   MLK_CONFIG_CONTEXT_PARAMETER_TYPE context)
+{
+  int ret = 0;
+  MLK_ALLOC(seed, uint8_t, MLKEM_SYMBYTES, context);
+  MLK_ALLOC(at, mlk_polymat, 1, context);
+  MLK_ALLOC(sp, mlk_polyvec, 1, context);
+  MLK_ALLOC(pkpv, mlk_polyvec, 1, context);
+  MLK_ALLOC(ep, mlk_polyvec, 1, context);
+  MLK_ALLOC(b, mlk_polyvec, 1, context);
+  MLK_ALLOC(v, mlk_poly, 1, context);
+  MLK_ALLOC(k, mlk_poly, 1, context);
+  MLK_ALLOC(epp, mlk_poly, 1, context);
+  MLK_ALLOC(sp_cache, mlk_polyvec_mulcache, 1, context);
+
+  if (seed == NULL || at == NULL || sp == NULL || pkpv == NULL || ep == NULL ||
+      b == NULL || v == NULL || k == NULL || epp == NULL || sp_cache == NULL)
+  {
+    ret = MLK_ERR_OUT_OF_MEMORY;
+    goto cleanup;
+  }
+
+  mlk_unpack_pk(pkpv, seed, pk);
+  mlk_poly_frommsg(k, m); /* k: the message m converted to a polynomial */
+
+  /*
+   * Declassify the public seed.
+   * Required to use it in conditional-branches in rejection sampling.
+   * This is needed because in re-encryption the publicseed originated from sk
+   * which is marked undefined.
+   */
+  MLK_CT_TESTING_DECLASSIFY(seed, MLKEM_SYMBYTES);
+
+  mlk_gen_matrix(at, seed, 1 /* transpose */);
+
+#if MLKEM_K == 2
+  mlk_poly_getnoise_eta1122_4x(&sp->vec[0], &sp->vec[1], &ep->vec[0],
+                               &ep->vec[1], coins, 0, 1, 2, 3);
+  mlk_poly_getnoise_eta2(epp, coins, 4);
+#elif MLKEM_K == 3
+  /*
+   * In this call, only the first three output buffers are needed.
+   * The last parameter is a dummy that's overwritten later.
+   */
+  mlk_poly_getnoise_eta1_4x(&sp->vec[0], &sp->vec[1], &sp->vec[2], NULL, coins,
+                            0, 1, 2, 0xFF /* irrelevant */);
+  /* The fourth output buffer in this call _is_ used. */
+  mlk_poly_getnoise_eta2_4x(&ep->vec[0], &ep->vec[1], &ep->vec[2], epp, coins,
+                            3, 4, 5, 6);
+#elif MLKEM_K == 4
+  mlk_poly_getnoise_eta1_4x(&sp->vec[0], &sp->vec[1], &sp->vec[2], &sp->vec[3],
+                            coins, 0, 1, 2, 3);
+  mlk_poly_getnoise_eta2_4x(&ep->vec[0], &ep->vec[1], &ep->vec[2], &ep->vec[3],
+                            coins, 4, 5, 6, 7);
+  mlk_poly_getnoise_eta2(epp, coins, 8);
+#endif /* MLKEM_K == 4 */
+
+  mlk_polyvec_ntt(sp);
+
+  mlk_polyvec_mulcache_compute(sp_cache, sp);
+  mlk_matvec_mul(b, at, sp, sp_cache); /* b = at * sp, with at generated as the transpose */
+  mlk_polyvec_basemul_acc_montgomery_cached(v, pkpv, sp, sp_cache); /* v = product of pkpv and sp, reusing sp_cache */
+
+  mlk_polyvec_invntt_tomont(b);
+  mlk_poly_invntt_tomont(v);
+
+  mlk_polyvec_add(b, ep);
+  mlk_poly_add(v, epp);
+  mlk_poly_add(v, k); /* add the message polynomial */
+
+  mlk_polyvec_reduce(b);
+  mlk_poly_reduce(v);
+
+  mlk_pack_ciphertext(c, b, v);
+
+cleanup:
+  /* Specification: Partially implements
+   * @[FIPS203, Section 3.3, Destruction of intermediate values] */
+  MLK_FREE(sp_cache, mlk_polyvec_mulcache, 1, context);
+  MLK_FREE(epp, mlk_poly, 1, context);
+  MLK_FREE(k, mlk_poly, 1, context);
+  MLK_FREE(v, mlk_poly, 1, context);
+  MLK_FREE(b, mlk_polyvec, 1, context);
+  MLK_FREE(ep, mlk_polyvec, 1, context);
+  MLK_FREE(pkpv, mlk_polyvec, 1, context);
+  MLK_FREE(sp, mlk_polyvec, 1, context);
+  MLK_FREE(at, mlk_polymat, 1, context);
+  MLK_FREE(seed, uint8_t, MLKEM_SYMBYTES, context);
+  return ret; /* 0, or MLK_ERR_OUT_OF_MEMORY if any allocation failed */
+}
+
+/* Reference: `indcpa_dec()` in the reference implementation @[REF].
+ *            - We use a mulcache for the scalar product.
+ *            - We include buffer zeroization.
*/
+MLK_INTERNAL_API
+int mlk_indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
+                   const uint8_t c[MLKEM_INDCPA_BYTES],
+                   const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
+                   MLK_CONFIG_CONTEXT_PARAMETER_TYPE context)
+{
+  int ret = 0;
+  MLK_ALLOC(b, mlk_polyvec, 1, context);
+  MLK_ALLOC(skpv, mlk_polyvec, 1, context);
+  MLK_ALLOC(v, mlk_poly, 1, context);
+  MLK_ALLOC(sb, mlk_poly, 1, context);
+  MLK_ALLOC(b_cache, mlk_polyvec_mulcache, 1, context);
+
+  if (b == NULL || skpv == NULL || v == NULL || sb == NULL || b_cache == NULL)
+  {
+    ret = MLK_ERR_OUT_OF_MEMORY;
+    goto cleanup;
+  }
+
+  mlk_unpack_ciphertext(b, v, c);
+  mlk_unpack_sk(skpv, sk);
+
+  mlk_polyvec_ntt(b);
+  mlk_polyvec_mulcache_compute(b_cache, b);
+  mlk_polyvec_basemul_acc_montgomery_cached(sb, skpv, b, b_cache); /* sb = scalar product of skpv and b */
+  mlk_poly_invntt_tomont(sb);
+
+  mlk_poly_sub(v, sb); /* v = v - sb */
+  mlk_poly_reduce(v);
+
+  mlk_poly_tomsg(m, v);
+
+cleanup:
+  /* Specification: Partially implements
+   * @[FIPS203, Section 3.3, Destruction of intermediate values] */
+  MLK_FREE(b_cache, mlk_polyvec_mulcache, 1, context);
+  MLK_FREE(sb, mlk_poly, 1, context);
+  MLK_FREE(v, mlk_poly, 1, context);
+  MLK_FREE(skpv, mlk_polyvec, 1, context);
+  MLK_FREE(b, mlk_polyvec, 1, context);
+  return ret; /* 0, or MLK_ERR_OUT_OF_MEMORY if any allocation failed */
+}
+
+/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
+ * Don't modify by hand -- this is auto-generated by scripts/autogen.
*/
+#undef mlk_pack_pk
+#undef mlk_unpack_pk
+#undef mlk_pack_sk
+#undef mlk_unpack_sk
+#undef mlk_pack_ciphertext
+#undef mlk_unpack_ciphertext
+#undef mlk_matvec_mul
+#undef mlk_polyvec_permute_bitrev_to_custom
+#undef mlk_polymat_permute_bitrev_to_custom
+#undef mlk_keypair_getnoise
diff --git a/mlkem_native/src/indcpa.h b/mlkem_native/src/indcpa.h
new file mode 100644
index 0000000..b31756d
--- /dev/null
+++ b/mlkem_native/src/indcpa.h
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
+ */
+
+/* References
+ * ==========
+ *
+ * - [FIPS203]
+ *   FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard
+ *   National Institute of Standards and Technology
+ *   https://csrc.nist.gov/pubs/fips/203/final
+ */
+
+#ifndef MLK_INDCPA_H
+#define MLK_INDCPA_H
+
+#include "cbmc.h"
+#include "common.h"
+#include "poly_k.h"
+
+#define mlk_gen_matrix MLK_NAMESPACE_K(gen_matrix)
+/*************************************************
+ * Name:        mlk_gen_matrix
+ *
+ * Description: Deterministically generate matrix A (or the transpose of A)
+ *              from a seed. Entries of the matrix are polynomials that look
+ *              uniformly random. Performs rejection sampling on output of
+ *              an XOF.
+ *
+ * Arguments:   - mlk_polymat a: pointer to output matrix A
+ *              - const uint8_t *seed: pointer to input seed
+ *              - int transposed: boolean deciding whether A or A^T is generated
+ *
+ * Specification: Implements @[FIPS203, Algorithm 13 (K-PKE.KeyGen), L3-7]
+ *                and @[FIPS203, Algorithm 14 (K-PKE.Encrypt), L4-8].
+ *                The `transposed` parameter only affects internal presentation.
+ *
+ **************************************************/
+MLK_INTERNAL_API
+void mlk_gen_matrix(mlk_polymat *a, const uint8_t seed[MLKEM_SYMBYTES],
+                    int transposed)
+__contract__(
+  requires(memory_no_alias(a, sizeof(mlk_polymat)))
+  requires(memory_no_alias(seed, MLKEM_SYMBYTES))
+  requires(transposed == 0 || transposed == 1) /* strict boolean: 0 selects A, 1 selects A^T */
+  assigns(memory_slice(a, sizeof(mlk_polymat)))
+  ensures(forall(x, 0, MLKEM_K, forall(y, 0, MLKEM_K,
+    array_bound(a->vec[x].vec[y].coeffs, 0, MLKEM_N, 0, MLKEM_Q))))
+);
+
+#define mlk_indcpa_keypair_derand \
+  MLK_NAMESPACE_K(indcpa_keypair_derand) MLK_CONTEXT_PARAMETERS_3
+/*************************************************
+ * Name:        mlk_indcpa_keypair_derand
+ *
+ * Description: Generates public and private key for the CPA-secure
+ *              public-key encryption scheme underlying ML-KEM
+ *
+ * Arguments:   - uint8_t *pk: pointer to output public key
+ *                  (of length MLKEM_INDCPA_PUBLICKEYBYTES bytes)
+ *              - uint8_t *sk: pointer to output private key
+ *                  (of length MLKEM_INDCPA_SECRETKEYBYTES bytes)
+ *              - const uint8_t *coins: pointer to input randomness
+ *                  (of length MLKEM_SYMBYTES bytes)
+ *
+ * Specification: Implements @[FIPS203, Algorithm 13 (K-PKE.KeyGen)].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+MLK_MUST_CHECK_RETURN_VALUE
+int mlk_indcpa_keypair_derand(uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
+                              uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
+                              const uint8_t coins[MLKEM_SYMBYTES],
+                              MLK_CONFIG_CONTEXT_PARAMETER_TYPE context)
+__contract__(
+  requires(memory_no_alias(pk, MLKEM_INDCPA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCPA_SECRETKEYBYTES))
+  requires(memory_no_alias(coins, MLKEM_SYMBYTES))
+  assigns(memory_slice(pk, MLKEM_INDCPA_PUBLICKEYBYTES))
+  assigns(memory_slice(sk, MLKEM_INDCPA_SECRETKEYBYTES))
+  ensures(return_value == 0 || return_value == MLK_ERR_FAIL ||
+          return_value == MLK_ERR_OUT_OF_MEMORY ||
+          return_value == MLK_ERR_RNG_FAIL)
+);
+
+#define mlk_indcpa_enc MLK_NAMESPACE_K(indcpa_enc) MLK_CONTEXT_PARAMETERS_4
+/*************************************************
+ * Name:        mlk_indcpa_enc
+ *
+ * Description: Encryption function of the CPA-secure
+ *              public-key encryption scheme underlying ML-KEM.
+ *
+ * Arguments:   - uint8_t *c: pointer to output ciphertext
+ *                  (of length MLKEM_INDCPA_BYTES bytes)
+ *              - const uint8_t *m: pointer to input message
+ *                  (of length MLKEM_INDCPA_MSGBYTES bytes)
+ *              - const uint8_t *pk: pointer to input public key
+ *                  (of length MLKEM_INDCPA_PUBLICKEYBYTES)
+ *              - const uint8_t *coins: pointer to input random coins used as
+ *                  seed (of length MLKEM_SYMBYTES) to deterministically generate
+ *                  all randomness
+ *
+ * Specification: Implements @[FIPS203, Algorithm 14 (K-PKE.Encrypt)].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+MLK_MUST_CHECK_RETURN_VALUE
+int mlk_indcpa_enc(uint8_t c[MLKEM_INDCPA_BYTES],
+                   const uint8_t m[MLKEM_INDCPA_MSGBYTES],
+                   const uint8_t pk[MLKEM_INDCPA_PUBLICKEYBYTES],
+                   const uint8_t coins[MLKEM_SYMBYTES],
+                   MLK_CONFIG_CONTEXT_PARAMETER_TYPE context)
+__contract__(
+  requires(memory_no_alias(c, MLKEM_INDCPA_BYTES))
+  requires(memory_no_alias(m, MLKEM_INDCPA_MSGBYTES))
+  requires(memory_no_alias(pk, MLKEM_INDCPA_PUBLICKEYBYTES))
+  requires(memory_no_alias(coins, MLKEM_SYMBYTES))
+  assigns(memory_slice(c, MLKEM_INDCPA_BYTES))
+  ensures(return_value == 0 || return_value == MLK_ERR_FAIL ||
+          return_value == MLK_ERR_OUT_OF_MEMORY)
+);
+
+#define mlk_indcpa_dec MLK_NAMESPACE_K(indcpa_dec) MLK_CONTEXT_PARAMETERS_3
+/*************************************************
+ * Name:        mlk_indcpa_dec
+ *
+ * Description: Decryption function of the CPA-secure
+ *              public-key encryption scheme underlying ML-KEM.
+ *
+ * Arguments:   - uint8_t *m: pointer to output decrypted message
+ *                  (of length MLKEM_INDCPA_MSGBYTES)
+ *              - const uint8_t *c: pointer to input ciphertext
+ *                  (of length MLKEM_INDCPA_BYTES)
+ *              - const uint8_t *sk: pointer to input secret key
+ *                  (of length MLKEM_INDCPA_SECRETKEYBYTES)
+ *
+ * Specification: Implements @[FIPS203, Algorithm 15 (K-PKE.Decrypt)].
+ *
+ **************************************************/
+MLK_INTERNAL_API
+MLK_MUST_CHECK_RETURN_VALUE
+int mlk_indcpa_dec(uint8_t m[MLKEM_INDCPA_MSGBYTES],
+                   const uint8_t c[MLKEM_INDCPA_BYTES],
+                   const uint8_t sk[MLKEM_INDCPA_SECRETKEYBYTES],
+                   MLK_CONFIG_CONTEXT_PARAMETER_TYPE context)
+__contract__(
+  requires(memory_no_alias(c, MLKEM_INDCPA_BYTES))
+  requires(memory_no_alias(m, MLKEM_INDCPA_MSGBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCPA_SECRETKEYBYTES))
+  assigns(memory_slice(m, MLKEM_INDCPA_MSGBYTES))
+  ensures(return_value == 0 || return_value == MLK_ERR_FAIL ||
+          return_value == MLK_ERR_OUT_OF_MEMORY)
+);
+
+#endif /* !MLK_INDCPA_H */
diff --git a/mlkem_native/src/kem.c b/mlkem_native/src/kem.c
new file mode 100644
index 0000000..3c82d6d
--- /dev/null
+++ b/mlkem_native/src/kem.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright (c) The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
+ */
+
+/* References
+ * ==========
+ *
+ * - [FIPS140_3_IG]
+ *   Implementation Guidance for FIPS 140-3 and the Cryptographic Module
+ *   Validation Program
+ *   National Institute of Standards and Technology
+ *   https://csrc.nist.gov/projects/cryptographic-module-validation-program/fips-140-3-ig-announcements
+ *
+ * - [FIPS203]
+ *   FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard
+ *   National Institute of Standards and Technology
+ *   https://csrc.nist.gov/pubs/fips/203/final
+ *
+ * - [REF]
+ *   CRYSTALS-Kyber C reference implementation
+ *   Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, Stehlé
+ *   https://github.com/pq-crystals/kyber/tree/main/ref
+ */
+
+#include "kem.h"
+
+#include "indcpa.h"
+#include "randombytes.h"
+#include "symmetric.h"
+#include "verify.h"
+
+/* Parameter set namespacing
+ * This is to facilitate building multiple instances
+ * of mlkem-native (e.g. with varying security levels)
+ * within a single compilation unit.
*/
+#define mlk_check_pct MLK_ADD_PARAM_SET(mlk_check_pct) MLK_CONTEXT_PARAMETERS_2
+/* End of parameter set namespacing */
+
+/* Reference: Not implemented in the reference implementation @[REF]. */
+MLK_EXTERNAL_API
+MLK_MUST_CHECK_RETURN_VALUE
+int mlk_kem_check_pk(const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                     MLK_CONFIG_CONTEXT_PARAMETER_TYPE context)
+{
+  int ret = 0;
+  MLK_ALLOC(p, mlk_polyvec, 1, context);
+  MLK_ALLOC(p_reencoded, uint8_t, MLKEM_POLYVECBYTES, context);
+
+  if (p == NULL || p_reencoded == NULL)
+  {
+    ret = MLK_ERR_OUT_OF_MEMORY;
+    goto cleanup;
+  }
+
+  mlk_polyvec_frombytes(p, pk); /* decode, reduce, re-encode ... */
+  mlk_polyvec_reduce(p);
+  mlk_polyvec_tobytes(p_reencoded, p); /* ... then compare with the input bytes below */
+
+  /* We use a constant-time memcmp here to avoid having to
+   * declassify the PK before the PCT has succeeded. */
+  ret = mlk_ct_memcmp(pk, p_reencoded, MLKEM_POLYVECBYTES) ? MLK_ERR_FAIL : 0; /* any difference fails the check */
+
+cleanup:
+  /* Specification: Partially implements
+   * @[FIPS203, Section 3.3, Destruction of intermediate values] */
+  MLK_FREE(p_reencoded, uint8_t, MLKEM_POLYVECBYTES, context);
+  MLK_FREE(p, mlk_polyvec, 1, context);
+  return ret;
+}
+
+
+/* Reference: Not implemented in the reference implementation @[REF]. */
+MLK_EXTERNAL_API
+MLK_MUST_CHECK_RETURN_VALUE
+int mlk_kem_check_sk(const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                     MLK_CONFIG_CONTEXT_PARAMETER_TYPE context)
+{
+  int ret = 0;
+  MLK_ALLOC(test, uint8_t, MLKEM_SYMBYTES, context);
+
+  if (test == NULL)
+  {
+    ret = MLK_ERR_OUT_OF_MEMORY;
+    goto cleanup;
+  }
+
+  /*
+   * The parts of `sk` being hashed and compared here are public, so
+   * no secret information is leaked through the runtime or the return value
+   * of this function.
+   */
+
+  /* Declassify the public part of the secret key */
+  MLK_CT_TESTING_DECLASSIFY(sk + MLKEM_INDCPA_SECRETKEYBYTES,
+                            MLKEM_INDCCA_PUBLICKEYBYTES);
+  MLK_CT_TESTING_DECLASSIFY(
+      sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, MLKEM_SYMBYTES);
+
+  mlk_hash_h(test, sk + MLKEM_INDCPA_SECRETKEYBYTES,
+             MLKEM_INDCCA_PUBLICKEYBYTES); /* test = hash of the public key embedded in sk */
+  /* This doesn't have to be a constant-time memcmp, but it's the only place
+   * in the library where a normal memcmp would be used otherwise, so for sake
+   * of minimizing stdlib dependency, we use our constant-time one anyway. */
+  ret = mlk_ct_memcmp(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES,
+                      test, MLKEM_SYMBYTES)
+            ? MLK_ERR_FAIL
+            : 0;
+
+cleanup:
+  /* Specification: Partially implements
+   * @[FIPS203, Section 3.3, Destruction of intermediate values] */
+  MLK_FREE(test, uint8_t, MLKEM_SYMBYTES, context);
+  return ret;
+}
+
+MLK_MUST_CHECK_RETURN_VALUE
+static int mlk_check_pct(uint8_t const pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                         uint8_t const sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                         MLK_CONFIG_CONTEXT_PARAMETER_TYPE context)
+__contract__(
+  requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES))
+  requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES))
+  ensures(return_value == 0 || return_value == MLK_ERR_FAIL ||
+          return_value == MLK_ERR_OUT_OF_MEMORY ||
+          return_value == MLK_ERR_RNG_FAIL)
+);
+
+#if defined(MLK_CONFIG_KEYGEN_PCT)
+/* Specification:
+ * Partially implements 'Pairwise Consistency Test' @[FIPS140_3_IG, p.87] and
+ * @[FIPS203, Section 7.1, Pairwise Consistency]. */
+
+/* Reference: Not implemented in the reference implementation @[REF].
*/
+MLK_MUST_CHECK_RETURN_VALUE
+static int mlk_check_pct(uint8_t const pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                         uint8_t const sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                         MLK_CONFIG_CONTEXT_PARAMETER_TYPE context)
+{
+  int ret = 0;
+  MLK_ALLOC(ct, uint8_t, MLKEM_INDCCA_CIPHERTEXTBYTES, context);
+  MLK_ALLOC(ss_enc, uint8_t, MLKEM_SSBYTES, context);
+  MLK_ALLOC(ss_dec, uint8_t, MLKEM_SSBYTES, context);
+
+  if (ct == NULL || ss_enc == NULL || ss_dec == NULL)
+  {
+    ret = MLK_ERR_OUT_OF_MEMORY;
+    goto cleanup;
+  }
+
+  ret = mlk_kem_enc(ct, ss_enc, pk, context); /* encapsulate to the fresh public key ... */
+  if (ret != 0)
+  {
+    goto cleanup;
+  }
+
+  ret = mlk_kem_dec(ss_dec, ct, sk, context); /* ... and decapsulate with the fresh secret key */
+  if (ret != 0)
+  {
+    goto cleanup;
+  }
+
+#if defined(MLK_CONFIG_KEYGEN_PCT_BREAKAGE_TEST)
+  /* Deliberately break PCT for testing purposes */
+  if (mlk_break_pct())
+  {
+    ss_enc[0] = ~ss_enc[0];
+  }
+#endif /* MLK_CONFIG_KEYGEN_PCT_BREAKAGE_TEST */
+
+  ret = mlk_ct_memcmp(ss_enc, ss_dec, MLKEM_SSBYTES); /* both shared secrets must agree */
+  /* The result of the PCT is public. */
+  MLK_CT_TESTING_DECLASSIFY(&ret, sizeof(ret));
+
+  if (ret != 0)
+  {
+    ret = MLK_ERR_FAIL; /* map any nonzero compare result to the library error code */
+  }
+
+cleanup:
+
+  /* Specification: Partially implements
+   * @[FIPS203, Section 3.3, Destruction of intermediate values] */
+  MLK_FREE(ss_dec, uint8_t, MLKEM_SSBYTES, context);
+  MLK_FREE(ss_enc, uint8_t, MLKEM_SSBYTES, context);
+  MLK_FREE(ct, uint8_t, MLKEM_INDCCA_CIPHERTEXTBYTES, context);
+  return ret;
+}
+#else  /* MLK_CONFIG_KEYGEN_PCT */
+MLK_MUST_CHECK_RETURN_VALUE
+static int mlk_check_pct(uint8_t const pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                         uint8_t const sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                         MLK_CONFIG_CONTEXT_PARAMETER_TYPE context)
+{
+  /* Skip PCT: MLK_CONFIG_KEYGEN_PCT is not set, so always report success. */
+  ((void)pk);
+  ((void)sk);
+#if defined(MLK_CONFIG_CONTEXT_PARAMETER)
+  ((void)context);
+#endif
+  return 0;
+}
+#endif /* !MLK_CONFIG_KEYGEN_PCT */
+
+/* Reference: `crypto_kem_keypair_derand()` in the reference implementation
+ *            @[REF].
+ *            - We optionally include PCT which is not present in
+ *              the reference code.
*/
+MLK_EXTERNAL_API
+int mlk_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                           uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                           const uint8_t coins[2 * MLKEM_SYMBYTES],
+                           MLK_CONFIG_CONTEXT_PARAMETER_TYPE context)
+{
+  int ret;
+
+  ret = mlk_indcpa_keypair_derand(pk, sk, coins, context); /* uses only the first MLKEM_SYMBYTES bytes of coins */
+  if (ret != 0)
+  {
+    goto cleanup;
+  }
+
+  mlk_memcpy(sk + MLKEM_INDCPA_SECRETKEYBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES); /* append pk after the IND-CPA secret key */
+  mlk_hash_h(sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES, pk,
+             MLKEM_INDCCA_PUBLICKEYBYTES); /* hash of pk, stored MLKEM_SYMBYTES before z */
+  /* Value z for pseudo-random output on reject */
+  mlk_memcpy(sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+             coins + MLKEM_SYMBYTES, MLKEM_SYMBYTES); /* z = second half of coins, last MLKEM_SYMBYTES of sk */
+
+  /* Declassify public key */
+  MLK_CT_TESTING_DECLASSIFY(pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+
+  /* Pairwise Consistency Test (PCT) @[FIPS140_3_IG, p.87] */
+  ret = mlk_check_pct(pk, sk, context);
+  if (ret != 0)
+  {
+    goto cleanup;
+  }
+
+cleanup:
+  if (ret != 0) /* on any failure, wipe both outputs so no partial key is returned */
+  {
+    mlk_zeroize(pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+    mlk_zeroize(sk, MLKEM_INDCCA_SECRETKEYBYTES);
+  }
+
+  return ret;
+}
+
+#if !defined(MLK_CONFIG_NO_RANDOMIZED_API)
+/* Reference: `crypto_kem_keypair()` in the reference implementation @[REF]
+ *            - We zeroize the stack buffer */
+MLK_EXTERNAL_API
+int mlk_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                    uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                    MLK_CONFIG_CONTEXT_PARAMETER_TYPE context)
+{
+  int ret = 0;
+  MLK_ALLOC(coins, uint8_t, 2 * MLKEM_SYMBYTES, context);
+
+  if (coins == NULL)
+  {
+    ret = MLK_ERR_OUT_OF_MEMORY;
+    goto cleanup;
+  }
+
+  /* Acquire necessary randomness, and mark it as secret. */
+  if (mlk_randombytes(coins, 2 * MLKEM_SYMBYTES) != 0)
+  {
+    ret = MLK_ERR_RNG_FAIL;
+    goto cleanup;
+  }
+
+  MLK_CT_TESTING_SECRET(coins, 2 * MLKEM_SYMBYTES);
+
+  ret = mlk_kem_keypair_derand(pk, sk, coins, context);
+
+cleanup:
+  /* Specification: Partially implements
+   * @[FIPS203, Section 3.3, Destruction of intermediate values] */
+  MLK_FREE(coins, uint8_t, 2 * MLKEM_SYMBYTES, context);
+  return ret;
+}
+#endif /* !MLK_CONFIG_NO_RANDOMIZED_API */
+
+/* Reference: `crypto_kem_enc_derand()` in the reference implementation @[REF]
+ *            - We include public key check
+ *            - We include stack buffer zeroization */
+MLK_EXTERNAL_API
+int mlk_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                       uint8_t ss[MLKEM_SSBYTES],
+                       const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                       const uint8_t coins[MLKEM_SYMBYTES],
+                       MLK_CONFIG_CONTEXT_PARAMETER_TYPE context)
+{
+  int ret = 0;
+  MLK_ALLOC(buf, uint8_t, 2 * MLKEM_SYMBYTES, context);
+  MLK_ALLOC(kr, uint8_t, 2 * MLKEM_SYMBYTES, context);
+
+  if (buf == NULL || kr == NULL)
+  {
+    ret = MLK_ERR_OUT_OF_MEMORY;
+    goto cleanup;
+  }
+
+  /* Specification: Implements @[FIPS203, Section 7.2, Modulus check] */
+  ret = mlk_kem_check_pk(pk, context);
+  if (ret != 0)
+  {
+    goto cleanup;
+  }
+
+  mlk_memcpy(buf, coins, MLKEM_SYMBYTES); /* buf = coins || hash of pk */
+
+  /* Multitarget countermeasure for coins + contributory KEM */
+  mlk_hash_h(buf + MLKEM_SYMBYTES, pk, MLKEM_INDCCA_PUBLICKEYBYTES);
+  mlk_hash_g(kr, buf, 2 * MLKEM_SYMBYTES); /* kr = shared secret || encryption coins */
+
+  /* coins are in kr+MLKEM_SYMBYTES */
+  ret = mlk_indcpa_enc(ct, buf, pk, kr + MLKEM_SYMBYTES, context); /* the first MLKEM_SYMBYTES bytes of buf are the message */
+  if (ret != 0)
+  {
+    goto cleanup;
+  }
+
+  mlk_memcpy(ss, kr, MLKEM_SYMBYTES);
+
+cleanup:
+  /* Specification: Partially implements
+   * @[FIPS203, Section 3.3, Destruction of intermediate values] */
+  MLK_FREE(kr, uint8_t, 2 * MLKEM_SYMBYTES, context);
+  MLK_FREE(buf, uint8_t, 2 * MLKEM_SYMBYTES, context);
+  return ret;
+}
+
+#if !defined(MLK_CONFIG_NO_RANDOMIZED_API)
+/* Reference: `crypto_kem_enc()` in the reference
implementation @[REF]
+ *            - We include stack buffer zeroization */
+MLK_EXTERNAL_API
+int mlk_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                uint8_t ss[MLKEM_SSBYTES],
+                const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES],
+                MLK_CONFIG_CONTEXT_PARAMETER_TYPE context)
+{
+  int ret = 0;
+  MLK_ALLOC(coins, uint8_t, MLKEM_SYMBYTES, context);
+
+  if (coins == NULL)
+  {
+    ret = MLK_ERR_OUT_OF_MEMORY;
+    goto cleanup;
+  }
+
+  if (mlk_randombytes(coins, MLKEM_SYMBYTES) != 0)
+  {
+    ret = MLK_ERR_RNG_FAIL;
+    goto cleanup;
+  }
+
+  MLK_CT_TESTING_SECRET(coins, MLKEM_SYMBYTES);
+
+  ret = mlk_kem_enc_derand(ct, ss, pk, coins, context);
+
+cleanup:
+  /* Specification: Partially implements
+   * @[FIPS203, Section 3.3, Destruction of intermediate values] */
+  MLK_FREE(coins, uint8_t, MLKEM_SYMBYTES, context);
+  return ret;
+}
+#endif /* !MLK_CONFIG_NO_RANDOMIZED_API */
+
+/* Reference: `crypto_kem_dec()` in the reference implementation @[REF]
+ *            - We include secret key check
+ *            - We include stack buffer zeroization */
+MLK_EXTERNAL_API
+int mlk_kem_dec(uint8_t ss[MLKEM_SSBYTES],
+                const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES],
+                const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES],
+                MLK_CONFIG_CONTEXT_PARAMETER_TYPE context)
+{
+  int ret = 0;
+  uint8_t fail;
+  const uint8_t *pk = sk + MLKEM_INDCPA_SECRETKEYBYTES; /* public key embedded in sk */
+  MLK_ALLOC(buf, uint8_t, 2 * MLKEM_SYMBYTES, context);
+  MLK_ALLOC(kr, uint8_t, 2 * MLKEM_SYMBYTES, context);
+  MLK_ALLOC(tmp, uint8_t, MLKEM_SYMBYTES + MLKEM_INDCCA_CIPHERTEXTBYTES,
+            context);
+
+  if (buf == NULL || kr == NULL || tmp == NULL)
+  {
+    ret = MLK_ERR_OUT_OF_MEMORY;
+    goto cleanup;
+  }
+
+  /* Specification: Implements @[FIPS203, Section 7.3, Hash check] */
+  ret = mlk_kem_check_sk(sk, context);
+  if (ret != 0)
+  {
+    goto cleanup;
+  }
+
+  ret = mlk_indcpa_dec(buf, ct, sk, context); /* decrypted message goes into the first half of buf */
+  if (ret != 0)
+  {
+    goto cleanup;
+  }
+
+  /* Multitarget countermeasure for coins + contributory KEM */
+  mlk_memcpy(buf + MLKEM_SYMBYTES,
+             sk + MLKEM_INDCCA_SECRETKEYBYTES - 2 * MLKEM_SYMBYTES,
+             MLKEM_SYMBYTES); /* second half of buf = public-key hash stored in sk */
+  mlk_hash_g(kr, buf, 2 * MLKEM_SYMBYTES);
+
+  /* Recompute and compare ciphertext */
+  /* coins are in kr+MLKEM_SYMBYTES */
+  ret = mlk_indcpa_enc(tmp, buf, pk, kr + MLKEM_SYMBYTES, context);
+  if (ret != 0)
+  {
+    goto cleanup;
+  }
+
+  fail = mlk_ct_memcmp(ct, tmp, MLKEM_INDCCA_CIPHERTEXTBYTES); /* nonzero if the re-encryption differs from ct */
+
+  /* Compute rejection key */
+  mlk_memcpy(tmp, sk + MLKEM_INDCCA_SECRETKEYBYTES - MLKEM_SYMBYTES,
+             MLKEM_SYMBYTES); /* tmp = last MLKEM_SYMBYTES bytes of sk || ct */
+  mlk_memcpy(tmp + MLKEM_SYMBYTES, ct, MLKEM_INDCCA_CIPHERTEXTBYTES);
+  mlk_hash_j(ss, tmp, MLKEM_SYMBYTES + MLKEM_INDCCA_CIPHERTEXTBYTES); /* ss starts out as the rejection key */
+
+  /* Copy true key to return buffer if fail is 0 */
+  mlk_ct_cmov_zero(ss, kr, MLKEM_SYMBYTES, fail);
+
+cleanup:
+  /* Specification: Partially implements
+   * @[FIPS203, Section 3.3, Destruction of intermediate values] */
+  MLK_FREE(tmp, uint8_t, MLKEM_SYMBYTES + MLKEM_INDCCA_CIPHERTEXTBYTES,
+           context);
+  MLK_FREE(kr, uint8_t, 2 * MLKEM_SYMBYTES, context);
+  MLK_FREE(buf, uint8_t, 2 * MLKEM_SYMBYTES, context);
+
+  return ret; /* a ciphertext mismatch is not an error: ret stays 0 and ss holds the rejection key */
+}
+
+/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
+ * Don't modify by hand -- this is auto-generated by scripts/autogen.
*/
+#undef mlk_check_pct
diff --git a/mlkem_native/src/kem.h b/mlkem_native/src/kem.h
new file mode 100644
index 0000000..0502715
--- /dev/null
+++ b/mlkem_native/src/kem.h
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) The mlkem-native project authors
+ * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
+ */
+
+/* References
+ * ==========
+ *
+ * - [FIPS203]
+ *   FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard
+ *   National Institute of Standards and Technology
+ *   https://csrc.nist.gov/pubs/fips/203/final
+ *
+ * - [REF]
+ *   CRYSTALS-Kyber C reference implementation
+ *   Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, Stehlé
+ *   https://github.com/pq-crystals/kyber/tree/main/ref
+ */
+
+#ifndef MLK_KEM_H
+#define MLK_KEM_H
+
+#include "cbmc.h"
+#include "common.h"
+#include "sys.h"
+
+#if defined(MLK_CHECK_APIS)
+/* Include to ensure consistency between internal kem.h
+ * and external mlkem_native.h. The #error checks below fail the build
+ * if the key or ciphertext sizes of the two headers disagree. */
+#include "mlkem_native.h"
+
+#if MLKEM_INDCCA_SECRETKEYBYTES != \
+    MLKEM_SECRETKEYBYTES(MLK_CONFIG_PARAMETER_SET)
+#error Mismatch for SECRETKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_PUBLICKEYBYTES != \
+    MLKEM_PUBLICKEYBYTES(MLK_CONFIG_PARAMETER_SET)
+#error Mismatch for PUBLICKEYBYTES between kem.h and mlkem_native.h
+#endif
+
+#if MLKEM_INDCCA_CIPHERTEXTBYTES != \
+    MLKEM_CIPHERTEXTBYTES(MLK_CONFIG_PARAMETER_SET)
+#error Mismatch for CIPHERTEXTBYTES between kem.h and mlkem_native.h
+#endif
+
+#endif /* MLK_CHECK_APIS */
+
+#define mlk_kem_keypair_derand \
+  MLK_NAMESPACE_K(keypair_derand) MLK_CONTEXT_PARAMETERS_3
+#define mlk_kem_keypair MLK_NAMESPACE_K(keypair) MLK_CONTEXT_PARAMETERS_2
+#define mlk_kem_enc_derand MLK_NAMESPACE_K(enc_derand) MLK_CONTEXT_PARAMETERS_4
+#define mlk_kem_enc MLK_NAMESPACE_K(enc) MLK_CONTEXT_PARAMETERS_3
+#define mlk_kem_dec MLK_NAMESPACE_K(dec) MLK_CONTEXT_PARAMETERS_3
+#define mlk_kem_check_pk MLK_NAMESPACE_K(check_pk) MLK_CONTEXT_PARAMETERS_1
+#define mlk_kem_check_sk
MLK_NAMESPACE_K(check_sk) MLK_CONTEXT_PARAMETERS_1 + +/************************************************* + * Name: mlk_kem_check_pk + * + * Description: Implements modulus check mandated by FIPS 203, + * i.e., ensures that coefficients are in [0,q-1]. + * + * Arguments: - const uint8_t *pk: pointer to input public key + * (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES + * bytes) + * + * Returns: - 0 on success + * - MLK_ERR_FAIL: If the modulus check failed. + * - MLK_ERR_OUT_OF_MEMORY: If MLK_CONFIG_CUSTOM_ALLOC_FREE is + * used and an allocation via MLK_CUSTOM_ALLOC returned NULL. + * + * Specification: Implements @[FIPS203, Section 7.2, 'modulus check'] + * + **************************************************/ + +/* Reference: Not implemented in the reference implementation @[REF]. */ +MLK_EXTERNAL_API +MLK_MUST_CHECK_RETURN_VALUE +int mlk_kem_check_pk(const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES], + MLK_CONFIG_CONTEXT_PARAMETER_TYPE context) +__contract__( + requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES)) + ensures(return_value == 0 || return_value == MLK_ERR_FAIL || + return_value == MLK_ERR_OUT_OF_MEMORY) +); + + +/************************************************* + * Name: mlk_kem_check_sk + * + * Description: Implements public key hash check mandated by FIPS 203, + * i.e., ensures that + * sk[768k+32 : 768k+64] = H(pk) = H(sk[384k : 768k+32]) + * + * Arguments: - const uint8_t *sk: pointer to input private key + * (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES + * bytes) + * + * Returns: - 0 on success + * - MLK_ERR_FAIL: If the public key hash check failed. + * - MLK_ERR_OUT_OF_MEMORY: If MLK_CONFIG_CUSTOM_ALLOC_FREE is + * used and an allocation via MLK_CUSTOM_ALLOC returned NULL. + * + * Specification: Implements @[FIPS203, Section 7.3, 'hash check'] + * + **************************************************/ + +/* Reference: Not implemented in the reference implementation @[REF]. 
*/ +MLK_EXTERNAL_API +MLK_MUST_CHECK_RETURN_VALUE +int mlk_kem_check_sk(const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES], + MLK_CONFIG_CONTEXT_PARAMETER_TYPE context) +__contract__( + requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES)) + ensures(return_value == 0 || return_value == MLK_ERR_FAIL || + return_value == MLK_ERR_OUT_OF_MEMORY) +); + +/************************************************* + * Name: mlk_kem_keypair_derand + * + * Description: Generates public and private key + * for CCA-secure ML-KEM key encapsulation mechanism + * + * Arguments: - uint8_t *pk: pointer to output public key + * (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES + * bytes) + * - uint8_t *sk: pointer to output private key + * (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES + * bytes) + * - uint8_t *coins: pointer to input randomness + * (an already allocated array filled with 2*MLKEM_SYMBYTES + * random bytes) + * + * Returns: - 0: On success + * - MLK_ERR_FAIL: If MLK_CONFIG_KEYGEN_PCT is enabled and the + * PCT failed. + * - MLK_ERR_OUT_OF_MEMORY: If MLK_CONFIG_CUSTOM_ALLOC_FREE is + * used and an allocation via MLK_CUSTOM_ALLOC returned NULL. 
+ * + * Specification: Implements @[FIPS203, Algorithm 16, ML-KEM.KeyGen_Internal] + * + **************************************************/ +MLK_EXTERNAL_API +MLK_MUST_CHECK_RETURN_VALUE +int mlk_kem_keypair_derand(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES], + uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES], + const uint8_t coins[2 * MLKEM_SYMBYTES], + MLK_CONFIG_CONTEXT_PARAMETER_TYPE context) +__contract__( + requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES)) + requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES)) + requires(memory_no_alias(coins, 2 * MLKEM_SYMBYTES)) + assigns(memory_slice(pk, MLKEM_INDCCA_PUBLICKEYBYTES)) + assigns(memory_slice(sk, MLKEM_INDCCA_SECRETKEYBYTES)) + ensures(return_value == 0 || return_value == MLK_ERR_FAIL || + return_value == MLK_ERR_OUT_OF_MEMORY || + return_value == MLK_ERR_RNG_FAIL) +); + +/************************************************* + * Name: mlk_kem_keypair + * + * Description: Generates public and private key + * for CCA-secure ML-KEM key encapsulation mechanism + * + * Arguments: - uint8_t *pk: pointer to output public key + * (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES + * bytes) + * - uint8_t *sk: pointer to output private key + * (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES + * bytes) + * + * Returns: - 0: On success + * - MLK_ERR_OUT_OF_MEMORY: If MLK_CONFIG_CUSTOM_ALLOC_FREE is + * used and an allocation via MLK_CUSTOM_ALLOC returned NULL. + * - MLK_ERR_RNG_FAIL: Random number generation failed. + * - MLK_ERR_FAIL: If MLK_CONFIG_KEYGEN_PCT is enabled and the + * PCT failed. 
+ * + * Specification: Implements @[FIPS203, Algorithm 19, ML-KEM.KeyGen] + * + **************************************************/ +MLK_EXTERNAL_API +MLK_MUST_CHECK_RETURN_VALUE +int mlk_kem_keypair(uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES], + uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES], + MLK_CONFIG_CONTEXT_PARAMETER_TYPE context) +__contract__( + requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES)) + requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES)) + assigns(memory_slice(pk, MLKEM_INDCCA_PUBLICKEYBYTES)) + assigns(memory_slice(sk, MLKEM_INDCCA_SECRETKEYBYTES)) + ensures(return_value == 0 || return_value == MLK_ERR_FAIL || + return_value == MLK_ERR_OUT_OF_MEMORY || + return_value == MLK_ERR_RNG_FAIL) +); + +/************************************************* + * Name: mlk_kem_enc_derand + * + * Description: Generates cipher text and shared + * secret for given public key + * + * Arguments: - uint8_t *ct: pointer to output cipher text + * (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES + * bytes) + * - uint8_t *ss: pointer to output shared secret + * (an already allocated array of MLKEM_SSBYTES bytes) + * - const uint8_t *pk: pointer to input public key + * (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES + * bytes) + * - const uint8_t *coins: pointer to input randomness + * (an already allocated array filled with MLKEM_SYMBYTES random + * bytes) + * + * Returns: - 0 on success + * - MLK_ERR_FAIL: If the 'modulus check' @[FIPS203, Section 7.2] + * for the public key fails. + * - MLK_ERR_OUT_OF_MEMORY: If MLK_CONFIG_CUSTOM_ALLOC_FREE is + * used and an allocation via MLK_CUSTOM_ALLOC returned NULL. 
+ * + * Specification: Implements @[FIPS203, Algorithm 17, ML-KEM.Encaps_Internal] + * + **************************************************/ +MLK_EXTERNAL_API +MLK_MUST_CHECK_RETURN_VALUE +int mlk_kem_enc_derand(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES], + uint8_t ss[MLKEM_SSBYTES], + const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES], + const uint8_t coins[MLKEM_SYMBYTES], + MLK_CONFIG_CONTEXT_PARAMETER_TYPE context) +__contract__( + requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES)) + requires(memory_no_alias(ss, MLKEM_SSBYTES)) + requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES)) + requires(memory_no_alias(coins, MLKEM_SYMBYTES)) + assigns(memory_slice(ct, MLKEM_INDCCA_CIPHERTEXTBYTES)) + assigns(memory_slice(ss, MLKEM_SSBYTES)) + ensures(return_value == 0 || return_value == MLK_ERR_FAIL || + return_value == MLK_ERR_OUT_OF_MEMORY) +); + +/************************************************* + * Name: mlk_kem_enc + * + * Description: Generates cipher text and shared + * secret for given public key + * + * Arguments: - uint8_t *ct: pointer to output cipher text + * (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES + * bytes) + * - uint8_t *ss: pointer to output shared secret + * (an already allocated array of MLKEM_SSBYTES bytes) + * - const uint8_t *pk: pointer to input public key + * (an already allocated array of MLKEM_INDCCA_PUBLICKEYBYTES + * bytes) + * + * Returns: - 0 on success + * - MLK_ERR_OUT_OF_MEMORY: If MLK_CONFIG_CUSTOM_ALLOC_FREE is + * used and an allocation via MLK_CUSTOM_ALLOC returned NULL. + * - MLK_ERR_RNG_FAIL: Random number generation failed. + * - MLK_ERR_FAIL: If the 'modulus check' @[FIPS203, Section 7.2] + * for the public key fails. 
+ * + * Specification: Implements @[FIPS203, Algorithm 20, ML-KEM.Encaps] + * + **************************************************/ +MLK_EXTERNAL_API +MLK_MUST_CHECK_RETURN_VALUE +int mlk_kem_enc(uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES], + uint8_t ss[MLKEM_SSBYTES], + const uint8_t pk[MLKEM_INDCCA_PUBLICKEYBYTES], + MLK_CONFIG_CONTEXT_PARAMETER_TYPE context) +__contract__( + requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES)) + requires(memory_no_alias(ss, MLKEM_SSBYTES)) + requires(memory_no_alias(pk, MLKEM_INDCCA_PUBLICKEYBYTES)) + assigns(memory_slice(ct, MLKEM_INDCCA_CIPHERTEXTBYTES)) + assigns(memory_slice(ss, MLKEM_SSBYTES)) + ensures(return_value == 0 || return_value == MLK_ERR_FAIL || + return_value == MLK_ERR_OUT_OF_MEMORY || + return_value == MLK_ERR_RNG_FAIL) +); + +/************************************************* + * Name: mlk_kem_dec + * + * Description: Generates shared secret for given + * cipher text and private key + * + * Arguments: - uint8_t *ss: pointer to output shared secret + * (an already allocated array of MLKEM_SSBYTES bytes) + * - const uint8_t *ct: pointer to input cipher text + * (an already allocated array of MLKEM_INDCCA_CIPHERTEXTBYTES + * bytes) + * - const uint8_t *sk: pointer to input private key + * (an already allocated array of MLKEM_INDCCA_SECRETKEYBYTES + * bytes) + * + * Returns: - 0 on success + * - MLK_ERR_FAIL: If the 'hash check' @[FIPS203, Section 7.3] + * for the secret key fails. + * - MLK_ERR_OUT_OF_MEMORY: If MLK_CONFIG_CUSTOM_ALLOC_FREE is + * used and an allocation via MLK_CUSTOM_ALLOC returned NULL. 
+ * + * Specification: Implements @[FIPS203, Algorithm 21, ML-KEM.Decaps] + * + **************************************************/ +MLK_EXTERNAL_API +MLK_MUST_CHECK_RETURN_VALUE +int mlk_kem_dec(uint8_t ss[MLKEM_SSBYTES], + const uint8_t ct[MLKEM_INDCCA_CIPHERTEXTBYTES], + const uint8_t sk[MLKEM_INDCCA_SECRETKEYBYTES], + MLK_CONFIG_CONTEXT_PARAMETER_TYPE context) +__contract__( + requires(memory_no_alias(ss, MLKEM_SSBYTES)) + requires(memory_no_alias(ct, MLKEM_INDCCA_CIPHERTEXTBYTES)) + requires(memory_no_alias(sk, MLKEM_INDCCA_SECRETKEYBYTES)) + assigns(memory_slice(ss, MLKEM_SSBYTES)) + ensures(return_value == 0 || return_value == MLK_ERR_FAIL || + return_value == MLK_ERR_OUT_OF_MEMORY) +); + +#endif /* !MLK_KEM_H */ diff --git a/mlkem_native/src/params.h b/mlkem_native/src/params.h new file mode 100644 index 0000000..0459853 --- /dev/null +++ b/mlkem_native/src/params.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ +#ifndef MLK_PARAMS_H +#define MLK_PARAMS_H + +#if !defined(MLK_CONFIG_PARAMETER_SET) +#error MLK_CONFIG_PARAMETER_SET is not defined +#endif + +#if MLK_CONFIG_PARAMETER_SET == 512 +#define MLKEM_K 2 +#elif MLK_CONFIG_PARAMETER_SET == 768 +#define MLKEM_K 3 +#elif MLK_CONFIG_PARAMETER_SET == 1024 +#define MLKEM_K 4 +#else +#error Invalid value for MLK_CONFIG_PARAMETER_SET. Must be 512, 768, or 1024. 
+#endif + +#define MLKEM_N 256 +#define MLKEM_Q 3329 +#define MLKEM_Q_HALF ((MLKEM_Q + 1) / 2) /* 1665 */ +#define MLKEM_UINT12_LIMIT 4096 + +#define MLKEM_SYMBYTES 32 /* size in bytes of hashes, and seeds */ +#define MLKEM_SSBYTES 32 /* size in bytes of shared key */ + +#define MLKEM_POLYBYTES 384 +#define MLKEM_POLYVECBYTES (MLKEM_K * MLKEM_POLYBYTES) + +#define MLKEM_POLYCOMPRESSEDBYTES_D4 128 +#define MLKEM_POLYCOMPRESSEDBYTES_D5 160 +#define MLKEM_POLYCOMPRESSEDBYTES_D10 320 +#define MLKEM_POLYCOMPRESSEDBYTES_D11 352 + +#if MLKEM_K == 2 +#define MLKEM_ETA1 3 +#define MLKEM_DU 10 +#define MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 +#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) +#elif MLKEM_K == 3 +#define MLKEM_ETA1 2 +#define MLKEM_DU 10 +#define MLKEM_DV 4 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D4 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D10 +#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) +#elif MLKEM_K == 4 +#define MLKEM_ETA1 2 +#define MLKEM_DU 11 +#define MLKEM_DV 5 +#define MLKEM_POLYCOMPRESSEDBYTES_DV MLKEM_POLYCOMPRESSEDBYTES_D5 +#define MLKEM_POLYCOMPRESSEDBYTES_DU MLKEM_POLYCOMPRESSEDBYTES_D11 +#define MLKEM_POLYVECCOMPRESSEDBYTES_DU (MLKEM_K * MLKEM_POLYCOMPRESSEDBYTES_DU) +#endif /* MLKEM_K == 4 */ + +#define MLKEM_ETA2 2 + +#define MLKEM_INDCPA_MSGBYTES (MLKEM_SYMBYTES) +#define MLKEM_INDCPA_PUBLICKEYBYTES (MLKEM_POLYVECBYTES + MLKEM_SYMBYTES) +#define MLKEM_INDCPA_SECRETKEYBYTES (MLKEM_POLYVECBYTES) +#define MLKEM_INDCPA_BYTES \ + (MLKEM_POLYVECCOMPRESSEDBYTES_DU + MLKEM_POLYCOMPRESSEDBYTES_DV) + +#define MLKEM_INDCCA_PUBLICKEYBYTES (MLKEM_INDCPA_PUBLICKEYBYTES) +/* 2 * 32 additional bytes: H(pk) plus the secret used for the rejection key */ +#define MLKEM_INDCCA_SECRETKEYBYTES \ + (MLKEM_INDCPA_SECRETKEYBYTES + MLKEM_INDCPA_PUBLICKEYBYTES + \ + 2 * 
MLKEM_SYMBYTES) +#define MLKEM_INDCCA_CIPHERTEXTBYTES (MLKEM_INDCPA_BYTES) + +#endif /* !MLK_PARAMS_H */ diff --git a/mlkem_native/src/poly.c b/mlkem_native/src/poly.c new file mode 100644 index 0000000..564d5d7 --- /dev/null +++ b/mlkem_native/src/poly.c @@ -0,0 +1,572 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [NeonNTT] + * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 + * Becker, Hwang, Kannwischer, Yang, Yang + * https://eprint.iacr.org/2021/986 + * + * - [REF] + * CRYSTALS-Kyber C reference implementation + * Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, Stehlé + * https://github.com/pq-crystals/kyber/tree/main/ref + */ + +#include "common.h" +#if !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) + + +#include "cbmc.h" +#include "debug.h" +#include "poly.h" +#include "sampling.h" +#include "symmetric.h" +#include "verify.h" + +/************************************************* + * Name: mlk_fqmul + * + * Description: Montgomery multiplication modulo MLKEM_Q + * + * Arguments: - int16_t a: first factor + * Can be any int16_t. + * - int16_t b: second factor. + * Must be signed canonical (abs value <(MLKEM_Q+1)/2) + * + * Returns 16-bit integer congruent to a*b*R^{-1} mod MLKEM_Q, and + * smaller than MLKEM_Q in absolute value. + * + **************************************************/ + +/* Reference: `fqmul()` in the reference implementation @[REF]. 
*/ +static MLK_INLINE int16_t mlk_fqmul(int16_t a, int16_t b) +__contract__( + requires(b > -MLKEM_Q_HALF && b < MLKEM_Q_HALF) + ensures(return_value > -MLKEM_Q && return_value < MLKEM_Q) +) +{ + int16_t res; + mlk_assert_abs_bound(&b, 1, MLKEM_Q_HALF); + + res = mlk_montgomery_reduce((int32_t)a * (int32_t)b); + /* Bounds: + * |res| <= ceil(|a| * |b| / 2^16) + (MLKEM_Q + 1) / 2 + * <= ceil(2^15 * ((MLKEM_Q - 1)/2) / 2^16) + (MLKEM_Q + 1) / 2 + * <= ceil((MLKEM_Q - 1) / 4) + (MLKEM_Q + 1) / 2 + * < MLKEM_Q + */ + + mlk_assert_abs_bound(&res, 1, MLKEM_Q); + return res; +} + +/************************************************* + * Name: mlk_barrett_reduce + * + * Description: Barrett reduction; given a 16-bit integer a, computes + * centered representative congruent to a mod q in + * {-(q-1)/2,...,(q-1)/2} + * + * Arguments: - int16_t a: input integer to be reduced + * + * Returns: integer in {-(q-1)/2,...,(q-1)/2} congruent to a modulo q. + * + **************************************************/ + +/* Reference: `barrett_reduce()` in the reference implementation @[REF]. */ +static MLK_INLINE int16_t mlk_barrett_reduce(int16_t a) +__contract__( + ensures(return_value > -MLKEM_Q_HALF && return_value < MLKEM_Q_HALF) +) +{ + /* Barrett reduction approximates + * ``` + * round(a/MLKEM_Q) + * = round(a*(2^N/MLKEM_Q))/2^N) + * ~= round(a*round(2^N/MLKEM_Q)/2^N) + * ``` + * Here, we pick N=26. + */ + const int32_t magic = 20159; /* check-magic: 20159 == round(2^26 / MLKEM_Q) */ + + /* + * PORTABILITY: Right-shift on a signed integer is + * implementation-defined for negative left argument. + * Here, we assume it's sign-preserving "arithmetic" shift right. + * See (C99 6.5.7 (5)) + */ + const int32_t t = (magic * a + ((int32_t)1 << 25)) >> 26; + + /* + * t is in -10 .. 
+10, so we need 32-bit math to + * evaluate t * MLKEM_Q and the subsequent subtraction + */ + int16_t res = (int16_t)(a - t * MLKEM_Q); + + mlk_assert_abs_bound(&res, 1, MLKEM_Q_HALF); + return res; +} + +/* Reference: `poly_tomont()` in the reference implementation @[REF]. */ +MLK_STATIC_TESTABLE void mlk_poly_tomont_c(mlk_poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_Q)) +) +{ + unsigned i; + const int16_t f = 1353; /* check-magic: 1353 == signed_mod(2^32, MLKEM_Q) */ + for (i = 0; i < MLKEM_N; i++) + __loop__( + invariant(i <= MLKEM_N) + invariant(array_abs_bound(r->coeffs, 0, i, MLKEM_Q))) + { + r->coeffs[i] = mlk_fqmul(r->coeffs[i], f); + } + + mlk_assert_abs_bound(r, MLKEM_N, MLKEM_Q); +} + +MLK_INTERNAL_API +void mlk_poly_tomont(mlk_poly *r) +{ +#if defined(MLK_USE_NATIVE_POLY_TOMONT) + int ret; + ret = mlk_poly_tomont_native(r->coeffs); + if (ret == MLK_NATIVE_FUNC_SUCCESS) + { + mlk_assert_abs_bound(r, MLKEM_N, MLKEM_Q); + return; + } +#endif /* MLK_USE_NATIVE_POLY_TOMONT */ + + mlk_poly_tomont_c(r); +} + +/************************************************************ + * Name: mlk_scalar_signed_to_unsigned_q + * + * Description: Constant-time conversion of signed representatives + * modulo MLKEM_Q within range (-(MLKEM_Q-1) .. (MLKEM_Q-1)) + * into unsigned representatives within range (0..(MLKEM_Q-1)). + * + * Arguments: c: signed coefficient to be converted + * + ************************************************************/ + +/* Reference: Not present in the reference implementation @[REF]. + * - Used here to implement different semantics of `poly_reduce()`; + * see below. in the reference implementation @[REF], this logic is + * part of all compression functions (see `compress.c`). 
*/ +static MLK_INLINE int16_t mlk_scalar_signed_to_unsigned_q(int16_t c) +__contract__( + requires(c > -MLKEM_Q && c < MLKEM_Q) + ensures(return_value >= 0 && return_value < MLKEM_Q) + ensures(return_value == (int32_t)c + (((int32_t)c < 0) * MLKEM_Q))) +{ + mlk_assert_abs_bound(&c, 1, MLKEM_Q); + + /* Add MLKEM_Q if c is negative, but in constant time. + * + * Note that c + MLKEM_Q does not overflow in int16_t, + * so the cast to int16_t is safe. */ + c = mlk_ct_sel_int16((int16_t)(c + MLKEM_Q), c, mlk_ct_cmask_neg_i16(c)); + + mlk_assert_bound(&c, 1, 0, MLKEM_Q); + return c; +} + +/* Reference: `poly_reduce()` in the reference implementation @[REF] + * - We use _unsigned_ canonical outputs, while the reference + * implementation uses _signed_ canonical outputs. + * Accordingly, we need a conditional addition of MLKEM_Q + * here to go from signed to unsigned representatives. + * This conditional addition is then dropped from all + * polynomial compression functions instead (see `compress.c`). 
*/ +MLK_STATIC_TESTABLE void mlk_poly_reduce_c(mlk_poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) +) +{ + unsigned i; + + for (i = 0; i < MLKEM_N; i++) + __loop__( + invariant(i <= MLKEM_N) + invariant(array_bound(r->coeffs, 0, i, 0, MLKEM_Q))) + { + /* Barrett reduction, giving signed canonical representative */ + int16_t t = mlk_barrett_reduce(r->coeffs[i]); + /* Conditional addition to get unsigned canonical representative */ + r->coeffs[i] = mlk_scalar_signed_to_unsigned_q(t); + } + + mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q); +} + +MLK_INTERNAL_API +void mlk_poly_reduce(mlk_poly *r) +{ +#if defined(MLK_USE_NATIVE_POLY_REDUCE) + int ret; + ret = mlk_poly_reduce_native(r->coeffs); + if (ret == MLK_NATIVE_FUNC_SUCCESS) + { + mlk_assert_bound(r, MLKEM_N, 0, MLKEM_Q); + return; + } +#endif /* MLK_USE_NATIVE_POLY_REDUCE */ + + mlk_poly_reduce_c(r); +} + +/* Reference: `poly_add()` in the reference implementation @[REF]. + * - We use destructive version (output=first input) to avoid + * reasoning about aliasing in the CBMC specification */ +MLK_INTERNAL_API +void mlk_poly_add(mlk_poly *r, const mlk_poly *b) +{ + unsigned i; + for (i = 0; i < MLKEM_N; i++) + __loop__( + invariant(i <= MLKEM_N) + invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) + invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] + b->coeffs[k1]))) + { + /* The preconditions imply that the addition stays within int16_t. */ + r->coeffs[i] = (int16_t)(r->coeffs[i] + b->coeffs[i]); + } +} + +/* Reference: `poly_sub()` in the reference implementation @[REF]. 
+ * - We use destructive version (output=first input) to avoid + * reasoning about aliasing in the CBMC specification */ +MLK_INTERNAL_API +void mlk_poly_sub(mlk_poly *r, const mlk_poly *b) +{ + unsigned i; + for (i = 0; i < MLKEM_N; i++) + __loop__( + invariant(i <= MLKEM_N) + invariant(forall(k0, i, MLKEM_N, r->coeffs[k0] == loop_entry(*r).coeffs[k0])) + invariant(forall(k1, 0, i, r->coeffs[k1] == loop_entry(*r).coeffs[k1] - b->coeffs[k1]))) + { + /* The preconditions imply that the subtraction stays within int16_t. */ + r->coeffs[i] = (int16_t)(r->coeffs[i] - b->coeffs[i]); + } +} + +#include "zetas.inc" + +/* Reference: Does not exist in the reference implementation @[REF]. + * - The reference implementation does not use a + * multiplication cache ('mulcache'). This idea originates + * from @[NeonNTT] and is used at the C level here. */ +MLK_STATIC_TESTABLE void mlk_poly_mulcache_compute_c(mlk_poly_mulcache *x, + const mlk_poly *a) +__contract__( + requires(memory_no_alias(x, sizeof(mlk_poly_mulcache))) + requires(memory_no_alias(a, sizeof(mlk_poly))) + assigns(memory_slice(x, sizeof(mlk_poly_mulcache))) +) +{ + unsigned i; + for (i = 0; i < MLKEM_N / 4; i++) + __loop__( + invariant(i <= MLKEM_N / 4) + invariant(array_abs_bound(x->coeffs, 0, 2 * i, MLKEM_Q))) + { + x->coeffs[2 * i + 0] = mlk_fqmul(a->coeffs[4 * i + 1], mlk_zetas[64 + i]); + /* The values in zeta table are <= MLKEM_Q in absolute value, + * so the negation in int16_t is safe. */ + x->coeffs[2 * i + 1] = + mlk_fqmul(a->coeffs[4 * i + 3], (int16_t)(-mlk_zetas[64 + i])); + } + + /* + * This bound is true for the C implementation, but not needed + * in the higher level bounds reasoning. It is thus omitted + * from the spec to not unnecessarily constrain native + * implementations, but checked here nonetheless. 
+ */ + mlk_assert_abs_bound(x, MLKEM_N / 2, MLKEM_Q); +} + +MLK_INTERNAL_API +void mlk_poly_mulcache_compute(mlk_poly_mulcache *x, const mlk_poly *a) +{ +#if defined(MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE) + int ret; + ret = mlk_poly_mulcache_compute_native(x->coeffs, a->coeffs); + if (ret == MLK_NATIVE_FUNC_SUCCESS) + { + return; + } +#endif /* MLK_USE_NATIVE_POLY_MULCACHE_COMPUTE */ + + mlk_poly_mulcache_compute_c(x, a); +} + +/* + * Computes a block of CT butterflies with a fixed twiddle factor, + * using Montgomery multiplication. + * Parameters: + * - r: Pointer to base of polynomial (_not_ the base of butterfly block) + * - zeta: Twiddle factor to use for the butterfly. This must be in + * Montgomery form and signed canonical. + * - start: Offset to the beginning of the butterfly block + * - len: Index difference between coefficients subject to a butterfly + * - bound: Ghost variable describing coefficient bound: Prior to `start`, + * coefficients must be bound by `bound + MLKEM_Q`. Post `start`, + * they must be bound by `bound`. + * When this function returns, output coefficients in the index range + * [start, start+2*len) have bound bumped to `bound + MLKEM_Q`. + * Example: + * - start=8, len=4 + * This would compute the following four butterflies + * 8 -- 12 + * 9 -- 13 + * 10 -- 14 + * 11 -- 15 + * - start=4, len=2 + * This would compute the following two butterflies + * 4 -- 6 + * 5 -- 7 + */ + +/* Reference: Embedded in `ntt()` in the reference implementation @[REF]. 
*/ +static void mlk_ntt_butterfly_block(int16_t r[MLKEM_N], int16_t zeta, + unsigned start, unsigned len, + unsigned bound) +__contract__( + requires(start < MLKEM_N) + requires(1 <= len && len <= MLKEM_N / 2 && start + 2 * len <= MLKEM_N) + requires(0 <= bound && bound < INT16_MAX - MLKEM_Q) + requires(-MLKEM_Q_HALF < zeta && zeta < MLKEM_Q_HALF) + requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) + requires(array_abs_bound(r, 0, start, bound + MLKEM_Q)) + requires(array_abs_bound(r, start, MLKEM_N, bound)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, 0, start + 2*len, bound + MLKEM_Q)) + ensures(array_abs_bound(r, start + 2 * len, MLKEM_N, bound))) +{ + /* `bound` is a ghost variable only needed in the CBMC specification */ + unsigned j; + ((void)bound); + for (j = start; j < start + len; j++) + __loop__( + invariant(start <= j && j <= start + len) + /* + * Coefficients are updated in strided pairs, so the bounds for the + * intermediate states alternate twice between the old and new bound + */ + invariant(array_abs_bound(r, 0, j, bound + MLKEM_Q)) + invariant(array_abs_bound(r, j, start + len, bound)) + invariant(array_abs_bound(r, start + len, j + len, bound + MLKEM_Q)) + invariant(array_abs_bound(r, j + len, MLKEM_N, bound))) + { + int16_t t; + t = mlk_fqmul(r[j + len], zeta); + /* The precondition implies that the arithmetic does not overflow. */ + r[j + len] = (int16_t)(r[j] - t); + r[j] = (int16_t)(r[j] + t); + } +} + +/* + * Compute one layer of forward NTT + * Parameters: + * - r: Pointer to base of polynomial + * - layer: Variable indicating which layer is being applied. + */ + +/* Reference: Embedded in `ntt()` in the reference implementation @[REF]. 
*/ +static void mlk_ntt_layer(int16_t r[MLKEM_N], unsigned layer) +__contract__( + requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) + requires(1 <= layer && layer <= 7) + requires(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, 0, MLKEM_N, (layer + 1) * MLKEM_Q))) +{ + unsigned start, k, len; + /* Twiddle factors for layer n are at indices 2^(n-1)..2^n-1. */ + k = 1u << (layer - 1); + len = (unsigned)MLKEM_N >> layer; + for (start = 0; start < MLKEM_N; start += 2 * len) + __loop__( + invariant(start < MLKEM_N + 2 * len) + invariant(k <= MLKEM_N / 2 && 2 * len * k == start + MLKEM_N) + invariant(array_abs_bound(r, 0, start, layer * MLKEM_Q + MLKEM_Q)) + invariant(array_abs_bound(r, start, MLKEM_N, layer * MLKEM_Q))) + { + int16_t zeta = mlk_zetas[k++]; + mlk_ntt_butterfly_block(r, zeta, start, len, layer * MLKEM_Q); + } +} + +/* + * Compute full forward NTT + * NOTE: This particular implementation satisfies a much tighter + * bound on the output coefficients (5*q) than the contractual one (8*q), + * but this is not needed in the calling code. Should we change the + * base multiplication strategy to require smaller NTT output bounds, + * the proof may need strengthening. + */ + +/* Reference: `ntt()` in the reference implementation @[REF]. + * - Iterate over `layer` instead of `len` in the outer loop + * to simplify computation of zeta index. 
*/ +MLK_STATIC_TESTABLE void mlk_poly_ntt_c(mlk_poly *p) +__contract__( + requires(memory_no_alias(p, sizeof(mlk_poly))) + requires(array_abs_bound(p->coeffs, 0, MLKEM_N, MLKEM_Q)) + assigns(memory_slice(p, sizeof(mlk_poly))) + ensures(array_abs_bound(p->coeffs, 0, MLKEM_N, MLK_NTT_BOUND)) +) +{ + unsigned layer; + int16_t *r; + + mlk_assert_abs_bound(p, MLKEM_N, MLKEM_Q); + + r = p->coeffs; + + for (layer = 1; layer <= 7; layer++) + __loop__( + invariant(1 <= layer && layer <= 8) + invariant(array_abs_bound(r, 0, MLKEM_N, layer * MLKEM_Q))) + { + mlk_ntt_layer(r, layer); + } + + /* Check the stronger bound */ + mlk_assert_abs_bound(p, MLKEM_N, MLK_NTT_BOUND); +} + +MLK_INTERNAL_API +void mlk_poly_ntt(mlk_poly *p) +{ +#if defined(MLK_USE_NATIVE_NTT) + int ret; + mlk_assert_abs_bound(p, MLKEM_N, MLKEM_Q); + ret = mlk_ntt_native(p->coeffs); + if (ret == MLK_NATIVE_FUNC_SUCCESS) + { + mlk_assert_abs_bound(p, MLKEM_N, MLK_NTT_BOUND); + return; + } +#endif /* MLK_USE_NATIVE_NTT */ + + mlk_poly_ntt_c(p); +} + + +/* Compute one layer of inverse NTT */ + +/* Reference: Embedded into `invntt()` in the reference implementation @[REF] */ +static void mlk_invntt_layer(int16_t *r, unsigned layer) +__contract__( + requires(memory_no_alias(r, sizeof(int16_t) * MLKEM_N)) + requires(1 <= layer && layer <= 7) + requires(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * MLKEM_N)) + ensures(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) +{ + unsigned start, k, len; + len = (unsigned)MLKEM_N >> layer; + k = (1u << layer) - 1; + for (start = 0; start < MLKEM_N; start += 2 * len) + __loop__( + invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q)) + invariant(start <= MLKEM_N && k <= 127) + /* Normalised form of k == MLKEM_N / len - 1 - start / (2 * len) */ + invariant(2 * len * k + start == 2 * MLKEM_N - 2 * len)) + { + unsigned j; + int16_t zeta = mlk_zetas[k--]; + for (j = start; j < start + len; j++) + __loop__( + invariant(start <= j && j <= start + len) + 
invariant(start <= MLKEM_N && k <= 127) + invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) + { + int16_t t = r[j]; + /* The preconditions imply that the arithmetic does not overflow. */ + r[j] = mlk_barrett_reduce((int16_t)(t + r[j + len])); + r[j + len] = (int16_t)(r[j + len] - t); + r[j + len] = mlk_fqmul(r[j + len], zeta); + } + } +} + +/* Reference: `invntt()` in the reference implementation @[REF] + * - We normalize at the beginning of the inverse NTT, + * while the reference implementation normalizes at + * the end. This allows us to drop a call to `poly_reduce()` + * from the base multiplication. */ +MLK_STATIC_TESTABLE void mlk_poly_invntt_tomont_c(mlk_poly *p) +__contract__( + requires(memory_no_alias(p, sizeof(mlk_poly))) + assigns(memory_slice(p, sizeof(mlk_poly))) + ensures(array_abs_bound(p->coeffs, 0, MLKEM_N, MLK_INVNTT_BOUND)) +) +{ + unsigned j, layer; + const int16_t f = 1441; /* check-magic: 1441 == pow(2,32 - 7,MLKEM_Q) */ + int16_t *r = p->coeffs; + + /* + * Scale input polynomial to account for Montgomery factor + * and NTT twist. This also brings coefficients down to + * absolute value < MLKEM_Q. 
+ */ + for (j = 0; j < MLKEM_N; j++) + __loop__( + invariant(j <= MLKEM_N) + invariant(array_abs_bound(r, 0, j, MLKEM_Q))) + { + r[j] = mlk_fqmul(r[j], f); + } + + /* Run the invNTT layers */ + for (layer = 7; layer > 0; layer--) + __loop__( + invariant(0 <= layer && layer < 8) + invariant(array_abs_bound(r, 0, MLKEM_N, MLKEM_Q))) + { + mlk_invntt_layer(r, layer); + } + + mlk_assert_abs_bound(p, MLKEM_N, MLK_INVNTT_BOUND); +} + +MLK_INTERNAL_API +void mlk_poly_invntt_tomont(mlk_poly *p) +{ +#if defined(MLK_USE_NATIVE_INTT) + int ret; + ret = mlk_intt_native(p->coeffs); + if (ret == MLK_NATIVE_FUNC_SUCCESS) + { + mlk_assert_abs_bound(p, MLKEM_N, MLK_INVNTT_BOUND); + return; + } +#endif /* MLK_USE_NATIVE_INTT */ + + mlk_poly_invntt_tomont_c(p); +} + +#else /* !MLK_CONFIG_MULTILEVEL_NO_SHARED */ + +MLK_EMPTY_CU(mlk_poly) + +#endif /* MLK_CONFIG_MULTILEVEL_NO_SHARED */ diff --git a/mlkem_native/src/poly.h b/mlkem_native/src/poly.h new file mode 100644 index 0000000..587062c --- /dev/null +++ b/mlkem_native/src/poly.h @@ -0,0 +1,317 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [FIPS203] + * FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard + * National Institute of Standards and Technology + * https://csrc.nist.gov/pubs/fips/203/final + */ + +#ifndef MLK_POLY_H +#define MLK_POLY_H + + +#include "cbmc.h" +#include "common.h" +#include "debug.h" +#include "verify.h" + +/* Absolute exclusive upper bound for the output of the inverse NTT */ +#define MLK_INVNTT_BOUND (8 * MLKEM_Q) + +/* Absolute exclusive upper bound for the output of the forward NTT */ +#define MLK_NTT_BOUND (8 * MLKEM_Q) + +/* + * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial + * coeffs[0] + X*coeffs[1] + X^2*coeffs[2] + ... 
+ X^{n-1}*coeffs[n-1] + */ +typedef struct +{ + int16_t coeffs[MLKEM_N]; +} MLK_ALIGN mlk_poly; + +/* + * INTERNAL representation of precomputed data speeding up + * the base multiplication of two polynomials in NTT domain. + */ +typedef struct +{ + int16_t coeffs[MLKEM_N >> 1]; +} MLK_ALIGN mlk_poly_mulcache; + +/************************************************* + * Name: mlk_montgomery_reduce + * + * Description: Generic Montgomery reduction; given a 32-bit integer a, computes + * 16-bit integer congruent to a * R^-1 mod q, where R=2^16 + * + * Arguments: - int32_t a: input integer to be reduced, of absolute value + * strictly smaller than INT32_MAX - 2^15 * MLKEM_Q. + * + * Returns: integer congruent to a * R^-1 modulo q, with absolute value + * <= ceil(|a| / 2^16) + (MLKEM_Q + 1)/2 + * + **************************************************/ +static MLK_ALWAYS_INLINE int16_t mlk_montgomery_reduce(int32_t a) +__contract__( + requires(a < +(INT32_MAX - (((int32_t)1 << 15) * MLKEM_Q)) && + a > -(INT32_MAX - (((int32_t)1 << 15) * MLKEM_Q))) + /* We don't attempt to express an input-dependent output bound + * as the post-condition here. There are two call-sites for this + * function: + * - The base multiplication: Here, we need no output bound. + * - mlk_fqmul: Here, we inline this function and prove another spec + * for mlk_fqmul which does have a post-condition bound. */ +) +{ + /* check-magic: 62209 == unsigned_mod(pow(MLKEM_Q, -1, 2^16), 2^16) */ + const uint32_t QINV = 62209; + + /* Compute a*q^{-1} mod 2^16 in unsigned representatives. */ + const uint16_t a_reduced = mlk_cast_int32_to_uint16(a); + const uint16_t a_inverted = (a_reduced * QINV) & UINT16_MAX; + + /* Lift to signed canonical representative mod 2^16. 
+ */ + const int16_t t = mlk_cast_uint16_to_int16(a_inverted); + + int32_t r; + + mlk_assert(a < +(INT32_MAX - (((int32_t)1 << 15) * MLKEM_Q)) && + a > -(INT32_MAX - (((int32_t)1 << 15) * MLKEM_Q))); + + r = a - ((int32_t)t * MLKEM_Q); + + /* + * PORTABILITY: Right-shift on a signed integer is, strictly-speaking, + * implementation-defined for negative left argument. Here, + * we assume it's sign-preserving "arithmetic" shift right. (C99 6.5.7 (5)) + */ + r = r >> 16; + /* Bounds: |r >> 16| <= ceil(|r| / 2^16) + * <= ceil(|a| / 2^16 + MLKEM_Q / 2) + * <= ceil(|a| / 2^16) + (MLKEM_Q + 1) / 2 + * + * (Note that |a >> 16| = ceil(|a| / 2^16) for negative a) + */ + return (int16_t)r; +} + +#define mlk_poly_tomont MLK_NAMESPACE(poly_tomont) +/************************************************* + * Name: mlk_poly_tomont + * + * Description: Inplace conversion of all coefficients of a polynomial + * from normal domain to Montgomery domain + * + * Bounds: Output < q in absolute value. + * + * Arguments: - mlk_poly *r: pointer to input/output polynomial + * + * Specification: Internal normalization required in `mlk_indcpa_keypair_derand` + * as part of matrix-vector multiplication + * @[FIPS203, Algorithm 13, K-PKE.KeyGen, L18]. + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_tomont(mlk_poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_Q)) +); + +#define mlk_poly_mulcache_compute MLK_NAMESPACE(poly_mulcache_compute) +/************************************************************ + * Name: mlk_poly_mulcache_compute + * + * Description: Computes the mulcache for a polynomial in NTT domain + * + * The mulcache of a degree-2 polynomial b := b0 + b1*X + * in Fq[X]/(X^2-zeta) is the value b1*zeta, needed when + * computing products of b in Fq[X]/(X^2-zeta). 
+ * + * The mulcache of a polynomial in NTT domain -- which is + * a 128-tuple of degree-2 polynomials in Fq[X]/(X^2-zeta), + * for varying zeta, is the 128-tuple of mulcaches of those + * polynomials. + * + * Arguments: - x: Pointer to mulcache to be populated + * - a: Pointer to input polynomial + * + * Specification: + * - Caches `b_1 * \gamma` in @[FIPS203, Algorithm 12, BaseCaseMultiply, L1] + * + ************************************************************/ +/* + * NOTE: The default C implementation of this function populates + * the mulcache with values in (-q,q), but this is not needed for the + * higher level safety proofs, and thus not part of the spec. + */ +MLK_INTERNAL_API +void mlk_poly_mulcache_compute(mlk_poly_mulcache *x, const mlk_poly *a) +__contract__( + requires(memory_no_alias(x, sizeof(mlk_poly_mulcache))) + requires(memory_no_alias(a, sizeof(mlk_poly))) + assigns(memory_slice(x, sizeof(mlk_poly_mulcache))) +); + +#define mlk_poly_reduce MLK_NAMESPACE(poly_reduce) +/************************************************* + * Name: mlk_poly_reduce + * + * Description: Converts polynomial to _unsigned canonical_ representatives. + * + * The input coefficients can be arbitrary integers in int16_t. + * The output coefficients are in [0,1,...,MLKEM_Q-1]. + * + * Arguments: - mlk_poly *r: pointer to input/output polynomial + * + * Specification: Normalizes on unsigned canonical representatives + * ahead of calling @[FIPS203, Compress_d, Eq (4.7)]. + * This is not made explicit in FIPS 203. + * + **************************************************/ +/* + * NOTE: The semantics of mlk_poly_reduce() is different in + * the reference implementation, which requires + * signed canonical output data. Unsigned canonical + * outputs are better suited to the only remaining + * use of mlk_poly_reduce() in the context of (de)serialization. 
+ */ +MLK_INTERNAL_API +void mlk_poly_reduce(mlk_poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) +); + +#define mlk_poly_add MLK_NAMESPACE(poly_add) +/************************************************************ + * Name: mlk_poly_add + * + * Description: Adds two polynomials in place + * + * Arguments: - r: Pointer to input-output polynomial to be added to. + * - b: Pointer to input polynomial that should be added + * to r. Must be disjoint from r. + * + * The coefficients of r and b must be so that the addition does + * not overflow. Otherwise, the behaviour of this function is undefined. + * + * Specification: + * - @[FIPS203, 2.4.5, Arithmetic With Polynomials and NTT Representations] + * - Used in @[FIPS203, Algorithm 14 (K-PKE.Encrypt), L21] + * + ************************************************************/ +/* + * NOTE: The reference implementation uses a 3-argument mlk_poly_add. + * We specialize to the accumulator form to avoid reasoning about aliasing. + */ +MLK_INTERNAL_API +void mlk_poly_add(mlk_poly *r, const mlk_poly *b) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(memory_no_alias(b, sizeof(mlk_poly))) + requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] + b->coeffs[k0] <= INT16_MAX)) + requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] + b->coeffs[k1] >= INT16_MIN)) + ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] + b->coeffs[k])) + assigns(memory_slice(r, sizeof(mlk_poly))) +); + +#define mlk_poly_sub MLK_NAMESPACE(poly_sub) +/************************************************* + * Name: mlk_poly_sub + * + * Description: Subtract two polynomials; no modular reduction is performed + * + * Arguments: - mlk_poly *r: Pointer to input-output polynomial to be subtracted from. 
+ * - const mlk_poly *b: Pointer to second input polynomial + * + * Specification: + * - @[FIPS203, 2.4.5, Arithmetic With Polynomials and NTT Representations] + * - Used in @[FIPS203, Algorithm 15, K-PKE.Decrypt, L6] + * + **************************************************/ +/* + * NOTE: The reference implementation uses a 3-argument mlk_poly_sub. + * We specialize to the accumulator form to avoid reasoning about aliasing. + */ +MLK_INTERNAL_API +void mlk_poly_sub(mlk_poly *r, const mlk_poly *b) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(memory_no_alias(b, sizeof(mlk_poly))) + requires(forall(k0, 0, MLKEM_N, (int32_t) r->coeffs[k0] - b->coeffs[k0] <= INT16_MAX)) + requires(forall(k1, 0, MLKEM_N, (int32_t) r->coeffs[k1] - b->coeffs[k1] >= INT16_MIN)) + ensures(forall(k, 0, MLKEM_N, r->coeffs[k] == old(*r).coeffs[k] - b->coeffs[k])) + assigns(memory_slice(r, sizeof(mlk_poly))) +); + +#define mlk_poly_ntt MLK_NAMESPACE(poly_ntt) +/************************************************* + * Name: mlk_poly_ntt + * + * Description: Computes negacyclic number-theoretic transform (NTT) of + * a polynomial in place. + * + * The input is assumed to be in normal order and + * coefficient-wise bound by MLKEM_Q in absolute value. + * + * The output polynomial is in bitreversed order, and + * coefficient-wise bound by MLK_NTT_BOUND in absolute value. + * + * (NOTE: Sometimes the input to the NTT is actually smaller, + * which gives better bounds.) 
+ * + * Arguments: - mlk_poly *r: pointer to in/output polynomial + * + * Specification: Implements @[FIPS203, Algorithm 9, NTT] + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_ntt(mlk_poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_Q)) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLK_NTT_BOUND)) +); + +#define mlk_poly_invntt_tomont MLK_NAMESPACE(poly_invntt_tomont) +/************************************************* + * Name: mlk_poly_invntt_tomont + * + * Description: Computes inverse of negacyclic number-theoretic transform (NTT) + * of a polynomial in place; + * inputs assumed to be in bitreversed order, output in normal + * order + * + * The input is assumed to be in bitreversed order, and can + * have arbitrary coefficients in int16_t. + * + * The output polynomial is in normal order, and + * coefficient-wise bound by MLK_INVNTT_BOUND in absolute value. + * + * Arguments: - mlk_poly *r: pointer to in/output polynomial + * + * Specification: Implements composition of @[FIPS203, Algorithm 10, NTT^{-1}] + * and elementwise modular multiplication with a suitable + * Montgomery factor introduced during the base multiplication. 
+ * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_invntt_tomont(mlk_poly *r) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLK_INVNTT_BOUND)) +); + +#endif /* !MLK_POLY_H */ diff --git a/mlkem_native/src/poly_k.c b/mlkem_native/src/poly_k.c new file mode 100644 index 0000000..32b214e --- /dev/null +++ b/mlkem_native/src/poly_k.c @@ -0,0 +1,502 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [FIPS203] + * FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard + * National Institute of Standards and Technology + * https://csrc.nist.gov/pubs/fips/203/final + * + * - [NeonNTT] + * Neon NTT: Faster Dilithium, Kyber, and Saber on Cortex-A72 and Apple M1 + * Becker, Hwang, Kannwischer, Yang, Yang + * https://eprint.iacr.org/2021/986 + * + * - [REF] + * CRYSTALS-Kyber C reference implementation + * Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, Stehlé + * https://github.com/pq-crystals/kyber/tree/main/ref + */ + +#include "poly_k.h" + +#include "debug.h" +#include "sampling.h" +#include "symmetric.h" + +/* Parameter set namespacing + * This is to facilitate building multiple instances + * of mlkem-native (e.g. with varying parameter sets) + * within a single compilation unit. */ +#define mlk_poly_cbd_eta1 MLK_ADD_PARAM_SET(mlk_poly_cbd_eta1) +#define mlk_poly_cbd_eta2 MLK_ADD_PARAM_SET(mlk_poly_cbd_eta2) +#define mlk_polyvec_basemul_acc_montgomery_cached_c \ + MLK_ADD_PARAM_SET(mlk_polyvec_basemul_acc_montgomery_cached_c) +/* End of parameter set namespacing */ + +/* Reference: `polyvec_compress()` in the reference implementation @[REF] + * - In contrast to the reference implementation, we assume + * unsigned canonical coefficients here. 
+ * The reference implementation works with coefficients + * in the range (-MLKEM_Q+1,...,MLKEM_Q-1). */ +MLK_INTERNAL_API +void mlk_polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], + const mlk_polyvec *a) +{ + unsigned i; + mlk_assert_bound_2d(a->vec, MLKEM_K, MLKEM_N, 0, MLKEM_Q); + + for (i = 0; i < MLKEM_K; i++) + { + mlk_poly_compress_du(r + i * MLKEM_POLYCOMPRESSEDBYTES_DU, &a->vec[i]); + } +} + +/* Reference: `polyvec_decompress()` in the reference implementation @[REF]. */ +MLK_INTERNAL_API +void mlk_polyvec_decompress_du(mlk_polyvec *r, + const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU]) +{ + unsigned i; + for (i = 0; i < MLKEM_K; i++) + { + mlk_poly_decompress_du(&r->vec[i], a + i * MLKEM_POLYCOMPRESSEDBYTES_DU); + } + + mlk_assert_bound_2d(r->vec, MLKEM_K, MLKEM_N, 0, MLKEM_Q); +} + +/* Reference: `polyvec_tobytes()` in the reference implementation @[REF]. + * - In contrast to the reference implementation, we assume + * unsigned canonical coefficients here. + * The reference implementation works with coefficients + * in the range (-MLKEM_Q+1,...,MLKEM_Q-1). */ +MLK_INTERNAL_API +void mlk_polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const mlk_polyvec *a) +{ + unsigned i; + mlk_assert_bound_2d(a->vec, MLKEM_K, MLKEM_N, 0, MLKEM_Q); + + for (i = 0; i < MLKEM_K; i++) + __loop__( + assigns(i, memory_slice(r, MLKEM_POLYVECBYTES)) + invariant(i <= MLKEM_K) + ) + { + mlk_poly_tobytes(&r[i * MLKEM_POLYBYTES], &a->vec[i]); + } +} + +/* Reference: `polyvec_frombytes()` in the reference implementation @[REF]. */ +MLK_INTERNAL_API +void mlk_polyvec_frombytes(mlk_polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) +{ + unsigned i; + for (i = 0; i < MLKEM_K; i++) + { + mlk_poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES); + } + + mlk_assert_bound_2d(r->vec, MLKEM_K, MLKEM_N, 0, MLKEM_UINT12_LIMIT); +} + +/* Reference: `polyvec_ntt()` in the reference implementation @[REF]. 
*/ +MLK_INTERNAL_API +void mlk_polyvec_ntt(mlk_polyvec *r) +{ + unsigned i; + for (i = 0; i < MLKEM_K; i++) + { + mlk_poly_ntt(&r->vec[i]); + } + + mlk_assert_abs_bound_2d(r->vec, MLKEM_K, MLKEM_N, MLK_NTT_BOUND); +} + +/* Reference: `polyvec_invntt_tomont()` in the reference implementation @[REF]. + * - We normalize at the beginning of the inverse NTT, + * while the reference implementation normalizes at + * the end. This allows us to drop a call to `poly_reduce()` + * from the base multiplication. */ +MLK_INTERNAL_API +void mlk_polyvec_invntt_tomont(mlk_polyvec *r) +{ + unsigned i; + for (i = 0; i < MLKEM_K; i++) + { + mlk_poly_invntt_tomont(&r->vec[i]); + } + + mlk_assert_abs_bound_2d(r->vec, MLKEM_K, MLKEM_N, MLK_INVNTT_BOUND); +} + +/* Reference: `polyvec_basemul_acc_montgomery()` in the + * reference implementation @[REF]. + * - We use a multiplication cache ('mulcache') here + * which is not present in the reference implementation @[REF]. + * This idea originates from @[NeonNTT] and is used + * at the C level here. + * - We compute the coefficients of the scalar product in 32-bit + * coefficients and perform only a single modular reduction + * at the end. The reference implementation uses 2 * MLKEM_K + * more modular reductions since it reduces after every modular + * multiplication. 
*/ +MLK_STATIC_TESTABLE void mlk_polyvec_basemul_acc_montgomery_cached_c( + mlk_poly *r, const mlk_polyvec *a, const mlk_polyvec *b, + const mlk_polyvec_mulcache *b_cache) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(memory_no_alias(a, sizeof(mlk_polyvec))) + requires(memory_no_alias(b, sizeof(mlk_polyvec))) + requires(memory_no_alias(b_cache, sizeof(mlk_polyvec_mulcache))) + requires(forall(k1, 0, MLKEM_K, + array_bound(a->vec[k1].coeffs, 0, MLKEM_N, 0, MLKEM_UINT12_LIMIT))) + assigns(memory_slice(r, sizeof(mlk_poly))) +) +{ + unsigned i; + mlk_assert_bound_2d(a->vec, MLKEM_K, MLKEM_N, 0, MLKEM_UINT12_LIMIT); + + for (i = 0; i < MLKEM_N / 2; i++) + __loop__(invariant(i <= MLKEM_N / 2)) + { + unsigned k; + int32_t t[2] = {0}; + for (k = 0; k < MLKEM_K; k++) + __loop__( + invariant(k <= MLKEM_K && + t[0] <= (int32_t) k * 2 * MLKEM_UINT12_LIMIT * 32768 && + t[0] >= - ((int32_t) k * 2 * MLKEM_UINT12_LIMIT * 32768) && + t[1] <= ((int32_t) k * 2 * MLKEM_UINT12_LIMIT * 32768) && + t[1] >= - ((int32_t) k * 2 * MLKEM_UINT12_LIMIT * 32768))) + { + t[0] += (int32_t)a->vec[k].coeffs[2 * i + 1] * b_cache->vec[k].coeffs[i]; + t[0] += (int32_t)a->vec[k].coeffs[2 * i] * b->vec[k].coeffs[2 * i]; + t[1] += (int32_t)a->vec[k].coeffs[2 * i] * b->vec[k].coeffs[2 * i + 1]; + t[1] += (int32_t)a->vec[k].coeffs[2 * i + 1] * b->vec[k].coeffs[2 * i]; + } + r->coeffs[2 * i + 0] = mlk_montgomery_reduce(t[0]); + r->coeffs[2 * i + 1] = mlk_montgomery_reduce(t[1]); + } +} + +MLK_INTERNAL_API +void mlk_polyvec_basemul_acc_montgomery_cached( + mlk_poly *r, const mlk_polyvec *a, const mlk_polyvec *b, + const mlk_polyvec_mulcache *b_cache) +{ +#if defined(MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED) + { + int ret; + mlk_assert_bound_2d(a->vec, MLKEM_K, MLKEM_N, 0, MLKEM_UINT12_LIMIT); +#if MLKEM_K == 2 + ret = mlk_polyvec_basemul_acc_montgomery_cached_k2_native( + r->coeffs, (const int16_t *)a, (const int16_t *)b, + (const int16_t *)b_cache); +#elif MLKEM_K == 
3 + ret = mlk_polyvec_basemul_acc_montgomery_cached_k3_native( + r->coeffs, (const int16_t *)a, (const int16_t *)b, + (const int16_t *)b_cache); +#elif MLKEM_K == 4 + ret = mlk_polyvec_basemul_acc_montgomery_cached_k4_native( + r->coeffs, (const int16_t *)a, (const int16_t *)b, + (const int16_t *)b_cache); +#endif + if (ret == MLK_NATIVE_FUNC_SUCCESS) + { + return; + } + } +#endif /* MLK_USE_NATIVE_POLYVEC_BASEMUL_ACC_MONTGOMERY_CACHED */ + + mlk_polyvec_basemul_acc_montgomery_cached_c(r, a, b, b_cache); +} + +/* Reference: Does not exist in the reference implementation @[REF]. + * - The reference implementation does not use a + * multiplication cache ('mulcache'). This idea originates + * from @[NeonNTT] and is used at the C level here. */ +MLK_INTERNAL_API +void mlk_polyvec_mulcache_compute(mlk_polyvec_mulcache *x, const mlk_polyvec *a) +{ + unsigned i; + for (i = 0; i < MLKEM_K; i++) + { + mlk_poly_mulcache_compute(&x->vec[i], &a->vec[i]); + } +} + +/* Reference: `polyvec_reduce()` in the reference implementation @[REF]. + * - We use _unsigned_ canonical outputs, while the reference + * implementation uses _signed_ canonical outputs. + * Accordingly, we need a conditional addition of MLKEM_Q + * here to go from signed to unsigned representatives. + * This conditional addition is then dropped from all + * polynomial compression functions instead (see `compress.c`). */ +MLK_INTERNAL_API +void mlk_polyvec_reduce(mlk_polyvec *r) +{ + unsigned i; + for (i = 0; i < MLKEM_K; i++) + { + mlk_poly_reduce(&r->vec[i]); + } + + mlk_assert_bound_2d(r->vec, MLKEM_K, MLKEM_N, 0, MLKEM_Q); +} + +/* Reference: `polyvec_add()` in the reference implementation @[REF]. 
+ * - We use destructive version (output=first input) to avoid + * reasoning about aliasing in the CBMC specification */ +MLK_INTERNAL_API +void mlk_polyvec_add(mlk_polyvec *r, const mlk_polyvec *b) +{ + unsigned i; + for (i = 0; i < MLKEM_K; i++) + __loop__( + assigns(i, memory_slice(r, sizeof(mlk_polyvec))) + invariant(i <= MLKEM_K) + invariant(forall(j0, i, MLKEM_K, + forall(k0, 0, MLKEM_N, + ((int32_t)r->vec[j0].coeffs[k0] + b->vec[j0].coeffs[k0] <= INT16_MAX) && + ((int32_t)r->vec[j0].coeffs[k0] + b->vec[j0].coeffs[k0] >= INT16_MIN)))) + invariant(forall(j2, 0, i, + forall(k2, 0, MLKEM_N, + (r->vec[j2].coeffs[k2] <= INT16_MAX) && + (r->vec[j2].coeffs[k2] >= INT16_MIN)))) + ) + { + mlk_poly_add(&r->vec[i], &b->vec[i]); + } +} + +/* Reference: `polyvec_tomont()` in the reference implementation @[REF]. */ +MLK_INTERNAL_API +void mlk_polyvec_tomont(mlk_polyvec *r) +{ + unsigned i; + for (i = 0; i < MLKEM_K; i++) + { + mlk_poly_tomont(&r->vec[i]); + } + + mlk_assert_abs_bound_2d(r->vec, MLKEM_K, MLKEM_N, MLKEM_Q); +} + + +/************************************************* + * Name: mlk_poly_cbd_eta1 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA1. + * + * Arguments: - mlk_poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + * + * Specification: Implements @[FIPS203, Algorithm 8, SamplePolyCBD_eta1], where + * eta1 is specified per parameter set in @[FIPS203, Table 2] + * and represented as MLKEM_ETA1 here. + * + **************************************************/ + +/* Reference: `poly_cbd_eta1` in the reference implementation @[REF]. 
*/ +static MLK_INLINE void mlk_poly_cbd_eta1( + mlk_poly *r, const uint8_t buf[MLKEM_ETA1 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(memory_no_alias(buf, MLKEM_ETA1 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA1 + 1)) +) +{ +#if MLKEM_ETA1 == 2 + mlk_poly_cbd2(r, buf); +#elif MLKEM_ETA1 == 3 + mlk_poly_cbd3(r, buf); +#else +#error "Invalid value of MLKEM_ETA1" +#endif +} + +/* Reference: Does not exist in the reference implementation @[REF]. + * - This implements a x4-batched version of `poly_getnoise_eta1()` + * from the reference implementation, to leverage + * batched Keccak-f1600.*/ +MLK_INTERNAL_API +void mlk_poly_getnoise_eta1_4x(mlk_poly *r0, mlk_poly *r1, mlk_poly *r2, + mlk_poly *r3, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +{ + MLK_ALIGN uint8_t buf[4][MLK_ALIGN_UP(MLKEM_ETA1 * MLKEM_N / 4)]; + MLK_ALIGN uint8_t extkey[4][MLK_ALIGN_UP(MLKEM_SYMBYTES + 1)]; + mlk_memcpy(extkey[0], seed, MLKEM_SYMBYTES); + mlk_memcpy(extkey[1], seed, MLKEM_SYMBYTES); + mlk_memcpy(extkey[2], seed, MLKEM_SYMBYTES); + mlk_memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; + extkey[3][MLKEM_SYMBYTES] = nonce3; + +#if !defined(FIPS202_X4_DEFAULT_IMPLEMENTATION) && \ + !defined(MLK_CONFIG_SERIAL_FIPS202_ONLY) + mlk_prf_eta1_x4(buf, extkey); +#else + mlk_prf_eta1(buf[0], extkey[0]); + mlk_prf_eta1(buf[1], extkey[1]); + mlk_prf_eta1(buf[2], extkey[2]); + if (r3 != NULL) + { + mlk_prf_eta1(buf[3], extkey[3]); + } +#endif /* !(!FIPS202_X4_DEFAULT_IMPLEMENTATION && \ + !MLK_CONFIG_SERIAL_FIPS202_ONLY) */ + + mlk_poly_cbd_eta1(r0, buf[0]); + mlk_poly_cbd_eta1(r1, buf[1]); + mlk_poly_cbd_eta1(r2, buf[2]); + if (r3 != NULL) + { + mlk_poly_cbd_eta1(r3, buf[3]); + mlk_assert_abs_bound(r3, MLKEM_N, 
MLKEM_ETA1 + 1); + } + + mlk_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + mlk_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + mlk_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA1 + 1); + + /* Specification: Partially implements + * @[FIPS203, Section 3.3, Destruction of intermediate values] */ + mlk_zeroize(buf, sizeof(buf)); + mlk_zeroize(extkey, sizeof(extkey)); +} + +#if MLKEM_K == 2 || MLKEM_K == 4 +/************************************************* + * Name: mlk_poly_cbd_eta2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter MLKEM_ETA2. + * + * Arguments: - mlk_poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + * + * Specification: Implements @[FIPS203, Algorithm 8, SamplePolyCBD_eta2], where + * eta2 is specified per parameter set in @[FIPS203, Table 2] + * and represented as MLKEM_ETA2 here. + * + **************************************************/ + +/* Reference: `poly_cbd_eta2` in the reference implementation @[REF]. */ +static MLK_INLINE void mlk_poly_cbd_eta2( + mlk_poly *r, const uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(memory_no_alias(buf, MLKEM_ETA2 * MLKEM_N / 4)) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1))) +{ +#if MLKEM_ETA2 == 2 + mlk_poly_cbd2(r, buf); +#else +#error "Invalid value of MLKEM_ETA2" +#endif +} + +/* Reference: `poly_getnoise_eta2()` in the reference implementation @[REF]. + * - We include buffer zeroization. 
+ */ +MLK_INTERNAL_API +void mlk_poly_getnoise_eta2(mlk_poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +{ + MLK_ALIGN uint8_t buf[MLKEM_ETA2 * MLKEM_N / 4]; + MLK_ALIGN uint8_t extkey[MLKEM_SYMBYTES + 1]; + + mlk_memcpy(extkey, seed, MLKEM_SYMBYTES); + extkey[MLKEM_SYMBYTES] = nonce; + mlk_prf_eta2(buf, extkey); + + mlk_poly_cbd_eta2(r, buf); + + mlk_assert_abs_bound(r, MLKEM_N, MLKEM_ETA2 + 1); + + /* Specification: Partially implements + * @[FIPS203, Section 3.3, Destruction of intermediate values] */ + mlk_zeroize(buf, sizeof(buf)); + mlk_zeroize(extkey, sizeof(extkey)); +} +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + +#if MLKEM_K == 2 +/* Reference: Does not exist in the reference implementation @[REF]. + * - This implements a x4-batched version of `poly_getnoise_eta1()` + * and `poly_getnoise_eta2()` from the reference implementation, + * leveraging batched Keccak-f1600. + * - If a x4-batched Keccak-f1600 is available, we squeeze + * more random data than needed for the eta2 calls, to be + * able to use a x4-batched Keccak-f1600. 
*/ +MLK_INTERNAL_API +void mlk_poly_getnoise_eta1122_4x(mlk_poly *r0, mlk_poly *r1, mlk_poly *r2, + mlk_poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, + uint8_t nonce2, uint8_t nonce3) +{ +#if MLKEM_ETA2 >= MLKEM_ETA1 +#error mlk_poly_getnoise_eta1122_4x assumes MLKEM_ETA1 > MLKEM_ETA2 +#endif + MLK_ALIGN uint8_t buf[4][MLK_ALIGN_UP(MLKEM_ETA1 * MLKEM_N / 4)]; + MLK_ALIGN uint8_t extkey[4][MLK_ALIGN_UP(MLKEM_SYMBYTES + 1)]; + + mlk_memcpy(extkey[0], seed, MLKEM_SYMBYTES); + mlk_memcpy(extkey[1], seed, MLKEM_SYMBYTES); + mlk_memcpy(extkey[2], seed, MLKEM_SYMBYTES); + mlk_memcpy(extkey[3], seed, MLKEM_SYMBYTES); + extkey[0][MLKEM_SYMBYTES] = nonce0; + extkey[1][MLKEM_SYMBYTES] = nonce1; + extkey[2][MLKEM_SYMBYTES] = nonce2; + extkey[3][MLKEM_SYMBYTES] = nonce3; + + /* On systems with fast batched Keccak, we use 4-fold batched PRF, + * even though that means generating more random data in buf[2] and buf[3] + * than necessary. */ +#if !defined(FIPS202_X4_DEFAULT_IMPLEMENTATION) && \ + !defined(MLK_CONFIG_SERIAL_FIPS202_ONLY) + mlk_prf_eta1_x4(buf, extkey); +#else + mlk_prf_eta1(buf[0], extkey[0]); + mlk_prf_eta1(buf[1], extkey[1]); + mlk_prf_eta2(buf[2], extkey[2]); + mlk_prf_eta2(buf[3], extkey[3]); +#endif /* !(!FIPS202_X4_DEFAULT_IMPLEMENTATION && \ + !MLK_CONFIG_SERIAL_FIPS202_ONLY) */ + + mlk_poly_cbd_eta1(r0, buf[0]); + mlk_poly_cbd_eta1(r1, buf[1]); + mlk_poly_cbd_eta2(r2, buf[2]); + mlk_poly_cbd_eta2(r3, buf[3]); + + mlk_assert_abs_bound(r0, MLKEM_N, MLKEM_ETA1 + 1); + mlk_assert_abs_bound(r1, MLKEM_N, MLKEM_ETA1 + 1); + mlk_assert_abs_bound(r2, MLKEM_N, MLKEM_ETA2 + 1); + mlk_assert_abs_bound(r3, MLKEM_N, MLKEM_ETA2 + 1); + + /* Specification: Partially implements + * @[FIPS203, Section 3.3, Destruction of intermediate values] */ + mlk_zeroize(buf, sizeof(buf)); + mlk_zeroize(extkey, sizeof(extkey)); +} +#endif /* MLKEM_K == 2 */ + +/* To facilitate single-compilation-unit (SCU) builds, undefine all macros. 
+ * Don't modify by hand -- this is auto-generated by scripts/autogen. */ +#undef mlk_poly_cbd_eta1 +#undef mlk_poly_cbd_eta2 +#undef mlk_polyvec_basemul_acc_montgomery_cached_c diff --git a/mlkem_native/src/poly_k.h b/mlkem_native/src/poly_k.h new file mode 100644 index 0000000..9089a8e --- /dev/null +++ b/mlkem_native/src/poly_k.h @@ -0,0 +1,668 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [FIPS203] + * FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard + * National Institute of Standards and Technology + * https://csrc.nist.gov/pubs/fips/203/final + */ + +#ifndef MLK_POLY_K_H +#define MLK_POLY_K_H + +#include "common.h" +#include "compress.h" +#include "poly.h" + +/* Parameter set namespacing + * This is to facilitate building multiple instances + * of mlkem-native (e.g. with varying parameter sets) + * within a single compilation unit. */ +#define mlk_polyvec MLK_ADD_PARAM_SET(mlk_polyvec) +#define mlk_polymat MLK_ADD_PARAM_SET(mlk_polymat) +#define mlk_polyvec_mulcache MLK_ADD_PARAM_SET(mlk_polyvec_mulcache) +/* End of parameter set namespacing */ + +typedef struct +{ + mlk_poly vec[MLKEM_K]; +} MLK_ALIGN mlk_polyvec; + +typedef struct +{ + mlk_polyvec vec[MLKEM_K]; +} MLK_ALIGN mlk_polymat; + +typedef struct +{ + mlk_poly_mulcache vec[MLKEM_K]; +} MLK_ALIGN mlk_polyvec_mulcache; + +#define mlk_poly_compress_du MLK_NAMESPACE_K(poly_compress_du) +/************************************************* + * Name: mlk_poly_compress_du + * + * Description: Compression (du bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * - const mlk_poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. 
+ * + * Specification: Implements `ByteEncode_{d_u} (Compress_{d_u} (u))` + * in @[FIPS203, Algorithm 14 (K-PKE.Encrypt), L22], + * with level-specific d_u defined in @[FIPS203, Table 2], + * and given by MLKEM_DU here. + * + **************************************************/ +static MLK_INLINE void mlk_poly_compress_du( + uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DU], const mlk_poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(mlk_poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DU))) +{ +#if MLKEM_DU == 10 + mlk_poly_compress_d10(r, a); +#elif MLKEM_DU == 11 + mlk_poly_compress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define mlk_poly_decompress_du MLK_NAMESPACE_K(poly_decompress_du) +/************************************************* + * Name: mlk_poly_decompress_du + * + * Description: De-serialization and subsequent decompression (du bits) of a + * polynomial; approximate inverse of mlk_poly_compress_du + * + * Arguments: - mlk_poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DU bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + * Specification: Implements `Decompress_{d_u} (ByteDecode_{d_u} (u))` + * in @[FIPS203, Algorithm 15 (K-PKE.Decrypt), L3]. + * with level-specific d_u defined in @[FIPS203, Table 2], + * and given by MLKEM_DU here. 
+ * + **************************************************/ +static MLK_INLINE void mlk_poly_decompress_du( + mlk_poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(mlk_poly))) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ +#if MLKEM_DU == 10 + mlk_poly_decompress_d10(r, a); +#elif MLKEM_DU == 11 + mlk_poly_decompress_d11(r, a); +#else +#error "Invalid value of MLKEM_DU" +#endif +} + +#define mlk_poly_compress_dv MLK_NAMESPACE_K(poly_compress_dv) +/************************************************* + * Name: mlk_poly_compress_dv + * + * Description: Compression (dv bits) and subsequent serialization of a + * polynomial + * + * Arguments: - uint8_t *r: pointer to output byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * - const mlk_poly *a: pointer to input polynomial + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. + * + * Specification: Implements `ByteEncode_{d_v} (Compress_{d_v} (v))` + * in @[FIPS203, Algorithm 14 (K-PKE.Encrypt), L23]. + * with level-specific d_v defined in @[FIPS203, Table 2], + * and given by MLKEM_DV here. 
+ * + **************************************************/ +static MLK_INLINE void mlk_poly_compress_dv( + uint8_t r[MLKEM_POLYCOMPRESSEDBYTES_DV], const mlk_poly *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(a, sizeof(mlk_poly))) + requires(array_bound(a->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + assigns(memory_slice(r, MLKEM_POLYCOMPRESSEDBYTES_DV))) +{ /* MLKEM_DV is resolved by the preprocessor: 4 and 5 select the matching routine, any other value stops the build via #error. */ +#if MLKEM_DV == 4 + mlk_poly_compress_d4(r, a); +#elif MLKEM_DV == 5 + mlk_poly_compress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + + +#define mlk_poly_decompress_dv MLK_NAMESPACE_K(poly_decompress_dv) +/************************************************* + * Name: mlk_poly_decompress_dv + * + * Description: De-serialization and subsequent decompression (dv bits) of a + * polynomial; approximate inverse of mlk_poly_compress_dv + * + * Arguments: - mlk_poly *r: pointer to output polynomial + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYCOMPRESSEDBYTES_DV bytes) + * + * Upon return, the coefficients of the output polynomial are unsigned-canonical + * (non-negative and smaller than MLKEM_Q). + * + * Specification: Implements `Decompress_{d_v} (ByteDecode_{d_v} (v))` + * in @[FIPS203, Algorithm 15 (K-PKE.Decrypt), L4], + * with level-specific d_v defined in @[FIPS203, Table 2], + * and given by MLKEM_DV here. 
+ * + **************************************************/ +static MLK_INLINE void mlk_poly_decompress_dv( + mlk_poly *r, const uint8_t a[MLKEM_POLYCOMPRESSEDBYTES_DV]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYCOMPRESSEDBYTES_DV)) + requires(memory_no_alias(r, sizeof(mlk_poly))) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_bound(r->coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +{ /* Same compile-time dispatch on MLKEM_DV as in mlk_poly_compress_dv: only 4 and 5 are accepted. */ +#if MLKEM_DV == 4 + mlk_poly_decompress_d4(r, a); +#elif MLKEM_DV == 5 + mlk_poly_decompress_d5(r, a); +#else +#error "Invalid value of MLKEM_DV" +#endif +} + +#define mlk_polyvec_compress_du MLK_NAMESPACE_K(polyvec_compress_du) +/************************************************* + * Name: mlk_polyvec_compress_du + * + * Description: Compress and serialize vector of polynomials + * + * Arguments: - uint8_t *r: pointer to output byte array + * (needs space for MLKEM_POLYVECCOMPRESSEDBYTES_DU) + * - const mlk_polyvec *a: pointer to input vector of polynomials. + * Coefficients must be unsigned canonical, + * i.e. in [0,1,..,MLKEM_Q-1]. + * + * Specification: Implements `ByteEncode_{d_u} (Compress_{d_u} (u))` + * in @[FIPS203, Algorithm 14 (K-PKE.Encrypt), L22], + * with level-specific d_u defined in @[FIPS203, Table 2], + * and given by MLKEM_DU here. 
+ * + **************************************************/ +MLK_INTERNAL_API +void mlk_polyvec_compress_du(uint8_t r[MLKEM_POLYVECCOMPRESSEDBYTES_DU], + const mlk_polyvec *a) +__contract__( + requires(memory_no_alias(r, MLKEM_POLYVECCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(a, sizeof(mlk_polyvec))) + requires(forall(k0, 0, MLKEM_K, + array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) + assigns(memory_slice(r, MLKEM_POLYVECCOMPRESSEDBYTES_DU)) +); + +#define mlk_polyvec_decompress_du MLK_NAMESPACE_K(polyvec_decompress_du) +/************************************************* + * Name: mlk_polyvec_decompress_du + * + * Description: De-serialize and decompress vector of polynomials; + * approximate inverse of mlk_polyvec_compress_du + * + * Arguments: - mlk_polyvec r: pointer to output vector of polynomials. + * Output will have coefficients normalized to [0,..,q-1]. + * - const uint8_t *a: pointer to input byte array + * (of length MLKEM_POLYVECCOMPRESSEDBYTES_DU) + * + * Specification: Implements `Decompress_{d_u} (ByteDecode_{d_u} (u))` + * in @[FIPS203, Algorithm 15 (K-PKE.Decrypt), L3]. + * with level-specific d_u defined in @[FIPS203, Table 2], + * and given by MLKEM_DU here. 
+ * + **************************************************/ +MLK_INTERNAL_API +void mlk_polyvec_decompress_du(mlk_polyvec *r, + const uint8_t a[MLKEM_POLYVECCOMPRESSEDBYTES_DU]) +__contract__( + requires(memory_no_alias(a, MLKEM_POLYVECCOMPRESSEDBYTES_DU)) + requires(memory_no_alias(r, sizeof(mlk_polyvec))) + assigns(memory_slice(r, sizeof(mlk_polyvec))) + ensures(forall(k0, 0, MLKEM_K, + array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +); + +#define mlk_polyvec_tobytes MLK_NAMESPACE_K(polyvec_tobytes) +/************************************************* + * Name: mlk_polyvec_tobytes + * + * Description: Serialize vector of polynomials + * + * Arguments: - uint8_t *r: pointer to output byte array + * (needs space for MLKEM_POLYVECBYTES) + * - const mlk_polyvec *a: pointer to input vector of polynomials + * Each polynomial must have coefficients in [0,..,q-1]. + * + * Specification: Implements ByteEncode_12 @[FIPS203, Algorithm 5]. + * Extended to vectors as per + * @[FIPS203, 2.4.8 Applying Algorithms to Arrays] + * and @[FIPS203, 2.4.6, Matrices and Vectors] + * + **************************************************/ +MLK_INTERNAL_API +void mlk_polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const mlk_polyvec *a) +__contract__( + requires(memory_no_alias(a, sizeof(mlk_polyvec))) + requires(memory_no_alias(r, MLKEM_POLYVECBYTES)) + requires(forall(k0, 0, MLKEM_K, + array_bound(a->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) + assigns(memory_slice(r, MLKEM_POLYVECBYTES)) +); + +#define mlk_polyvec_frombytes MLK_NAMESPACE_K(polyvec_frombytes) +/************************************************* + * Name: mlk_polyvec_frombytes + * + * Description: De-serialize vector of polynomials; + * inverse of mlk_polyvec_tobytes + * + * Arguments: - mlk_polyvec *r: pointer to output vector of polynomials. + * Output will have coefficients + * normalized in [0..4095]. 
+ * - const uint8_t *a: pointer to input byte array (of length MLKEM_POLYVECBYTES) + * + * Specification: Implements ByteDecode_12 @[FIPS203, Algorithm 6]. + * Extended to vectors as per + * @[FIPS203, 2.4.8 Applying Algorithms to Arrays] + * and @[FIPS203, 2.4.6, Matrices and Vectors] + * + **************************************************/ +MLK_INTERNAL_API +void mlk_polyvec_frombytes(mlk_polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_polyvec))) + requires(memory_no_alias(a, MLKEM_POLYVECBYTES)) + assigns(memory_slice(r, sizeof(mlk_polyvec))) + ensures(forall(k0, 0, MLKEM_K, + array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_UINT12_LIMIT))) +); + +#define mlk_polyvec_ntt MLK_NAMESPACE_K(polyvec_ntt) +/************************************************* + * Name: mlk_polyvec_ntt + * + * Description: Apply forward NTT to all elements of a vector of polynomials. + * + * The input is assumed to be in normal order and + * coefficient-wise bound by MLKEM_Q in absolute value. + * + * The output polynomial is in bitreversed order, and + * coefficient-wise bound by MLK_NTT_BOUND in absolute value. 
+ * + * Arguments: - mlk_polyvec r: pointer to in/output vector of polynomials + * + * Specification: + * - Implements @[FIPS203, Algorithm 9, NTT] + * - Extended to vectors as per @[FIPS203, 2.4.6, Matrices and Vectors] + * + **************************************************/ +MLK_INTERNAL_API +void mlk_polyvec_ntt(mlk_polyvec *r) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_polyvec))) + requires(forall(j, 0, MLKEM_K, + array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLKEM_Q))) + assigns(memory_slice(r, sizeof(mlk_polyvec))) + ensures(forall(j, 0, MLKEM_K, + array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLK_NTT_BOUND))) +); + +#define mlk_polyvec_invntt_tomont MLK_NAMESPACE_K(polyvec_invntt_tomont) +/************************************************* + * Name: mlk_polyvec_invntt_tomont + * + * Description: Apply inverse NTT to all elements of a vector of polynomials + * and multiply by Montgomery factor 2^16 + * + * The input is assumed to be in bitreversed order, and can + * have arbitrary coefficients in int16_t. + * + * Each output polynomial is in normal order, and + * coefficient-wise bound by MLK_INVNTT_BOUND in absolute value. 
+ * + * Arguments: - mlk_polyvec *r: pointer to in/output vector of polynomials + * + * Specification: + * - Implements @[FIPS203, Algorithm 10, NTT^{-1}] + * - Extended to vectors as per @[FIPS203, 2.4.6, Matrices and Vectors] + * + **************************************************/ +MLK_INTERNAL_API +void mlk_polyvec_invntt_tomont(mlk_polyvec *r) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_polyvec))) + assigns(memory_slice(r, sizeof(mlk_polyvec))) + ensures(forall(j, 0, MLKEM_K, + array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLK_INVNTT_BOUND))) +); + +#define mlk_polyvec_basemul_acc_montgomery_cached \ + MLK_NAMESPACE_K(polyvec_basemul_acc_montgomery_cached) +/************************************************* + * Name: mlk_polyvec_basemul_acc_montgomery_cached + * + * Description: Scalar product of two vectors of polynomials in NTT domain, + * using mulcache for second operand. + * + * Bounds: + * - Every coefficient of a is assumed to be in [0..4095] + * - No bounds guarantees for the coefficients in the result. + * + * Arguments: - mlk_poly *r: pointer to output polynomial + * - const mlk_polyvec a: pointer to first input polynomial vector + * - const mlk_polyvec b: pointer to second input polynomial + * vector + * - const mlk_polyvec_mulcache b_cache: pointer to mulcache + * for second input polynomial vector. Can be computed + * via mlk_polyvec_mulcache_compute(). 
+ * + * Specification: Implements + * - @[FIPS203, Section 2.4.7, Eq (2.14)] + * - @[FIPS203, Algorithm 11, MultiplyNTTs] + * - @[FIPS203, Algorithm 12, BaseCaseMultiply] + * + **************************************************/ +MLK_INTERNAL_API +void mlk_polyvec_basemul_acc_montgomery_cached( + mlk_poly *r, const mlk_polyvec *a, const mlk_polyvec *b, + const mlk_polyvec_mulcache *b_cache) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(memory_no_alias(a, sizeof(mlk_polyvec))) + requires(memory_no_alias(b, sizeof(mlk_polyvec))) + requires(memory_no_alias(b_cache, sizeof(mlk_polyvec_mulcache))) + requires(forall(k1, 0, MLKEM_K, + array_bound(a->vec[k1].coeffs, 0, MLKEM_N, 0, MLKEM_UINT12_LIMIT))) + assigns(memory_slice(r, sizeof(mlk_poly))) +); + +#define mlk_polyvec_mulcache_compute MLK_NAMESPACE_K(polyvec_mulcache_compute) +/************************************************************ + * Name: mlk_polyvec_mulcache_compute + * + * Description: Computes the mulcache for a vector of polynomials in NTT domain + * + * The mulcache of a polynomial b := b0 + b1*X (degree at most 1) + * in Fq[X]/(X^2-zeta) is the value b1*zeta, needed when + * computing products of b in Fq[X]/(X^2-zeta). + * + * The mulcache of a polynomial in NTT domain -- which is + * a 128-tuple of such polynomials in Fq[X]/(X^2-zeta), + * for varying zeta -- is the 128-tuple of mulcaches of those + * polynomials. + * + * The mulcache of a vector of polynomials is the vector + * of mulcaches of its entries. 
+ * + * Arguments: - x: Pointer to mulcache to be populated + * - a: Pointer to input polynomial vector + * + * Specification: + * - Caches `b_1 * \gamma` in @[FIPS203, Algorithm 12, BaseCaseMultiply, L1] + * + ************************************************************/ +/* + * NOTE: The default C implementation of this function populates + * the mulcache with values in (-q,q), but this is not needed for the + * higher level safety proofs, and thus not part of the spec. 
+ */ +MLK_INTERNAL_API +void mlk_polyvec_mulcache_compute(mlk_polyvec_mulcache *x, const mlk_polyvec *a) +__contract__( + requires(memory_no_alias(x, sizeof(mlk_polyvec_mulcache))) + requires(memory_no_alias(a, sizeof(mlk_polyvec))) + assigns(memory_slice(x, sizeof(mlk_polyvec_mulcache))) +); + +#define mlk_polyvec_reduce MLK_NAMESPACE_K(polyvec_reduce) +/************************************************* + * Name: mlk_polyvec_reduce + * + * Description: Applies Barrett reduction to each coefficient + * of each element of a vector of polynomials; + * for details of the Barrett reduction see comments in poly.c + * + * Arguments: - mlk_polyvec *r: pointer to input/output vector of polynomials + * + * Specification: Normalizes on unsigned canonical representatives + * ahead of calling @[FIPS203, Compress_d, Eq (4.7)]. + * This is not made explicit in FIPS 203. + * + **************************************************/ +/* + * NOTE: The semantics of mlk_polyvec_reduce() is different in + * the reference implementation, which requires + * signed canonical output data. Unsigned canonical + * outputs are better suited to the only remaining + * use of mlk_poly_reduce() in the context of (de)serialization. 
+ */ +MLK_INTERNAL_API +void mlk_polyvec_reduce(mlk_polyvec *r) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_polyvec))) + assigns(memory_slice(r, sizeof(mlk_polyvec))) + ensures(forall(k0, 0, MLKEM_K, + array_bound(r->vec[k0].coeffs, 0, MLKEM_N, 0, MLKEM_Q))) +); + +#define mlk_polyvec_add MLK_NAMESPACE_K(polyvec_add) +/************************************************* + * Name: mlk_polyvec_add + * + * Description: Add vectors of polynomials + * + * Arguments: - mlk_polyvec *r: pointer to input-output vector of polynomials to + * be added to + * - const mlk_polyvec *b: pointer to second input vector of + * polynomials + * + * The coefficients of r and b must be so that the addition does + * not overflow. Otherwise, the behaviour of this function is undefined. 
+ * + * The coefficients returned in *r are in int16_t which is sufficient + * to prove type-safety of calling units. Therefore, no stronger + * ensures clause is required on this function. + * + * Specification: + * - @[FIPS203, 2.4.5, Arithmetic With Polynomials and NTT Representations] + * - Used in @[FIPS203, Algorithm 14 (K-PKE.Encrypt), L19] + * + **************************************************/ +MLK_INTERNAL_API +void mlk_polyvec_add(mlk_polyvec *r, const mlk_polyvec *b) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_polyvec))) + requires(memory_no_alias(b, sizeof(mlk_polyvec))) + requires(forall(j0, 0, MLKEM_K, + forall(k0, 0, MLKEM_N, + (int32_t)r->vec[j0].coeffs[k0] + b->vec[j0].coeffs[k0] <= INT16_MAX))) + requires(forall(j1, 0, MLKEM_K, + forall(k1, 0, MLKEM_N, + (int32_t)r->vec[j1].coeffs[k1] + b->vec[j1].coeffs[k1] >= INT16_MIN))) + assigns(memory_slice(r, sizeof(mlk_polyvec))) +); + +#define mlk_polyvec_tomont MLK_NAMESPACE_K(polyvec_tomont) +/************************************************* + * Name: mlk_polyvec_tomont + * + * Description: Inplace conversion of all coefficients of a polynomial + * vector from normal domain to Montgomery domain + * + * Bounds: Output < q in absolute value. + * + * + * Specification: Internal normalization required in `mlk_indcpa_keypair_derand` + * as part of matrix-vector multiplication + * @[FIPS203, Algorithm 13, K-PKE.KeyGen, L18]. 
+ * + **************************************************/ +MLK_INTERNAL_API +void mlk_polyvec_tomont(mlk_polyvec *r) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_polyvec))) + assigns(memory_slice(r, sizeof(mlk_polyvec))) + ensures(forall(j, 0, MLKEM_K, + array_abs_bound(r->vec[j].coeffs, 0, MLKEM_N, MLKEM_Q))) +); + +#define mlk_poly_getnoise_eta1_4x MLK_NAMESPACE_K(poly_getnoise_eta1_4x) +/************************************************* + * Name: mlk_poly_getnoise_eta1_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial + * distribution with parameter MLKEM_ETA1. + * + * Arguments: - mlk_poly *r{0,1,2,3}: pointer to output polynomial. The last + * polynomial pointer (r3) may be NULL; per the contract below it is only written when non-NULL. + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + * + * Specification: + * Implements 4x `SamplePolyCBD_{eta1} (PRF_{eta1} (sigma, N))`: + * - @[FIPS203, Algorithm 8, SamplePolyCBD_eta] + * - @[FIPS203, Eq (4.3), PRF_eta] + * - `SamplePolyCBD_{eta1} (PRF_{eta1} (sigma, N))` appears in + * @[FIPS203, Algorithm 13, K-PKE.KeyGen, L{9, 13}] + * @[FIPS203, Algorithm 14, K-PKE.Encrypt, L10] + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_getnoise_eta1_4x(mlk_poly *r0, mlk_poly *r1, mlk_poly *r2, + mlk_poly *r3, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, uint8_t nonce2, + uint8_t nonce3) +__contract__( + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + requires(memory_no_alias(r0, sizeof(mlk_poly))) + requires(memory_no_alias(r1, sizeof(mlk_poly))) + requires(memory_no_alias(r2, sizeof(mlk_poly))) + requires(r3 == NULL || memory_no_alias(r3, sizeof(mlk_poly))) + assigns(memory_slice(r0, sizeof(mlk_poly))) + assigns(memory_slice(r1, sizeof(mlk_poly))) + assigns(memory_slice(r2, sizeof(mlk_poly))) + assigns(r3 != NULL: 
memory_slice(r3, sizeof(mlk_poly))) + ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)) + ensures(array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)) + ensures(array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)) + ensures(r3 != NULL ==> array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1)) +); + +#if MLKEM_ETA1 == MLKEM_ETA2 +/* + * We only require mlk_poly_getnoise_eta2_4x for ml-kem-768 and ml-kem-1024 + * where MLKEM_ETA2 = MLKEM_ETA1 = 2. + * For ml-kem-512, mlk_poly_getnoise_eta1122_4x is used instead. In the equal-eta case the name is a plain alias of mlk_poly_getnoise_eta1_4x. + */ +#define mlk_poly_getnoise_eta2_4x mlk_poly_getnoise_eta1_4x +#endif /* MLKEM_ETA1 == MLKEM_ETA2 */ + +#if MLKEM_K == 2 || MLKEM_K == 4 +#define mlk_poly_getnoise_eta2 MLK_NAMESPACE_K(poly_getnoise_eta2) +/************************************************* + * Name: mlk_poly_getnoise_eta2 + * + * Description: Sample a polynomial deterministically from a seed and a nonce, + * with output polynomial close to centered binomial distribution + * with parameter MLKEM_ETA2. Only declared when MLKEM_K is 2 or 4. + * + * Arguments: - mlk_poly *r: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce: one-byte input nonce + * + * Specification: + * Implements `SamplePolyCBD_{eta2} (PRF_{eta2} (sigma, N))`: + * - @[FIPS203, Algorithm 8, SamplePolyCBD_eta] + * - @[FIPS203, Eq (4.3), PRF_eta] + * - `SamplePolyCBD_{eta2} (PRF_{eta2} (sigma, N))` appears in + * @[FIPS203, Algorithm 14, K-PKE.Encrypt, L14] + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_getnoise_eta2(mlk_poly *r, const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce) +__contract__( + requires(memory_no_alias(r, sizeof(mlk_poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(memory_slice(r, sizeof(mlk_poly))) + ensures(array_abs_bound(r->coeffs, 0, MLKEM_N, MLKEM_ETA2 + 1)) +); +#endif /* MLKEM_K == 2 || MLKEM_K == 4 */ + +#if MLKEM_K == 2 +#define mlk_poly_getnoise_eta1122_4x 
MLK_NAMESPACE_K(poly_getnoise_eta1122_4x) +/************************************************* + * Name: mlk_poly_getnoise_eta1122_4x + * + * Description: Batch sample four polynomials deterministically from a seed + * and nonces, with output polynomials close to centered binomial + * distribution with parameter MLKEM_ETA1 (r0, r1) or MLKEM_ETA2 (r2, r3), matching the bounds in the contract below + * + * Arguments: - mlk_poly *r{0,1,2,3}: pointer to output polynomial + * - const uint8_t *seed: pointer to input seed + * (of length MLKEM_SYMBYTES bytes) + * - uint8_t nonce{0,1,2,3}: one-byte input nonce + * + * Specification: + * Implements two instances each of + * `SamplePolyCBD_{eta1} (PRF_{eta1} (sigma, N))` and + * `SamplePolyCBD_{eta2} (PRF_{eta2} (sigma, N))`: + * - @[FIPS203, Algorithm 8, SamplePolyCBD_eta] + * - @[FIPS203, Eq (4.3), PRF_eta] + * - `SamplePolyCBD_{eta2} (PRF_{eta2} (sigma, N))` appears in + * @[FIPS203, Algorithm 14, K-PKE.Encrypt, L14] + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_getnoise_eta1122_4x(mlk_poly *r0, mlk_poly *r1, mlk_poly *r2, + mlk_poly *r3, + const uint8_t seed[MLKEM_SYMBYTES], + uint8_t nonce0, uint8_t nonce1, + uint8_t nonce2, uint8_t nonce3) +__contract__( + requires(memory_no_alias(r0, sizeof(mlk_poly))) + requires(memory_no_alias(r1, sizeof(mlk_poly))) + requires(memory_no_alias(r2, sizeof(mlk_poly))) + requires(memory_no_alias(r3, sizeof(mlk_poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES)) + assigns(memory_slice(r0, sizeof(mlk_poly))) + assigns(memory_slice(r1, sizeof(mlk_poly))) + assigns(memory_slice(r2, sizeof(mlk_poly))) + assigns(memory_slice(r3, sizeof(mlk_poly))) + ensures(array_abs_bound(r0->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r1->coeffs,0, MLKEM_N, MLKEM_ETA1 + 1) + && array_abs_bound(r2->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1) + && array_abs_bound(r3->coeffs,0, MLKEM_N, MLKEM_ETA2 + 1)) +); +#endif /* MLKEM_K == 2 */ + +#endif /* !MLK_POLY_K_H */ diff --git a/mlkem_native/src/randombytes.h 
b/mlkem_native/src/randombytes.h new file mode 100644 index 0000000..3e841d2 --- /dev/null +++ b/mlkem_native/src/randombytes.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ +#ifndef MLK_RANDOMBYTES_H +#define MLK_RANDOMBYTES_H + + +#include "cbmc.h" +#include "common.h" + +#if !defined(MLK_CONFIG_NO_RANDOMIZED_API) +#if !defined(MLK_CONFIG_CUSTOM_RANDOMBYTES) +/************************************************* + * Name: randombytes + * + * Description: Fill a buffer with cryptographically secure random bytes. + * + * mlkem-native does not provide an implementation of this + * function. It must be provided by the consumer. The prototype below is only declared when neither MLK_CONFIG_NO_RANDOMIZED_API nor MLK_CONFIG_CUSTOM_RANDOMBYTES is defined. + * + * To use a custom random byte source with a different name + * or signature, set MLK_CONFIG_CUSTOM_RANDOMBYTES and define + * mlk_randombytes directly. + * + * Arguments: - uint8_t *out: pointer to output buffer + * - size_t outlen: number of random bytes to write + * + * Returns: 0 on success, non-zero on failure. + * On failure, top-level APIs return MLK_ERR_RNG_FAIL. + * + **************************************************/ +int randombytes(uint8_t *out, size_t outlen); + +/************************************************* + * Name: mlk_randombytes + * + * Description: Internal wrapper around randombytes(). + * + * Fill a buffer with cryptographically secure random bytes. + * + * This function can be replaced by setting + * MLK_CONFIG_CUSTOM_RANDOMBYTES and defining mlk_randombytes + * directly. + * + * Arguments: - uint8_t *out: pointer to output buffer + * - size_t outlen: number of random bytes to write + * + * Returns: 0 on success, non-zero on failure. + * On failure, top-level APIs return MLK_ERR_RNG_FAIL. 
+ * + **************************************************/ +MLK_MUST_CHECK_RETURN_VALUE +static MLK_INLINE int mlk_randombytes(uint8_t *out, size_t outlen) +__contract__( + requires(memory_no_alias(out, outlen)) + assigns(memory_slice(out, outlen))) { return randombytes(out, outlen); } +#endif /* !MLK_CONFIG_CUSTOM_RANDOMBYTES */ +#endif /* !MLK_CONFIG_NO_RANDOMIZED_API */ +#endif /* !MLK_RANDOMBYTES_H */ diff --git a/mlkem_native/src/sampling.c b/mlkem_native/src/sampling.c new file mode 100644 index 0000000..945d12e --- /dev/null +++ b/mlkem_native/src/sampling.c @@ -0,0 +1,362 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [FIPS203] + * FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard + * National Institute of Standards and Technology + * https://csrc.nist.gov/pubs/fips/203/final + * + * - [REF] + * CRYSTALS-Kyber C reference implementation + * Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, Stehlé + * https://github.com/pq-crystals/kyber/tree/main/ref + */ + +#include "common.h" +#if !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) + +#include "debug.h" +#include "sampling.h" +#include "symmetric.h" + +/* Reference: `rej_uniform()` in the reference implementation @[REF]. + * - Our signature differs from the reference implementation + * in that it adds the offset and always expects the base of the + * target buffer. This avoids shifting the buffer base in the + * caller, which appears tricky to reason about. 
*/ +MLK_STATIC_TESTABLE unsigned mlk_rej_uniform_c(int16_t *r, unsigned target, + unsigned offset, + const uint8_t *buf, + unsigned buflen) +__contract__( + requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) + requires(memory_no_alias(r, sizeof(int16_t) * target)) + requires(memory_no_alias(buf, buflen)) + requires(array_bound(r, 0, offset, 0, MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * target)) + ensures(offset <= return_value && return_value <= target) + ensures(array_bound(r, 0, return_value, 0, MLKEM_Q))) +{ + unsigned ctr, pos; + int16_t val0, val1; + + mlk_assert_bound(r, offset, 0, MLKEM_Q); + + ctr = offset; + pos = 0; + /* pos + 3 cannot overflow due to the assumption buflen <= 4096 */ + while (ctr < target && pos + 3 <= buflen) + __loop__( + invariant(offset <= ctr && ctr <= target && pos <= buflen) + invariant(array_bound(r, 0, ctr, 0, MLKEM_Q))) + { + val0 = ((buf[pos + 0] >> 0) | (buf[pos + 1] << 8)) & 0xFFF; /* bits 0..11 of the 3-byte little-endian group */ + val1 = ((buf[pos + 1] >> 4) | (buf[pos + 2] << 4)) & 0xFFF; /* bits 12..23 of the same group */ + pos += 3; + + if (val0 < MLKEM_Q) + { + r[ctr++] = val0; + } + if (ctr < target && val1 < MLKEM_Q) + { + r[ctr++] = val1; + } + } + + mlk_assert_bound(r, ctr, 0, MLKEM_Q); + return ctr; +} + +/************************************************* + * Name: mlk_rej_uniform + * + * Description: Run rejection sampling on uniform random bytes to generate + * uniform random integers mod q + * + * Arguments: - int16_t *r: pointer to output buffer + * - unsigned target: requested number of 16-bit integers + * (uniform mod q). + * Must be <= 4096. + * - unsigned offset: number of 16-bit integers that have + * already been sampled. + * Must be <= target. + * - const uint8_t *buf: pointer to input buffer + * (assumed to be uniform random bytes) + * - unsigned buflen: length of input buffer in bytes + * Must be <= 4096. + * Must be a multiple of 3. + * + * Note: Strictly speaking, only a few values of buflen near UINT_MAX need + * excluding. 
The limit of 4096 is somewhat arbitrary but sufficient for all + * uses of this function. Similarly, the actual limit for target is UINT_MAX/2. + * + * Returns the new offset of sampled 16-bit integers, at most target, + * and at least the initial offset. + * If the new offset is strictly less than target, all of the input buffer + * is guaranteed to have been consumed. If it is equal to target, no information + * is provided on how many bytes of the input buffer have been consumed. + **************************************************/ + +/* Reference: `rej_uniform()` in the reference implementation @[REF]. + * - Our signature differs from the reference implementation + * in that it adds the offset and always expects the base of the + * target buffer. This avoids shifting the buffer base in the + * caller, which appears tricky to reason about. + * - If MLK_USE_NATIVE_REJ_UNIFORM is defined and offset == 0, the native implementation is tried first; the C implementation is used otherwise, or when the native one returns MLK_NATIVE_FUNC_FALLBACK. */ +static unsigned mlk_rej_uniform(int16_t *r, unsigned target, unsigned offset, + const uint8_t *buf, unsigned buflen) +__contract__( + requires(offset <= target && target <= 4096 && buflen <= 4096 && buflen % 3 == 0) + requires(memory_no_alias(r, sizeof(int16_t) * target)) + requires(memory_no_alias(buf, buflen)) + requires(array_bound(r, 0, offset, 0, MLKEM_Q)) + assigns(memory_slice(r, sizeof(int16_t) * target)) + ensures(offset <= return_value && return_value <= target) + ensures(array_bound(r, 0, return_value, 0, MLKEM_Q)) +) +{ +#if defined(MLK_USE_NATIVE_REJ_UNIFORM) + if (offset == 0) + { + int ret; + ret = mlk_rej_uniform_native(r, target, buf, buflen); + if (ret != MLK_NATIVE_FUNC_FALLBACK) + { + unsigned res = (unsigned)ret; + mlk_assert_bound(r, res, 0, MLKEM_Q); + return res; + } + } +#endif /* MLK_USE_NATIVE_REJ_UNIFORM */ + + return mlk_rej_uniform_c(r, target, offset, buf, buflen); +} + +#ifndef MLKEM_GEN_MATRIX_NBLOCKS +#define MLKEM_GEN_MATRIX_NBLOCKS \ + ((12 * MLKEM_N / 8 * ((uint32_t)1 << 12) / MLKEM_Q + MLK_XOF_RATE) / \ + MLK_XOF_RATE) +#endif + +#if 
!defined(MLK_CONFIG_SERIAL_FIPS202_ONLY) +/* Reference: Does not exist in the reference implementation @[REF]. + * - x4-batched version of `rej_uniform()` from the + * reference implementation, leveraging x4-batched Keccak-f1600. */ +MLK_INTERNAL_API +void mlk_poly_rej_uniform_x4(mlk_poly *vec0, mlk_poly *vec1, mlk_poly *vec2, + mlk_poly *vec3, + uint8_t seed[4][MLK_ALIGN_UP(MLKEM_SYMBYTES + 2)]) +{ + /* Temporary buffers for XOF output before rejection sampling */ + MLK_ALIGN uint8_t + buf[4][MLK_ALIGN_UP(MLKEM_GEN_MATRIX_NBLOCKS * MLK_XOF_RATE)]; + + /* Tracks the number of coefficients we have already sampled */ + unsigned ctr[4]; + mlk_xof_x4_ctx statex; + unsigned buflen; + + mlk_xof_x4_init(&statex); + mlk_xof_x4_absorb(&statex, seed, MLKEM_SYMBYTES + 2); + + /* + * Initially, squeeze a heuristic number of blocks, MLKEM_GEN_MATRIX_NBLOCKS. + * This should generate the matrix entries with high probability. + */ + mlk_xof_x4_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &statex); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * MLK_XOF_RATE; + ctr[0] = mlk_rej_uniform(vec0->coeffs, MLKEM_N, 0, buf[0], buflen); + ctr[1] = mlk_rej_uniform(vec1->coeffs, MLKEM_N, 0, buf[1], buflen); + ctr[2] = mlk_rej_uniform(vec2->coeffs, MLKEM_N, 0, buf[2], buflen); + ctr[3] = mlk_rej_uniform(vec3->coeffs, MLKEM_N, 0, buf[3], buflen); + + /* + * So long as not all matrix entries have been generated, squeeze + * one more block at a time until we're done. 
+ */ + buflen = MLK_XOF_RATE; + while (ctr[0] < MLKEM_N || ctr[1] < MLKEM_N || ctr[2] < MLKEM_N || + ctr[3] < MLKEM_N) + __loop__( + assigns(ctr, statex, + memory_slice(vec0, sizeof(mlk_poly)), + memory_slice(vec1, sizeof(mlk_poly)), + memory_slice(vec2, sizeof(mlk_poly)), + memory_slice(vec3, sizeof(mlk_poly)), + object_whole(buf)) + invariant(ctr[0] <= MLKEM_N && ctr[1] <= MLKEM_N) + invariant(ctr[2] <= MLKEM_N && ctr[3] <= MLKEM_N) + invariant(array_bound(vec0->coeffs, 0, ctr[0], 0, MLKEM_Q)) + invariant(array_bound(vec1->coeffs, 0, ctr[1], 0, MLKEM_Q)) + invariant(array_bound(vec2->coeffs, 0, ctr[2], 0, MLKEM_Q)) + invariant(array_bound(vec3->coeffs, 0, ctr[3], 0, MLKEM_Q))) + { + mlk_xof_x4_squeezeblocks(buf, 1, &statex); /* All four lanes are squeezed and resampled together; a lane that already holds MLKEM_N coefficients is passed offset == target, for which the sampling loop in mlk_rej_uniform_c does not execute. */ + ctr[0] = mlk_rej_uniform(vec0->coeffs, MLKEM_N, ctr[0], buf[0], buflen); + ctr[1] = mlk_rej_uniform(vec1->coeffs, MLKEM_N, ctr[1], buf[1], buflen); + ctr[2] = mlk_rej_uniform(vec2->coeffs, MLKEM_N, ctr[2], buf[2], buflen); + ctr[3] = mlk_rej_uniform(vec3->coeffs, MLKEM_N, ctr[3], buf[3], buflen); + } + + mlk_xof_x4_release(&statex); + + /* Specification: Partially implements + * @[FIPS203, Section 3.3, Destruction of intermediate values] */ + mlk_zeroize(buf, sizeof(buf)); +} +#endif /* !MLK_CONFIG_SERIAL_FIPS202_ONLY */ + +MLK_INTERNAL_API +void mlk_poly_rej_uniform(mlk_poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) +{ + mlk_xof_ctx state; + MLK_ALIGN uint8_t buf[MLKEM_GEN_MATRIX_NBLOCKS * MLK_XOF_RATE]; + unsigned ctr, buflen; + + mlk_xof_init(&state); + mlk_xof_absorb(&state, seed, MLKEM_SYMBYTES + 2); + + /* Initially, squeeze + sample heuristic number of MLKEM_GEN_MATRIX_NBLOCKS. + */ + /* This should generate the matrix entry with high probability. 
*/ + mlk_xof_squeezeblocks(buf, MLKEM_GEN_MATRIX_NBLOCKS, &state); + buflen = MLKEM_GEN_MATRIX_NBLOCKS * MLK_XOF_RATE; + ctr = mlk_rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen); + + /* Squeeze + sample one more block at a time until we're done */ + buflen = MLK_XOF_RATE; + while (ctr < MLKEM_N) + __loop__( + assigns(ctr, state, memory_slice(entry, sizeof(mlk_poly)), object_whole(buf)) + invariant(ctr <= MLKEM_N) + invariant(array_bound(entry->coeffs, 0, ctr, 0, MLKEM_Q))) + { + mlk_xof_squeezeblocks(buf, 1, &state); + ctr = mlk_rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, buflen); + } + + mlk_xof_release(&state); + + /* Specification: Partially implements + * @[FIPS203, Section 3.3, Destruction of intermediate values] */ + mlk_zeroize(buf, sizeof(buf)); +} + +/************************************************* + * Name: mlk_load32_littleendian + * + * Description: load 4 bytes into a 32-bit integer + * in little-endian order + * + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x + * + **************************************************/ + +/* Reference: `load32_littleendian()` in the reference implementation @[REF]. */ +static uint32_t mlk_load32_littleendian(const uint8_t x[4]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + r |= (uint32_t)x[3] << 24; + return r; +} + +/* Reference: `cbd2()` in the reference implementation @[REF]. 
*/ +MLK_INTERNAL_API +void mlk_poly_cbd2(mlk_poly *r, const uint8_t buf[2 * MLKEM_N / 4]) +{ + unsigned i; + /* Each outer iteration consumes 4 bytes of buf and writes 8 coefficients. */ for (i = 0; i < MLKEM_N / 8; i++) + __loop__( + invariant(i <= MLKEM_N / 8) + invariant(array_abs_bound(r->coeffs, 0, 8 * i, 3))) + { + unsigned j; + uint32_t t = mlk_load32_littleendian(buf + 4 * i); + /* Add the even-position and odd-position bits of t pairwise: afterwards every 2-bit field of d holds the sum (0..2) of the two bits of t at that position. */ uint32_t d = t & 0x55555555; + d += (t >> 1) & 0x55555555; + + for (j = 0; j < 8; j++) + __loop__( + invariant(i <= MLKEM_N / 8 && j <= 8) + invariant(array_abs_bound(r->coeffs, 0, 8 * i + j, 3))) + { + /* a and b are two adjacent 2-bit sums; their difference lies in [-2, 2]. */ const int16_t a = (d >> (4 * j + 0)) & 0x3; + const int16_t b = (d >> (4 * j + 2)) & 0x3; + r->coeffs[8 * i + j] = (int16_t)(a - b); + } + } +} + +#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_ETA1 == 3 +/************************************************* + * Name: mlk_load24_littleendian + * + * Description: load 3 bytes into a 32-bit integer + * in little-endian order. + * This function is only needed for ML-KEM-512 + * + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns 32-bit unsigned integer loaded from x (most significant byte is zero) + * + **************************************************/ + +/* Reference: `load24_littleendian()` in the reference implementation @[REF]. */ +static uint32_t mlk_load24_littleendian(const uint8_t x[3]) +{ + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + return r; +} + +/* Reference: `cbd3()` in the reference implementation @[REF].
*/ +MLK_INTERNAL_API +void mlk_poly_cbd3(mlk_poly *r, const uint8_t buf[3 * MLKEM_N / 4]) +{ + unsigned i; + /* Each outer iteration consumes 3 bytes of buf and writes 4 coefficients. */ for (i = 0; i < MLKEM_N / 4; i++) + __loop__( + invariant(i <= MLKEM_N / 4) + invariant(array_abs_bound(r->coeffs, 0, 4 * i, 4))) + { + unsigned j; + const uint32_t t = mlk_load24_littleendian(buf + 3 * i); + /* 0x00249249 selects bits 0, 3, 6, ..., 21. Adding the three shifted selections leaves, in every 3-bit field of d, the sum (0..3) of the three bits of t at that position. */ uint32_t d = t & 0x00249249; + d += (t >> 1) & 0x00249249; + d += (t >> 2) & 0x00249249; + + for (j = 0; j < 4; j++) + __loop__( + invariant(i <= MLKEM_N / 4 && j <= 4) + invariant(array_abs_bound(r->coeffs, 0, 4 * i + j, 4))) + { + /* a and b are two adjacent 3-bit sums; their difference lies in [-3, 3]. */ const int16_t a = (d >> (6 * j + 0)) & 0x7; + const int16_t b = (d >> (6 * j + 3)) & 0x7; + r->coeffs[4 * i + j] = (int16_t)(a - b); + } + } +} +#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_ETA1 == 3 */ + +#else /* !MLK_CONFIG_MULTILEVEL_NO_SHARED */ + +MLK_EMPTY_CU(sampling) + +#endif /* MLK_CONFIG_MULTILEVEL_NO_SHARED */ + +/* To facilitate single-compilation-unit (SCU) builds, undefine all macros. + * Don't modify by hand -- this is auto-generated by scripts/autogen.
*/ +#undef MLKEM_GEN_MATRIX_NBLOCKS diff --git a/mlkem_native/src/sampling.h b/mlkem_native/src/sampling.h new file mode 100644 index 0000000..24c26b3 --- /dev/null +++ b/mlkem_native/src/sampling.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [FIPS203] + * FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard + * National Institute of Standards and Technology + * https://csrc.nist.gov/pubs/fips/203/final + */ + +#ifndef MLK_SAMPLING_H +#define MLK_SAMPLING_H + +#include "cbmc.h" +#include "common.h" +#include "poly.h" + +#define mlk_poly_cbd2 MLK_NAMESPACE(poly_cbd2) +/************************************************* + * Name: mlk_poly_cbd2 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter eta=2 + * + * Arguments: - mlk_poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + * + * Specification: Implements @[FIPS203, Algorithm 8, SamplePolyCBD_2] + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_cbd2(mlk_poly *r, const uint8_t buf[2 * MLKEM_N / 4]); + +#if defined(MLK_CONFIG_MULTILEVEL_WITH_SHARED) || MLKEM_ETA1 == 3 +#define mlk_poly_cbd3 MLK_NAMESPACE(poly_cbd3) +/************************************************* + * Name: mlk_poly_cbd3 + * + * Description: Given an array of uniformly random bytes, compute + * polynomial with coefficients distributed according to + * a centered binomial distribution with parameter eta=3. 
+ * This function is only needed for ML-KEM-512 + * + * Arguments: - mlk_poly *r: pointer to output polynomial + * - const uint8_t *buf: pointer to input byte array + * + * Specification: Implements @[FIPS203, Algorithm 8, SamplePolyCBD_3] + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_cbd3(mlk_poly *r, const uint8_t buf[3 * MLKEM_N / 4]); +#endif /* MLK_CONFIG_MULTILEVEL_WITH_SHARED || MLKEM_ETA1 == 3 */ + +#if !defined(MLK_CONFIG_SERIAL_FIPS202_ONLY) +#define mlk_poly_rej_uniform_x4 MLK_NAMESPACE(poly_rej_uniform_x4) +/************************************************* + * Name: mlk_poly_rej_uniform_x4 + * + * Description: Generate four polynomials using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. + * + * Arguments: - mlk_poly *vec0, *vec1, *vec2, *vec3: + * Pointers to 4 polynomials to be sampled. + * - uint8_t seed[4][MLK_ALIGN_UP(MLKEM_SYMBYTES + 2)]: + * Pointer to consecutive array of seed buffers of size + * MLKEM_SYMBYTES + 2 each, plus padding for alignment.
+ * + * Specification: Implements @[FIPS203, Algorithm 7, SampleNTT] + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_rej_uniform_x4(mlk_poly *vec0, mlk_poly *vec1, mlk_poly *vec2, + mlk_poly *vec3, + uint8_t seed[4][MLK_ALIGN_UP(MLKEM_SYMBYTES + 2)]) +__contract__( + requires(memory_no_alias(vec0, sizeof(mlk_poly))) + requires(memory_no_alias(vec1, sizeof(mlk_poly))) + requires(memory_no_alias(vec2, sizeof(mlk_poly))) + requires(memory_no_alias(vec3, sizeof(mlk_poly))) + requires(memory_no_alias(seed, 4 * MLK_ALIGN_UP(MLKEM_SYMBYTES + 2))) + assigns(memory_slice(vec0, sizeof(mlk_poly))) + assigns(memory_slice(vec1, sizeof(mlk_poly))) + assigns(memory_slice(vec2, sizeof(mlk_poly))) + assigns(memory_slice(vec3, sizeof(mlk_poly))) + ensures(array_bound(vec0->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec1->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec2->coeffs, 0, MLKEM_N, 0, MLKEM_Q)) + ensures(array_bound(vec3->coeffs, 0, MLKEM_N, 0, MLKEM_Q))); +#endif /* !MLK_CONFIG_SERIAL_FIPS202_ONLY */ + +#define mlk_poly_rej_uniform MLK_NAMESPACE(poly_rej_uniform) +/************************************************* + * Name: mlk_poly_rej_uniform + * + * Description: Generate polynomial using rejection sampling + * on (pseudo-)uniformly random bytes sampled from a seed. + * + * Arguments: - mlk_poly *entry: Pointer to polynomial to be sampled. + * - uint8_t *seed: Pointer to seed buffer of size + * MLKEM_SYMBYTES + 2.
+ * + * Specification: Implements @[FIPS203, Algorithm 7, SampleNTT] + * + **************************************************/ +MLK_INTERNAL_API +void mlk_poly_rej_uniform(mlk_poly *entry, uint8_t seed[MLKEM_SYMBYTES + 2]) +__contract__( + requires(memory_no_alias(entry, sizeof(mlk_poly))) + requires(memory_no_alias(seed, MLKEM_SYMBYTES + 2)) + assigns(memory_slice(entry, sizeof(mlk_poly))) + ensures(array_bound(entry->coeffs, 0, MLKEM_N, 0, MLKEM_Q))); + +#endif /* !MLK_SAMPLING_H */ diff --git a/mlkem_native/src/symmetric.h b/mlkem_native/src/symmetric.h new file mode 100644 index 0000000..68d7e1a --- /dev/null +++ b/mlkem_native/src/symmetric.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [FIPS203] + * FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard + * National Institute of Standards and Technology + * https://csrc.nist.gov/pubs/fips/203/final + */ + +#ifndef MLK_SYMMETRIC_H +#define MLK_SYMMETRIC_H + + +#include "cbmc.h" +#include "common.h" +#include MLK_FIPS202_HEADER_FILE +#if !defined(MLK_CONFIG_SERIAL_FIPS202_ONLY) +#include MLK_FIPS202X4_HEADER_FILE +#endif + +/* Macros denoting FIPS 203 specific Hash functions */ + +/* Hash function H, @[FIPS203, Section 4.1, Eq (4.4)] */ +#define mlk_hash_h(OUT, IN, INBYTES) mlk_sha3_256(OUT, IN, INBYTES) + +/* Hash function G, @[FIPS203, Section 4.1, Eq (4.5)] */ +#define mlk_hash_g(OUT, IN, INBYTES) mlk_sha3_512(OUT, IN, INBYTES) + +/* Hash function J, @[FIPS203, Section 4.1, Eq (4.4)] */ +#define mlk_hash_j(OUT, IN, INBYTES) \ + mlk_shake256(OUT, MLKEM_SYMBYTES, IN, INBYTES) + +/* PRF function, @[FIPS203, Section 4.1, Eq (4.3)] + * Referring to (eq 4.3), `OUT` is assumed to contain `s || b`.
*/ +#define mlk_prf_eta(ETA, OUT, IN) \ + mlk_shake256(OUT, (ETA) * MLKEM_N / 4, IN, MLKEM_SYMBYTES + 1) +#define mlk_prf_eta1(OUT, IN) mlk_prf_eta(MLKEM_ETA1, OUT, IN) +#define mlk_prf_eta2(OUT, IN) mlk_prf_eta(MLKEM_ETA2, OUT, IN) +#define mlk_prf_eta1_x4(OUT, IN) \ + mlk_shake256x4((OUT)[0], (OUT)[1], (OUT)[2], (OUT)[3], \ + (MLKEM_ETA1 * MLKEM_N / 4), (IN)[0], (IN)[1], (IN)[2], \ + (IN)[3], MLKEM_SYMBYTES + 1) + +/* XOF function, FIPS 203 4.1 */ +#define mlk_xof_ctx mlk_shake128ctx +#define mlk_xof_x4_ctx mlk_shake128x4ctx +#define mlk_xof_init(CTX) mlk_shake128_init((CTX)) +#define mlk_xof_absorb(CTX, IN, INBYTES) \ + mlk_shake128_absorb_once((CTX), (IN), (INBYTES)) +#define mlk_xof_squeezeblocks(BUF, NBLOCKS, CTX) \ + mlk_shake128_squeezeblocks((BUF), (NBLOCKS), (CTX)) +#define mlk_xof_release(CTX) mlk_shake128_release((CTX)) + +#define mlk_xof_x4_init(CTX) mlk_shake128x4_init((CTX)) +#define mlk_xof_x4_absorb(CTX, IN, INBYTES) \ + mlk_shake128x4_absorb_once((CTX), (IN)[0], (IN)[1], (IN)[2], (IN)[3], \ + (INBYTES)) +#define mlk_xof_x4_squeezeblocks(BUF, NBLOCKS, CTX) \ + mlk_shake128x4_squeezeblocks((BUF)[0], (BUF)[1], (BUF)[2], (BUF)[3], \ + (NBLOCKS), (CTX)) +#define mlk_xof_x4_release(CTX) mlk_shake128x4_release((CTX)) + +#define MLK_XOF_RATE SHAKE128_RATE + +#endif /* !MLK_SYMMETRIC_H */ diff --git a/mlkem_native/src/sys.h b/mlkem_native/src/sys.h new file mode 100644 index 0000000..0ab8947 --- /dev/null +++ b/mlkem_native/src/sys.h @@ -0,0 +1,260 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ +#ifndef MLK_SYS_H +#define MLK_SYS_H + +#if !defined(MLK_CONFIG_NO_ASM) && (defined(__GNUC__) || defined(__clang__)) +#define MLK_HAVE_INLINE_ASM +#endif + +/* Try to find endianness, if not forced through CFLAGS already */ +#if !defined(MLK_SYS_LITTLE_ENDIAN) && !defined(MLK_SYS_BIG_ENDIAN) +#if defined(__BYTE_ORDER__) +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define MLK_SYS_LITTLE_ENDIAN 
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define MLK_SYS_BIG_ENDIAN +#else +#error "__BYTE_ORDER__ defined, but don't recognize value." +#endif +#endif /* __BYTE_ORDER__ */ + +/* MSVC does not define __BYTE_ORDER__. However, MSVC only supports + * little endian x86, x86_64, and AArch64. It is, hence, safe to assume + * little endian. */ +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_AMD64) || \ + defined(_M_IX86) || defined(_M_ARM64)) +#define MLK_SYS_LITTLE_ENDIAN +#endif + +#endif /* !MLK_SYS_LITTLE_ENDIAN && !MLK_SYS_BIG_ENDIAN */ + +/* Check if we're running on an AArch64 little endian system. _M_ARM64 is set by + * MSVC. */ +#if defined(__AARCH64EL__) || defined(_M_ARM64) +#define MLK_SYS_AARCH64 +#endif + +/* Check if we're running on an AArch64 big endian system. */ +#if defined(__AARCH64EB__) +#define MLK_SYS_AARCH64_EB +#endif + +/* Check if we're running on an Armv8.1-M system with MVE */ +#if defined(__ARM_ARCH_8_1M_MAIN__) || defined(__ARM_FEATURE_MVE) +#define MLK_SYS_ARMV81M_MVE +#endif + +#if defined(__x86_64__) +#define MLK_SYS_X86_64 +#if defined(__AVX2__) +#define MLK_SYS_X86_64_AVX2 +#endif +#endif /* __x86_64__ */ + +#if defined(MLK_SYS_LITTLE_ENDIAN) && defined(__powerpc64__) +#define MLK_SYS_PPC64LE +#endif + +#if defined(__riscv) && defined(__riscv_xlen) && __riscv_xlen == 64 +#define MLK_SYS_RISCV64 +#endif + +#if defined(MLK_SYS_RISCV64) && defined(__riscv_vector) && \ + defined(__riscv_v_intrinsic) +#define MLK_SYS_RISCV64_RVV +#endif + +#if defined(__riscv) && defined(__riscv_xlen) && __riscv_xlen == 32 +#define MLK_SYS_RISCV32 +#endif + +#if defined(_WIN32) +#define MLK_SYS_WINDOWS +#endif + +#if defined(__linux__) +#define MLK_SYS_LINUX +#endif + +#if defined(__APPLE__) +#define MLK_SYS_APPLE +#endif + +#if defined(MLK_FORCE_AARCH64) && !defined(MLK_SYS_AARCH64) +#error "MLK_FORCE_AARCH64 is set, but we don't seem to be on an AArch64 system." 
+#endif + +#if defined(MLK_FORCE_AARCH64_EB) && !defined(MLK_SYS_AARCH64_EB) +#error \ + "MLK_FORCE_AARCH64_EB is set, but we don't seem to be on an AArch64 system." +#endif + +#if defined(MLK_FORCE_X86_64) && !defined(MLK_SYS_X86_64) +#error "MLK_FORCE_X86_64 is set, but we don't seem to be on an X86_64 system." +#endif + +#if defined(MLK_FORCE_PPC64LE) && !defined(MLK_SYS_PPC64LE) +#error "MLK_FORCE_PPC64LE is set, but we don't seem to be on a PPC64LE system." +#endif + +#if defined(MLK_FORCE_RISCV64) && !defined(MLK_SYS_RISCV64) +#error "MLK_FORCE_RISCV64 is set, but we don't seem to be on a RISCV64 system." +#endif + +#if defined(MLK_FORCE_RISCV32) && !defined(MLK_SYS_RISCV32) +#error "MLK_FORCE_RISCV32 is set, but we don't seem to be on a RISCV32 system." +#endif + +/* + * MLK_INLINE: Hint for inlining. + * - MSVC: __inline + * - C99+: inline + * - GCC/Clang C90: __attribute__((unused)) to silence warnings + * - Other C90: empty + */ +#if !defined(MLK_INLINE) +#if defined(_MSC_VER) +#define MLK_INLINE __inline +#elif defined(inline) || \ + (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) +#define MLK_INLINE inline +#elif defined(__GNUC__) || defined(__clang__) +#define MLK_INLINE __attribute__((unused)) +#else +#define MLK_INLINE +#endif +#endif /* !MLK_INLINE */ + +/* + * MLK_ALWAYS_INLINE: Force inlining. 
+ * - MSVC: __forceinline + * - GCC/Clang C99+: MLK_INLINE __attribute__((always_inline)) + * - Other: MLK_INLINE (no forced inlining) + */ +#if !defined(MLK_ALWAYS_INLINE) +#if defined(_MSC_VER) +#define MLK_ALWAYS_INLINE __forceinline +#elif (defined(__GNUC__) || defined(__clang__)) && \ + (defined(inline) || \ + (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)) +#define MLK_ALWAYS_INLINE MLK_INLINE __attribute__((always_inline)) +#else +#define MLK_ALWAYS_INLINE MLK_INLINE +#endif +#endif /* !MLK_ALWAYS_INLINE */ + +#ifndef MLK_STATIC_TESTABLE +#define MLK_STATIC_TESTABLE static +#endif + +/* + * C90 does not have the restrict compiler directive yet. + * We don't use it in C90 builds. + */ +#if !defined(restrict) +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L +#define MLK_RESTRICT restrict +#else +#define MLK_RESTRICT +#endif + +#else /* !restrict */ + +#define MLK_RESTRICT restrict +#endif /* restrict */ + +#define MLK_DEFAULT_ALIGN 32 +#define MLK_ALIGN_UP(N) \ + ((((N) + (MLK_DEFAULT_ALIGN - 1)) / MLK_DEFAULT_ALIGN) * MLK_DEFAULT_ALIGN) +#if defined(__GNUC__) +#define MLK_ALIGN __attribute__((aligned(MLK_DEFAULT_ALIGN))) +#elif defined(_MSC_VER) +#define MLK_ALIGN __declspec(align(MLK_DEFAULT_ALIGN)) +#else +#define MLK_ALIGN /* No known support for alignment constraints */ +#endif + + +/* New X86_64 CPUs support Conflow-flow protection using the CET instructions. + * When enabled (through -fcf-protection=), all compilation units (including + * empty ones) need to support CET for this to work. + * For assembly, this means that source files need to signal support for + * CET by setting the appropriate note.gnu.property section. + * This can be achieved by including the header in all assembly file. + * This file also provides the _CET_ENDBR macro which needs to be placed at + * every potential target of an indirect branch. + * If CET is enabled _CET_ENDBR maps to the endbr64 instruction, otherwise + * it is empty. 
+ * In case the compiler does not support CET (e.g., +#define MLK_CET_ENDBR _CET_ENDBR +#else +#define MLK_CET_ENDBR +#endif +#endif /* MLK_SYS_X86_64 */ + +#if defined(MLK_CONFIG_CT_TESTING_ENABLED) && !defined(__ASSEMBLER__) +#include +#define MLK_CT_TESTING_SECRET(ptr, len) \ + VALGRIND_MAKE_MEM_UNDEFINED((ptr), (len)) +#define MLK_CT_TESTING_DECLASSIFY(ptr, len) \ + VALGRIND_MAKE_MEM_DEFINED((ptr), (len)) +#else /* MLK_CONFIG_CT_TESTING_ENABLED && !__ASSEMBLER__ */ +#define MLK_CT_TESTING_SECRET(ptr, len) \ + do \ + { \ + } while (0) +#define MLK_CT_TESTING_DECLASSIFY(ptr, len) \ + do \ + { \ + } while (0) +#endif /* !(MLK_CONFIG_CT_TESTING_ENABLED && !__ASSEMBLER__) */ + +#if defined(__GNUC__) || defined(__clang__) +#define MLK_MUST_CHECK_RETURN_VALUE __attribute__((warn_unused_result)) +#else +#define MLK_MUST_CHECK_RETURN_VALUE +#endif + +#if !defined(__ASSEMBLER__) +/* System capability enumeration */ +typedef enum +{ + /* x86_64 */ + MLK_SYS_CAP_AVX2, + /* AArch64 */ + MLK_SYS_CAP_SHA3 +} mlk_sys_cap; + +#if !defined(MLK_CONFIG_CUSTOM_CAPABILITY_FUNC) +#include "cbmc.h" + +MLK_MUST_CHECK_RETURN_VALUE +static MLK_INLINE int mlk_sys_check_capability(mlk_sys_cap cap) +__contract__( + ensures(return_value == 0 || return_value == 1) +) +{ + /* By default, we rely on compile-time feature detection/specification: + * If a feature is enabled at compile-time, we assume it is supported by + * the host that the resulting library/binary will be built on. + * If this assumption is not true, you MUST overwrite this function. + * See the documentation of MLK_CONFIG_CUSTOM_CAPABILITY_FUNC in + * mlkem_native_config.h for more information. 
*/ + (void)cap; + return 1; +} +#endif /* !MLK_CONFIG_CUSTOM_CAPABILITY_FUNC */ +#endif /* !__ASSEMBLER__ */ + +#endif /* !MLK_SYS_H */ diff --git a/mlkem_native/src/verify.c b/mlkem_native/src/verify.c new file mode 100644 index 0000000..db760f7 --- /dev/null +++ b/mlkem_native/src/verify.c @@ -0,0 +1,20 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ +#include "verify.h" + +#if !defined(MLK_USE_ASM_VALUE_BARRIER) && \ + !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) +/* + * Masking value used in constant-time functions from + * verify.h to block the compiler's range analysis and + * thereby reduce the risk of compiler-introduced branches. + */ +volatile uint64_t mlk_ct_opt_blocker_u64 = 0; + +#else /* !MLK_USE_ASM_VALUE_BARRIER && !MLK_CONFIG_MULTILEVEL_NO_SHARED */ + +MLK_EMPTY_CU(verify) + +#endif /* !(!MLK_USE_ASM_VALUE_BARRIER && !MLK_CONFIG_MULTILEVEL_NO_SHARED) */ diff --git a/mlkem_native/src/verify.h b/mlkem_native/src/verify.h new file mode 100644 index 0000000..a9bdeaa --- /dev/null +++ b/mlkem_native/src/verify.h @@ -0,0 +1,464 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* References + * ========== + * + * - [FIPS203] + * FIPS 203 Module-Lattice-Based Key-Encapsulation Mechanism Standard + * National Institute of Standards and Technology + * https://csrc.nist.gov/pubs/fips/203/final + * + * - [REF] + * CRYSTALS-Kyber C reference implementation + * Bos, Ducas, Kiltz, Lepoint, Lyubashevsky, Schanck, Schwabe, Seiler, StehlΓ© + * https://github.com/pq-crystals/kyber/tree/main/ref + * + * - [libmceliece] + * libmceliece implementation of Classic McEliece + * Bernstein, Chou + * https://lib.mceliece.org/ + * + * - [optblocker] + * PQC forum post on opt-blockers using volatile globals + * Daniel J. 
Bernstein + * https://groups.google.com/a/list.nist.gov/g/pqc-forum/c/hqbtIGFKIpU/m/H14H0wOlBgAJ + */ + +#ifndef MLK_VERIFY_H +#define MLK_VERIFY_H + + +#include "cbmc.h" +#include "common.h" + +/* Constant-time comparisons and conditional operations + + We reduce the risk for compilation into variable-time code + through the use of 'value barriers'. + + Functionally, a value barrier is a no-op. To the compiler, however, + it constitutes an arbitrary modification of its input, and therefore + hinders value propagation and range analysis. + + We consider two approaches to implement a value barrier: + - An empty inline asm block which marks the target value as clobbered. + - XOR'ing with the value of a volatile global that's set to 0; + see @[optblocker] for a discussion of this idea, and + @[libmceliece, inttypes/crypto_intN.h] for an implementation. + + The first approach is cheap because it only prevents the compiler + from reasoning about the value of the variable past the barrier, + but does not directly generate additional instructions. + + The second approach generates redundant loads and XOR operations + and therefore comes at a higher runtime cost. However, it appears + more robust towards optimization, as compilers should never drop + a volatile load. + + We use the empty-ASM value barrier for GCC and clang, and fall + back to the global volatile barrier otherwise. + + The global value barrier can be forced by setting + MLK_CONFIG_NO_ASM_VALUE_BARRIER. + +*/ + +#if defined(MLK_HAVE_INLINE_ASM) && !defined(MLK_CONFIG_NO_ASM_VALUE_BARRIER) +#define MLK_USE_ASM_VALUE_BARRIER +#endif + +#if !defined(MLK_USE_ASM_VALUE_BARRIER) + +/* + * Declaration of global volatile that the global value barrier + * is loading from and masking with.
*/ +#define mlk_ct_opt_blocker_u64 MLK_NAMESPACE(ct_opt_blocker_u64) +extern volatile uint64_t mlk_ct_opt_blocker_u64; + +/* Helper functions for obtaining global masks of various sizes. + * Each one reads the volatile global and truncates it to the requested width. */ + +/* This contract is not proved but treated as an axiom. + * + * Its validity relies on the assumption that the global opt-blocker + * constant mlk_ct_opt_blocker_u64 is not modified. + */ +static MLK_INLINE uint64_t mlk_ct_get_optblocker_u64(void) +__contract__(ensures(return_value == 0)) { return mlk_ct_opt_blocker_u64; } + +static MLK_INLINE uint8_t mlk_ct_get_optblocker_u8(void) +__contract__(ensures(return_value == 0)) { return (uint8_t)mlk_ct_get_optblocker_u64(); } + +static MLK_INLINE uint32_t mlk_ct_get_optblocker_u32(void) +__contract__(ensures(return_value == 0)) { return (uint32_t)mlk_ct_get_optblocker_u64(); } + +static MLK_INLINE int32_t mlk_ct_get_optblocker_i32(void) +__contract__(ensures(return_value == 0)) { return (int32_t)mlk_ct_get_optblocker_u64(); } + +/* Opt-blocker based implementation of value barriers: + * XOR the input with the value read from the volatile global. The contracts + * above take that value to be 0, so the result equals the input. */ +static MLK_INLINE uint32_t mlk_value_barrier_u32(uint32_t b) +__contract__(ensures(return_value == b)) { return (b ^ mlk_ct_get_optblocker_u32()); } + +static MLK_INLINE int32_t mlk_value_barrier_i32(int32_t b) +__contract__(ensures(return_value == b)) { return (b ^ mlk_ct_get_optblocker_i32()); } + +static MLK_INLINE uint8_t mlk_value_barrier_u8(uint8_t b) +__contract__(ensures(return_value == b)) { return (b ^ mlk_ct_get_optblocker_u8()); } + +#else /* !MLK_USE_ASM_VALUE_BARRIER */ + +static MLK_INLINE uint32_t mlk_value_barrier_u32(uint32_t b) +__contract__(ensures(return_value == b)) +{ + /* Empty-asm value barrier: the asm body is empty, and the "+r" constraint declares b as both read and written by it. The same pattern is used for the i32 and u8 variants below. */ __asm__ volatile("" : "+r"(b)); + return b; +} + +static MLK_INLINE int32_t mlk_value_barrier_i32(int32_t b) +__contract__(ensures(return_value == b)) +{ + __asm__ volatile("" : "+r"(b)); + return b; +} + +static MLK_INLINE uint8_t mlk_value_barrier_u8(uint8_t b) +__contract__(ensures(return_value == b)) +{ + __asm__ volatile("" : "+r"(b)); + return b; +} +
+#endif /* MLK_USE_ASM_VALUE_BARRIER */ + +#ifdef CBMC +#pragma CPROVER check push +#pragma CPROVER check disable "conversion" +#endif +/************************************************* + * Name: mlk_cast_uint16_to_int16 + * + * Description: Cast uint16 value to int16 + * + * Returns: For uint16_t x, the unique y in int16_t + * so that x == y mod 2^16. + * + * Concretely: + * - x < 32768: returns x + * - x >= 32768: returns x - 65536 + * + **************************************************/ +static MLK_ALWAYS_INLINE int16_t mlk_cast_uint16_to_int16(uint16_t x) +{ + /* + * PORTABILITY: This relies on uint16_t -> int16_t + * being implemented as the inverse of int16_t -> uint16_t, + * which is implementation-defined (C99 6.3.1.3 (3)) + * CBMC (correctly) fails to prove this conversion is OK, + * so we have to suppress that check here + */ + return (int16_t)x; +} +#ifdef CBMC +#pragma CPROVER check pop +#endif + +/************************************************* + * Name: mlk_cast_int32_to_uint16 + * + * Description: Cast int32 value to uint16 as per C standard. + * + * Returns: For int32_t x, the unique y in uint16_t + * so that x == y mod 2^16. + **************************************************/ +static MLK_ALWAYS_INLINE uint16_t mlk_cast_int32_to_uint16(int32_t x) +{ + return (uint16_t)(x & (int32_t)UINT16_MAX); +} + +/************************************************* + * Name: mlk_cast_int16_to_uint16 + * + * Description: Cast int16 value to uint16 as per C standard. + * + * Returns: For int16_t x, the unique y in uint16_t + * so that x == y mod 2^16. + * + * NOTE(review): the parameter is declared as int32_t although the name and + * description refer to int16_t; confirm whether int16_t was intended. + **************************************************/ +static MLK_ALWAYS_INLINE uint16_t mlk_cast_int16_to_uint16(int32_t x) +{ + return mlk_cast_int32_to_uint16((int32_t)x); +} + +/************************************************* + * Name: mlk_ct_cmask_neg_i16 + * + * Description: Return 0 if input is non-negative, and 0xFFFF otherwise.
+ * + * Arguments: int16_t x: Value to be converted into a mask + * + **************************************************/ + +/* Reference: Embedded in polynomial compression function in the + * reference implementation @[REF]. + * - Used as part of signed->unsigned conversion for modular + * representatives to detect whether the input is negative. + * This happens in `mlk_poly_reduce()` here, and as part of + * polynomial compression functions in the reference + * implementation. See `mlk_poly_reduce()`. + * - We use value barriers to reduce the risk of + * compiler-introduced branches. */ +static MLK_INLINE uint16_t mlk_ct_cmask_neg_i16(int16_t x) +__contract__(ensures(return_value == ((x < 0) ? 0xFFFF : 0))) +{ + /* Widen to 32 bits behind a value barrier; shifting right by 16 then leaves 0 for x >= 0 and -1 (all ones) for x < 0, assuming >> on a negative int32_t is an arithmetic shift. */ int32_t tmp = mlk_value_barrier_i32((int32_t)x); + tmp >>= 16; + return mlk_cast_int32_to_uint16(tmp); +} + +/************************************************* + * Name: mlk_ct_cmask_nonzero_u16 + * + * Description: Return 0 if input is zero, and 0xFFFF otherwise. + * + * Arguments: uint16_t x: Value to be converted into a mask + * + **************************************************/ + +/* Reference: Embedded in `cmov_int16()` in the reference implementation @[REF]. + * - Use value barrier and shift instead of `b = -b` to + * convert condition into mask. */ +static MLK_INLINE uint16_t mlk_ct_cmask_nonzero_u16(uint16_t x) +__contract__(ensures(return_value == ((x == 0) ? 0 : 0xFFFF))) +{ + /* -(int32_t)x is 0 for x == 0 and negative otherwise, so the same shift yields 0 or all ones (again assuming an arithmetic right shift). */ int32_t tmp = mlk_value_barrier_i32(-((int32_t)x)); + tmp >>= 16; + return mlk_cast_int32_to_uint16(tmp); +} + +/************************************************* + * Name: mlk_ct_cmask_nonzero_u8 + * + * Description: Return 0 if input is zero, and 0xFF otherwise. + * + * Arguments: uint8_t x: Value to be converted into a mask + * + **************************************************/ + +/* Reference: Embedded in `verify()` and `cmov()` in the + * reference implementation @[REF].
+ * - We include a value barrier not present in the + * reference implementation, to prevent the compiler + * from realizing that this function returns a mask. */ +static MLK_INLINE uint8_t mlk_ct_cmask_nonzero_u8(uint8_t x) +__contract__(ensures(return_value == ((x == 0) ? 0 : 0xFF))) +{ + uint16_t mask = mlk_ct_cmask_nonzero_u16((uint16_t)x); + return (uint8_t)(mask & 0xFF); +} + +/************************************************* + * Name: mlk_ct_sel_int16 + * + * Description: Functionally equivalent to cond ? a : b, + * but implemented with guards against + * compiler-introduced branches. + * + * Arguments: int16_t a: First alternative + * int16_t b: Second alternative + * uint16_t cond: Condition variable. + * + * Specification: + * - With `a = MLKEM_Q_HALF` and `b=0`, this essentially + * implements `Decompress_1` @[FIPS203, Eq (4.8)] in `mlk_poly_frommsg()`. + * - With `a = x + MLKEM_Q`, `b = x`, and `cond` indicating whether `x` + * is negative, implements signed->unsigned conversion of modular + * representatives. Questions of representation are not considered + * in the specification @[FIPS203, Section 2.4.1, "The pseudocode is + * agnostic regarding how an integer modulo m is represented in + * actual implementations"]. + * + **************************************************/ + +/* Reference: Embedded in polynomial compression function in the + * reference implementation @[REF]. + * - Used as part of signed->unsigned conversion for modular + * representatives. This happens in `mlk_poly_reduce()` here, + * and as part of polynomial compression functions in @[REF]. + * See `mlk_poly_reduce()`. + * - Barrier to reduce the risk of compiler-introduced branches. + * For `a = MLKEM_Q_HALF` and `b=0`, also embedded in + * `poly_frommsg()` from the reference implementation, which uses + * `cmov_int16()` instead. */ +static MLK_INLINE int16_t mlk_ct_sel_int16(int16_t a, int16_t b, uint16_t cond) +__contract__(ensures(return_value == (cond ?
a : b))) +{ + uint16_t au = mlk_cast_int16_to_uint16(a); + uint16_t bu = mlk_cast_int16_to_uint16(b); + /* With an all-ones mask, bu ^ (au ^ bu) == au; with a zero mask the result is bu. */ uint16_t res = bu ^ (mlk_ct_cmask_nonzero_u16(cond) & (au ^ bu)); + return mlk_cast_uint16_to_int16(res); +} + +/************************************************* + * Name: mlk_ct_sel_uint8 + * + * Description: Functionally equivalent to cond ? a : b, + * but implemented with guards against + * compiler-introduced branches. + * + * Arguments: uint8_t a: First alternative + * uint8_t b: Second alternative + * uint8_t cond: Condition variable. + * + **************************************************/ + +/* Reference: Embedded into `cmov()` in the reference implementation @[REF]. + * - Use value barrier to get mask from condition value. */ +static MLK_INLINE uint8_t mlk_ct_sel_uint8(uint8_t a, uint8_t b, uint8_t cond) +__contract__(ensures(return_value == (cond ? a : b))) +{ + return b ^ (mlk_ct_cmask_nonzero_u8(cond) & (a ^ b)); +} + +/************************************************* + * Name: mlk_ct_memcmp + * + * Description: Compare two arrays for equality in constant time. + * + * Arguments: const uint8_t *a: pointer to first byte array + * const uint8_t *b: pointer to second byte array + * size_t len: length of the byte arrays, upper-bounded + * to UINT16_MAX to control proof complexity + * only. + * + * Returns 0 if the byte arrays are equal, 0xFF otherwise. + * + * Specification: + * - Used to securely compute conditional move in + * @[FIPS203, Algorithm 18 (ML-KEM.Decaps_Internal, L9-11] + * + **************************************************/ + +/* Reference: `verify()` in the reference implementation @[REF] + * - We return `uint8_t`, not `int`. + * - We use an additional XOR-accumulator in the comparison loop + * which prevents early abort if the OR-accumulator is 0xFF. + * - We use a value barrier to convert the OR-accumulator into + * a mask. The reference implementation uses a shift which the + * compiler can argue to result in either 0 or 0xFF..FF.
*/ +static MLK_INLINE uint8_t mlk_ct_memcmp(const uint8_t *a, const uint8_t *b, + const size_t len) +__contract__( + requires(len <= UINT16_MAX) + requires(memory_no_alias(a, len)) + requires(memory_no_alias(b, len)) + ensures((return_value == 0) || (return_value == 0xFF)) + ensures((return_value == 0) == forall(i, 0, len, (a[i] == b[i])))) +{ + uint8_t r = 0, s = 0; + unsigned i; + + for (i = 0; i < len; i++) + __loop__( + invariant(i <= len) + invariant((r == 0) == (forall(k, 0, i, (a[k] == b[k]))))) + { + r |= a[i] ^ b[i]; + /* s is useless, but prevents the loop from being aborted once r=0xff. */ + s ^= a[i] ^ b[i]; + } + + /* + * - Convert r into a mask; this may not be necessary, but is an additional + * safeguard against leaking information about a and b. + * - XOR twice with s, separated by a value barrier, to prevent the compiler + * from dropping the s computation in the loop. + */ + return (mlk_value_barrier_u8(mlk_ct_cmask_nonzero_u8(r) ^ s) ^ s); +} + +/************************************************* + * Name: mlk_ct_cmov_zero + * + * Description: Copy len bytes from x to r if b is zero; + * don't modify r if b is non-zero. + * Assumes two's complement representation of negative integers. + * Runs in constant time. + * + * Arguments: uint8_t *r: pointer to output byte array + * const uint8_t *x: pointer to input byte array + * size_t len: Amount of bytes to be copied + * uint8_t b: Condition value. + * + * Specification: + * - Used to securely compute conditional move in + * @[FIPS203, Algorithm 18 (ML-KEM.Decaps_Internal, L9-11] + * + **************************************************/ + +/* Reference: `cmov()` in the reference implementation @[REF]. + * - We move if condition value is `0`, not `1`. + * - We use `mlk_ct_sel_uint8` for constant-time selection.
*/ +static MLK_INLINE void mlk_ct_cmov_zero(uint8_t *r, const uint8_t *x, + size_t len, uint8_t b) +__contract__( + requires(len <= MLK_MAX_BUFFER_SIZE) + requires(memory_no_alias(r, len)) + requires(memory_no_alias(x, len)) + assigns(memory_slice(r, len)) + ensures(forall(i, 0, len, (r[i] == (b == 0 ? x[i] : old(r)[i]))))) +{ + size_t i; + for (i = 0; i < len; i++) + __loop__( + invariant(i <= len) + invariant(forall(k, 0, i, r[k] == (b == 0 ? x[k] : loop_entry(r)[k])))) + { + r[i] = mlk_ct_sel_uint8(r[i], x[i], b); + } +} + +/************************************************* + * Name: mlk_zeroize + * + * Description: Force-zeroize a buffer. + * + * Arguments: uint8_t *r: pointer to byte array to be zeroed + * size_t len: Amount of bytes to be zeroed + * + * Specification: Used to implement + * @[FIPS203, Section 3.3, Destruction of intermediate values] + * + **************************************************/ + +/* Reference: Not present in the reference implementation @[REF]. */ +#if !defined(MLK_CONFIG_CUSTOM_ZEROIZE) +#if defined(MLK_SYS_WINDOWS) +#include +static MLK_INLINE void mlk_zeroize(void *ptr, size_t len) +__contract__( + requires(memory_no_alias(ptr, len)) + assigns(memory_slice(ptr, len))) { SecureZeroMemory(ptr, len); } +#elif defined(MLK_HAVE_INLINE_ASM) +#include +static MLK_INLINE void mlk_zeroize(void *ptr, size_t len) +__contract__( + requires(memory_no_alias(ptr, len)) + assigns(memory_slice(ptr, len))) +{ + mlk_memset(ptr, 0, len); + /* This follows OpenSSL and seems sufficient to prevent the compiler + * from optimizing away the memset. + * + * If there was a reliable way to detect availability of memset_s(), + * that would be preferred. */ + __asm__ volatile("" : : "r"(ptr) : "memory"); +} +#else /* !MLK_SYS_WINDOWS && MLK_HAVE_INLINE_ASM */ +#error No plausibly-secure implementation of mlk_zeroize available. Please provide your own using MLK_CONFIG_CUSTOM_ZEROIZE. 
+#endif /* !MLK_SYS_WINDOWS && !MLK_HAVE_INLINE_ASM */ +#endif /* !MLK_CONFIG_CUSTOM_ZEROIZE */ + +#endif /* !MLK_VERIFY_H */ diff --git a/mlkem_native/src/zetas.inc b/mlkem_native/src/zetas.inc new file mode 100644 index 0000000..00316da --- /dev/null +++ b/mlkem_native/src/zetas.inc @@ -0,0 +1,30 @@ +/* + * Copyright (c) The mlkem-native project authors + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT + */ + +/* + * WARNING: This file is auto-generated from scripts/autogen + * in the mlkem-native repository. + * Do not modify it directly. + */ + + +/* + * Table of zeta values used in the reference NTT and inverse NTT. + * See autogen for details. + */ +static MLK_ALIGN const int16_t mlk_zetas[128] = { + -1044, -758, -359, -1517, 1493, 1422, 287, 202, -171, 622, 1577, + 182, 962, -1202, -1474, 1468, 573, -1325, 264, 383, -829, 1458, + -1602, -130, -681, 1017, 732, 608, -1542, 411, -205, -1571, 1223, + 652, -552, 1015, -1293, 1491, -282, -1544, 516, -8, -320, -666, + -1618, -1162, 126, 1469, -853, -90, -271, 830, 107, -1421, -247, + -951, -398, 961, -1508, -725, 448, -1065, 677, -1275, -1103, 430, + 555, 843, -1251, 871, 1550, 105, 422, 587, 177, -235, -291, + -460, 1574, 1653, -246, 778, 1159, -147, -777, 1483, -602, 1119, + -1590, 644, -872, 349, 418, 329, -156, -75, 817, 1097, 603, + 610, 1322, -1285, -1465, 384, -1215, -136, 1218, -1335, -874, 220, + -1187, -1659, -1185, -1530, -1278, 794, -1510, -854, -870, 478, -108, + -308, 996, 991, 958, -1460, 1522, 1628, +}; diff --git a/mlkem_native/test/Makefile b/mlkem_native/test/Makefile new file mode 100644 index 0000000..e24da68 --- /dev/null +++ b/mlkem_native/test/Makefile @@ -0,0 +1,17 @@ +# ML-KEM-768 test suite +# Run on host (x86/ARM) to validate the integration before flashing +CC ?= gcc +CFLAGS = -std=c99 -O2 -Wall -Wextra -Wno-unused-result -I../.. -I.. 
+ +all: test_mlkem768 + +test_mlkem768: test_mlkem768.c ../../mlkem_native/mlkem_native.c + $(CC) $(CFLAGS) -DMLK_CONFIG_PARAMETER_SET=768 $< ../../mlkem_native/mlkem_native.c -o $@ + +test: test_mlkem768 + ./test_mlkem768 + +clean: + rm -f test_mlkem768 + +.PHONY: all test clean diff --git a/mlkem_native/test/test_mlkem768.c b/mlkem_native/test/test_mlkem768.c new file mode 100644 index 0000000..a6b5a41 --- /dev/null +++ b/mlkem_native/test/test_mlkem768.c @@ -0,0 +1,437 @@ +/* + * ML-KEM-768 & X-Wing KEM Test Suite + * Tests compliance with FIPS 203 and draft-connolly-cfrg-xwing-kem-09 + * Compatible with age v1.3.0 mlkem768x25519 recipient type + * + * Compile (from mlkem_native/test/): + * make test + */ + +#include +#include +#include +#include +#include + +#include "mlkem_native/mlkem_native.h" + +/* Access SHA3-256 and SHAKE256 from mlkem-native */ +extern void PQCP_MLKEM_NATIVE_MLKEM768_sha3_256(uint8_t *output, const uint8_t *input, size_t inlen); +extern void PQCP_MLKEM_NATIVE_MLKEM768_shake256(uint8_t *output, size_t outlen, const uint8_t *input, size_t inlen); +#define xwing_sha3_256 PQCP_MLKEM_NATIVE_MLKEM768_sha3_256 +#define xwing_shake256 PQCP_MLKEM_NATIVE_MLKEM768_shake256 + +/* X-Wing label: "\./", "/^\" = hex 5c 2e 2f 2f 5e 5c */ +static const uint8_t XWingLabel[6] = {0x5c, 0x2e, 0x2f, 0x2f, 0x5e, 0x5c}; + +int onlykey_mlkem_randombytes(uint8_t *out, size_t outlen) { + FILE *f = fopen("/dev/urandom", "rb"); + if (!f) return -1; + if (fread(out, 1, outlen, f) != outlen) { fclose(f); return -1; } + fclose(f); + return 0; +} + +/* X-Wing Combiner (Section 5.3): + * SHA3-256(ss_M || ss_X || ct_X || pk_X || XWingLabel) */ +static void xwing_combiner(uint8_t ss[32], + const uint8_t ss_M[32], const uint8_t ss_X[32], + const uint8_t ct_X[32], const uint8_t pk_X[32]) +{ + uint8_t buf[134]; + memcpy(buf, ss_M, 32); + memcpy(buf + 32, ss_X, 32); + memcpy(buf + 64, ct_X, 32); + memcpy(buf + 96, pk_X, 32); + memcpy(buf + 128, XWingLabel, 6); + 
xwing_sha3_256(ss, buf, 134); +} + +/* Minimal X25519 using tweetnacl-compatible scalar mult + * For testing only β€” firmware uses Curve25519 library */ +/* Base point for X25519 */ +static const uint8_t X25519_BASE[32] = {9}; + +/* We need X25519 scalar mult for the test. Use a simple + * implementation or link against tweetnacl. For now, we + * implement the test using the mlkem-native SHAKE256 to + * simulate β€” but for real interop tests we need actual X25519. + * + * Since we can't link tweetnacl in this standalone test, + * we test the ML-KEM and combiner components, and mark + * full X-Wing round-trip as requiring firmware. */ + +static int tests_run = 0; +static int tests_passed = 0; + +#define TEST(name) do { tests_run++; printf(" [%02d] %-55s ", tests_run, name); fflush(stdout); } while(0) +#define PASS() do { tests_passed++; printf("PASS\n"); } while(0) +#define FAIL(msg) do { printf("FAIL: %s\n", msg); } while(0) + +/* === ML-KEM-768 Tests === */ + +static int test_sizes(void) { + TEST("ML-KEM: sizes match FIPS 203"); + if (MLKEM768_PUBLICKEYBYTES != 1184 || MLKEM768_SECRETKEYBYTES != 2400 || + MLKEM768_CIPHERTEXTBYTES != 1088 || MLKEM_BYTES != 32) { FAIL("mismatch"); return 1; } + PASS(); return 0; +} + +static int test_roundtrip(void) { + TEST("ML-KEM: keygen + encaps + decaps round-trip"); + uint8_t pk[1184], sk[2400], ct[1088], ss1[32], ss2[32]; + if (crypto_kem_keypair(pk, sk) != 0) { FAIL("keygen"); return 1; } + if (crypto_kem_enc(ct, ss1, pk) != 0) { FAIL("encaps"); return 1; } + if (crypto_kem_dec(ss2, ct, sk) != 0) { FAIL("decaps"); return 1; } + if (memcmp(ss1, ss2, 32) != 0) { FAIL("ss mismatch"); return 1; } + PASS(); return 0; +} + +static int test_different_secrets(void) { + TEST("ML-KEM: multiple encaps produce different secrets"); + uint8_t pk[1184], sk[2400], ct1[1088], ct2[1088], ss1[32], ss2[32]; + crypto_kem_keypair(pk, sk); + crypto_kem_enc(ct1, ss1, pk); + crypto_kem_enc(ct2, ss2, pk); + if (memcmp(ss1, ss2, 32) == 0) { 
FAIL("identical"); return 1; } + PASS(); return 0; +} + +static int test_wrong_sk(void) { + TEST("ML-KEM: wrong SK implicit rejection"); + uint8_t pk1[1184], sk1[2400], pk2[1184], sk2[2400], ct[1088], ss1[32], ss2[32]; + crypto_kem_keypair(pk1, sk1); crypto_kem_keypair(pk2, sk2); + crypto_kem_enc(ct, ss1, pk1); crypto_kem_dec(ss2, ct, sk2); + if (memcmp(ss1, ss2, 32) == 0) { FAIL("matched"); return 1; } + PASS(); return 0; +} + +static int test_corrupted_ct(void) { + TEST("ML-KEM: corrupted CT implicit rejection"); + uint8_t pk[1184], sk[2400], ct[1088], ss1[32], ss2[32]; + crypto_kem_keypair(pk, sk); crypto_kem_enc(ct, ss1, pk); + ct[0] ^= 0x01; crypto_kem_dec(ss2, ct, sk); + if (memcmp(ss1, ss2, 32) == 0) { FAIL("matched"); return 1; } + PASS(); return 0; +} + +static int test_pk_in_sk(void) { + TEST("ML-KEM: PK at SK offset 1152"); + uint8_t pk[1184], sk[2400]; + crypto_kem_keypair(pk, sk); + if (memcmp(pk, sk + 1152, 1184) != 0) { FAIL("wrong offset"); return 1; } + PASS(); return 0; +} + +static int test_check_pk_sk(void) { + TEST("ML-KEM: check_pk/check_sk validate keys"); + uint8_t pk[1184], sk[2400]; + crypto_kem_keypair(pk, sk); + if (crypto_kem_check_pk(pk) != 0) { FAIL("pk rejected"); return 1; } + if (crypto_kem_check_sk(sk) != 0) { FAIL("sk rejected"); return 1; } + PASS(); return 0; +} + +static int test_derand_keygen(void) { + TEST("ML-KEM: deterministic keygen produces same keys"); + uint8_t coins[64]; + onlykey_mlkem_randombytes(coins, 64); + uint8_t pk1[1184], sk1[2400], pk2[1184], sk2[2400]; + crypto_kem_keypair_derand(pk1, sk1, coins); + crypto_kem_keypair_derand(pk2, sk2, coins); + if (memcmp(pk1, pk2, 1184) != 0) { FAIL("pk differs"); return 1; } + if (memcmp(sk1, sk2, 2400) != 0) { FAIL("sk differs"); return 1; } + PASS(); return 0; +} + +static int test_onlykey_flow(void) { + TEST("ML-KEM: simulated OnlyKey ctap_buffer flow"); + uint8_t ctap[7609]; memset(ctap, 0, sizeof(ctap)); + uint8_t *sk = ctap, *pk = ctap + 2400; + if 
(crypto_kem_keypair(pk, sk) != 0) { FAIL("keygen"); return 1; } + uint8_t pk_h[1184]; memcpy(pk_h, pk, 1184); + uint8_t flash[2400]; memcpy(flash, sk, 2400); + memset(ctap, 0, sizeof(ctap)); + uint8_t ct[1088], ss_h[32]; + if (crypto_kem_enc(ct, ss_h, pk_h) != 0) { FAIL("encaps"); return 1; } + memcpy(ctap, flash, 2400); + memcpy(ctap + 5465, ct, 1088); + uint8_t ss_d[32]; + if (crypto_kem_dec(ss_d, ctap + 5465, ctap) != 0) { FAIL("decaps"); return 1; } + if (memcmp(ss_h, ss_d, 32) != 0) { FAIL("ss mismatch"); return 1; } + PASS(); return 0; +} + +/* === X-Wing Spec Tests === */ + +static int test_xwing_sizes(void) { + TEST("X-Wing: PK=1216, CT=1120, SS=32, SK_seed=32"); + if ((1184 + 32) != 1216) { FAIL("pk"); return 1; } + if ((1088 + 32) != 1120) { FAIL("ct"); return 1; } + PASS(); return 0; +} + +static int test_xwing_label(void) { + TEST("X-Wing: label is \\./ /^\\ = hex 5c2e2f2f5e5c"); + uint8_t expected[6] = {0x5c, 0x2e, 0x2f, 0x2f, 0x5e, 0x5c}; + if (memcmp(XWingLabel, expected, 6) != 0) { FAIL("wrong label"); return 1; } + PASS(); return 0; +} + +static int test_xwing_shake256_expansion(void) { + TEST("X-Wing: SHAKE256(seed,96) is deterministic"); + uint8_t seed[32]; + onlykey_mlkem_randombytes(seed, 32); + uint8_t exp1[96], exp2[96]; + xwing_shake256(exp1, 96, seed, 32); + xwing_shake256(exp2, 96, seed, 32); + if (memcmp(exp1, exp2, 96) != 0) { FAIL("not deterministic"); return 1; } + /* Different seed => different expansion */ + seed[0] ^= 0x01; + xwing_shake256(exp2, 96, seed, 32); + if (memcmp(exp1, exp2, 96) == 0) { FAIL("different seed same output"); return 1; } + PASS(); return 0; +} + +static int test_xwing_derand_keygen(void) { + TEST("X-Wing: keygen from seed via SHAKE256 is deterministic"); + uint8_t seed[32]; + onlykey_mlkem_randombytes(seed, 32); + uint8_t expanded[96]; + xwing_shake256(expanded, 96, seed, 32); + /* ML-KEM keygen from expanded[0:64] */ + uint8_t pk1[1184], sk1[2400], pk2[1184], sk2[2400]; + crypto_kem_keypair_derand(pk1, sk1, 
expanded); + crypto_kem_keypair_derand(pk2, sk2, expanded); + if (memcmp(pk1, pk2, 1184) != 0) { FAIL("pk differs"); return 1; } + if (memcmp(sk1, sk2, 2400) != 0) { FAIL("sk differs"); return 1; } + PASS(); return 0; +} + +static int test_xwing_combiner_deterministic(void) { + TEST("X-Wing: combiner is deterministic"); + uint8_t ss_M[32], ss_X[32], ct_X[32], pk_X[32]; + onlykey_mlkem_randombytes(ss_M, 32); + onlykey_mlkem_randombytes(ss_X, 32); + onlykey_mlkem_randombytes(ct_X, 32); + onlykey_mlkem_randombytes(pk_X, 32); + uint8_t h1[32], h2[32]; + xwing_combiner(h1, ss_M, ss_X, ct_X, pk_X); + xwing_combiner(h2, ss_M, ss_X, ct_X, pk_X); + if (memcmp(h1, h2, 32) != 0) { FAIL("not deterministic"); return 1; } + PASS(); return 0; +} + +static int test_xwing_combiner_uses_all_inputs(void) { + TEST("X-Wing: combiner output changes with each input"); + uint8_t ss_M[32], ss_X[32], ct_X[32], pk_X[32], base[32], test[32]; + onlykey_mlkem_randombytes(ss_M, 32); + onlykey_mlkem_randombytes(ss_X, 32); + onlykey_mlkem_randombytes(ct_X, 32); + onlykey_mlkem_randombytes(pk_X, 32); + xwing_combiner(base, ss_M, ss_X, ct_X, pk_X); + + /* Flip bit in ss_M */ + ss_M[0] ^= 1; + xwing_combiner(test, ss_M, ss_X, ct_X, pk_X); + if (memcmp(base, test, 32) == 0) { FAIL("ss_M ignored"); return 1; } + ss_M[0] ^= 1; + + /* Flip bit in ss_X */ + ss_X[0] ^= 1; + xwing_combiner(test, ss_M, ss_X, ct_X, pk_X); + if (memcmp(base, test, 32) == 0) { FAIL("ss_X ignored"); return 1; } + ss_X[0] ^= 1; + + /* Flip bit in ct_X */ + ct_X[0] ^= 1; + xwing_combiner(test, ss_M, ss_X, ct_X, pk_X); + if (memcmp(base, test, 32) == 0) { FAIL("ct_X ignored"); return 1; } + ct_X[0] ^= 1; + + /* Flip bit in pk_X */ + pk_X[0] ^= 1; + xwing_combiner(test, ss_M, ss_X, ct_X, pk_X); + if (memcmp(base, test, 32) == 0) { FAIL("pk_X ignored"); return 1; } + + PASS(); return 0; +} + +static int test_xwing_combiner_layout(void) { + TEST("X-Wing: combiner = SHA3-256(ssM||ssX||ctX||pkX||label)"); + uint8_t ss_M[32], ss_X[32], 
ct_X[32], pk_X[32]; + onlykey_mlkem_randombytes(ss_M, 32); + onlykey_mlkem_randombytes(ss_X, 32); + onlykey_mlkem_randombytes(ct_X, 32); + onlykey_mlkem_randombytes(pk_X, 32); + + /* Compute via combiner function */ + uint8_t h_func[32]; + xwing_combiner(h_func, ss_M, ss_X, ct_X, pk_X); + + /* Compute manually per spec */ + uint8_t buf[134]; + memcpy(buf, ss_M, 32); + memcpy(buf + 32, ss_X, 32); + memcpy(buf + 64, ct_X, 32); + memcpy(buf + 96, pk_X, 32); + memcpy(buf + 128, XWingLabel, 6); + uint8_t h_manual[32]; + xwing_sha3_256(h_manual, buf, 134); + + if (memcmp(h_func, h_manual, 32) != 0) { FAIL("layout mismatch"); return 1; } + PASS(); return 0; +} + +static int test_xwing_mlkem_component_roundtrip(void) { + TEST("X-Wing: ML-KEM component works in hybrid context"); + /* Simulate X-Wing keygen ML-KEM part */ + uint8_t seed[32]; + onlykey_mlkem_randombytes(seed, 32); + uint8_t expanded[96]; + xwing_shake256(expanded, 96, seed, 32); + + uint8_t pk_M[1184], sk_M[2400]; + crypto_kem_keypair_derand(pk_M, sk_M, expanded); + + /* Encaps/decaps round-trip */ + uint8_t ct_M[1088], ss_enc[32], ss_dec[32]; + if (crypto_kem_enc(ct_M, ss_enc, pk_M) != 0) { FAIL("encaps"); return 1; } + if (crypto_kem_dec(ss_dec, ct_M, sk_M) != 0) { FAIL("decaps"); return 1; } + if (memcmp(ss_enc, ss_dec, 32) != 0) { FAIL("ss mismatch"); return 1; } + + /* Combined with fake X25519 values through combiner */ + uint8_t fake_ssX[32], fake_ctX[32], fake_pkX[32]; + onlykey_mlkem_randombytes(fake_ssX, 32); + onlykey_mlkem_randombytes(fake_ctX, 32); + onlykey_mlkem_randombytes(fake_pkX, 32); + + uint8_t combined1[32], combined2[32]; + xwing_combiner(combined1, ss_enc, fake_ssX, fake_ctX, fake_pkX); + xwing_combiner(combined2, ss_dec, fake_ssX, fake_ctX, fake_pkX); + if (memcmp(combined1, combined2, 32) != 0) { FAIL("combined diverged"); return 1; } + PASS(); return 0; +} + +static int test_xwing_wrong_mlkem_breaks_ss(void) { + TEST("X-Wing: wrong ML-KEM component breaks combined SS"); + uint8_t 
pk1[1184], sk1[2400], pk2[1184], sk2[2400], ct[1088]; + uint8_t ss_good[32], ss_bad[32], x_ss[32], ct_x[32], pk_x[32]; + crypto_kem_keypair(pk1, sk1); crypto_kem_keypair(pk2, sk2); + crypto_kem_enc(ct, ss_good, pk1); crypto_kem_dec(ss_bad, ct, sk2); + onlykey_mlkem_randombytes(x_ss, 32); + onlykey_mlkem_randombytes(ct_x, 32); + onlykey_mlkem_randombytes(pk_x, 32); + uint8_t c1[32], c2[32]; + xwing_combiner(c1, ss_good, x_ss, ct_x, pk_x); + xwing_combiner(c2, ss_bad, x_ss, ct_x, pk_x); + if (memcmp(c1, c2, 32) == 0) { FAIL("bad mlkem matched"); return 1; } + PASS(); return 0; +} + +static int test_xwing_wrong_x25519_breaks_ss(void) { + TEST("X-Wing: wrong X25519 component breaks combined SS"); + uint8_t mlkem_ss[32], ct_x[32], pk_x[32]; + uint8_t x_good[32], x_bad[32]; + onlykey_mlkem_randombytes(mlkem_ss, 32); + onlykey_mlkem_randombytes(ct_x, 32); + onlykey_mlkem_randombytes(pk_x, 32); + onlykey_mlkem_randombytes(x_good, 32); + onlykey_mlkem_randombytes(x_bad, 32); + uint8_t c1[32], c2[32]; + xwing_combiner(c1, mlkem_ss, x_good, ct_x, pk_x); + xwing_combiner(c2, mlkem_ss, x_bad, ct_x, pk_x); + if (memcmp(c1, c2, 32) == 0) { FAIL("bad x25519 matched"); return 1; } + PASS(); return 0; +} + +static int test_xwing_expanded_sk_layout(void) { + TEST("X-Wing: expanded SK layout sk_M(2400)||sk_X(32)||pk_X(32)"); + uint8_t seed[32]; + onlykey_mlkem_randombytes(seed, 32); + uint8_t expanded[96]; + xwing_shake256(expanded, 96, seed, 32); + + uint8_t pk_M[1184], sk_M[2400]; + crypto_kem_keypair_derand(pk_M, sk_M, expanded); + + /* Build expanded SK as firmware would */ + uint8_t xwing_sk[2464]; + memcpy(xwing_sk, sk_M, 2400); /* sk_M */ + memcpy(xwing_sk + 2400, expanded + 64, 32); /* sk_X */ + /* pk_X would be computed from sk_X via X25519(sk_X, BASE) */ + /* For this test, just verify the layout sizes */ + if (sizeof(xwing_sk) != 2464) { FAIL("size"); return 1; } + + /* Verify pk_M is extractable from sk_M at offset 1152 */ + if (memcmp(pk_M, sk_M + 1152, 1184) != 0) { 
FAIL("pk_M offset"); return 1; } + + PASS(); return 0; +} + +/* === Performance & Stress === */ + +static int test_performance(void) { + TEST("Performance: ML-KEM-768 (10 iterations)"); + uint8_t pk[1184], sk[2400], ct[1088], ss1[32], ss2[32]; + int N = 10; clock_t start, end; + start = clock(); for (int i=0;i 100) { //Slot 101-132 are for ECC, 1-4 are for RSA - if ((buffer[6] & 0x0F) == 1) { + if ((buffer[6] & 0x0F) == KEYTYPE_MLKEM768) { + okcrypto_mlkem_keygen(buffer); + return; + } else if ((buffer[6] & 0x0F) == KEYTYPE_XWING) { + okcrypto_xwing_keygen(buffer); + return; + } else if ((buffer[6] & 0x0F) == 1) { crypto_box_keypair(ecc_public_key, buffer+7); //Curve25519 } else if ((buffer[6] & 0x0F) == 2) { const struct uECC_Curve_t * curve = uECC_secp256r1(); //P-256 @@ -813,6 +865,18 @@ int okcrypto_shared_secret (uint8_t *pub, uint8_t *secret) { return 0; } + case KEYTYPE_MLKEM768: + // ML-KEM uses KEM (encaps/decaps), not DH shared secret + // Use okcrypto_mlkem_decaps() instead + hidprint("Error use ML-KEM decaps for this key type"); + return 1; + + case KEYTYPE_XWING: + // Hybrid uses combined KEM, not DH shared secret + // Use okcrypto_xwing_decaps() instead + hidprint("Error use X-Wing decaps for this key type"); + return 1; + default: hidprint("Error ECC type incorrect"); return 1; @@ -1669,9 +1733,332 @@ void okcrypto_split_sundae(uint8_t *state, uint8_t *iv, int len, uint8_t functio } } +/*************************************/ +//ML-KEM-768 operations +//Seed stored as 32-byte ECC key with KEYTYPE_MLKEM768 +/*************************************/ + +void okcrypto_mlkem_keygen (uint8_t *buffer) { + extern uint8_t ctap_buffer[CTAPHID_BUFFER_SIZE]; + #ifdef DEBUG + Serial.println(); + Serial.println("MLKEM KEYGEN MESSAGE RECEIVED"); + #endif + if (!CRYPTO_AUTH) { + pending_operation=CTAP2_ERR_USER_ACTION_PENDING; + return; + } + + // Generate 32-byte seed, store via existing ECC slot infrastructure + // buffer[5] = slot (set by caller), buffer[6] = type, 
buffer[7..38] = key data + RNG2(buffer + 7, 32); + buffer[6] = (KEYTYPE_MLKEM768 & 0x0F) | 0x20; // type with decrypt feature (bit 5) + ecc_priv_flash(buffer, false); + + // Expand seed to 64-byte coins: SHAKE256(seed, 64) + uint8_t coins[64]; + xwing_shake256(coins, 64, buffer + 7, 32); + + // Deterministic keygen + uint8_t *sk = ctap_buffer; + uint8_t *pk = ctap_buffer + MLKEM_SK_SIZE; + if (crypto_kem_keypair_derand(pk, sk, coins) != 0) { + hidprint("Error ML-KEM keygen failed"); + fadeoff(0); + memset(coins, 0, 64); + memset(ctap_buffer, 0, MLKEM_SK_SIZE + MLKEM_PK_SIZE); + return; + } + #ifdef DEBUG + Serial.println("ML-KEM keypair generated"); + Serial.print("PK first 16 bytes: "); + byteprint(pk, 16); + #endif + + pending_operation=CTAP2_ERR_DATA_READY; + send_transport_response(pk, MLKEM_PK_SIZE, true, true); + + memset(coins, 0, 64); + memset(ctap_buffer, 0, MLKEM_SK_SIZE + MLKEM_PK_SIZE); + fadeoff(85); +} + +void okcrypto_mlkem_decaps (uint8_t *buffer) { + extern uint8_t ctap_buffer[CTAPHID_BUFFER_SIZE]; + uint8_t ss[MLKEM_SS_SIZE]; + #ifdef DEBUG + Serial.println(); + Serial.println("MLKEM DECAPS MESSAGE RECEIVED"); + #endif + if (!CRYPTO_AUTH) { + process_packets(buffer, 0, 0); + pending_operation=OKDECRYPT_ERR_USER_ACTION_PENDING; + } + else if (CRYPTO_AUTH == 4) { + okcore_aes_gcm_decrypt(large_buffer, packet_buffer_details[0], packet_buffer_details[1], profilekey, large_buffer_offset); + if (large_buffer_offset != MLKEM_CT_SIZE) { + hidprint("Error ML-KEM CT wrong size"); + fadeoff(0); + memset(large_buffer, 0, LARGE_BUFFER_SIZE); + return; + } + + // Seed already loaded into ecc_private_key by okcore_flashget_ECC in dispatch + uint8_t coins[64]; + xwing_shake256(coins, 64, ecc_private_key, 32); + + uint8_t *sk = ctap_buffer; + uint8_t *pk = ctap_buffer + MLKEM_SK_SIZE; + if (crypto_kem_keypair_derand(pk, sk, coins) != 0) { + hidprint("Error ML-KEM key expansion failed"); + memset(coins, 0, 64); + memset(ctap_buffer, 0, MLKEM_SK_SIZE + 
MLKEM_PK_SIZE); + memset(large_buffer, 0, LARGE_BUFFER_SIZE); + fadeoff(0); + return; + } + memset(coins, 0, 64); + + if (crypto_kem_dec(ss, large_buffer, sk) != 0) { + hidprint("Error ML-KEM decaps failed"); + memset(ss, 0, sizeof(ss)); + memset(ctap_buffer, 0, MLKEM_SK_SIZE + MLKEM_PK_SIZE); + memset(large_buffer, 0, LARGE_BUFFER_SIZE); + fadeoff(0); + return; + } + #ifdef DEBUG + Serial.print("Shared secret: "); + byteprint(ss, MLKEM_SS_SIZE); + #endif + + pending_operation=CTAP2_ERR_DATA_READY; + outputmode=packet_buffer_details[2]; + send_transport_response(ss, MLKEM_SS_SIZE, true, true); + if (outputmode != WEBAUTHN) { + wipetasks(); + } + + memset(ss, 0, sizeof(ss)); + memset(ctap_buffer, 0, MLKEM_SK_SIZE + MLKEM_PK_SIZE); + memset(large_buffer, 0, LARGE_BUFFER_SIZE); + fadeoff(85); + } else { + #ifdef DEBUG + Serial.println("Waiting for challenge buttons to be pressed"); + #endif + } +} + +void okcrypto_mlkem_getpubkey (uint8_t *buffer) { + extern uint8_t ctap_buffer[CTAPHID_BUFFER_SIZE]; + #ifdef DEBUG + Serial.println(); + Serial.println("MLKEM GETPUBKEY MESSAGE RECEIVED"); + #endif + + // Seed already loaded into ecc_private_key by okcore_flashget_ECC in dispatch + uint8_t coins[64]; + xwing_shake256(coins, 64, ecc_private_key, 32); + + uint8_t *sk = ctap_buffer; + uint8_t *pk = ctap_buffer + MLKEM_SK_SIZE; + crypto_kem_keypair_derand(pk, sk, coins); + memset(coins, 0, 64); + + send_transport_response(pk, MLKEM_PK_SIZE, true, true); + memset(ctap_buffer, 0, MLKEM_SK_SIZE + MLKEM_PK_SIZE); +} + +/*************************************/ +//X-Wing KEM operations (draft-connolly-cfrg-xwing-kem-09) +//Compatible with age v1.3.0 mlkem768x25519 recipient type +//Seed stored as 32-byte ECC key with KEYTYPE_XWING +//PK: pk_M(1184) || pk_X(32) = 1216 bytes +//CT: ct_M(1088) || ct_X(32) = 1120 bytes +//SS: SHA3-256(ss_M || ss_X || ct_X || pk_X || XWingLabel) = 32 bytes +/*************************************/ + +void okcrypto_xwing_keygen (uint8_t *buffer) { + 
extern uint8_t ctap_buffer[CTAPHID_BUFFER_SIZE]; + #ifdef DEBUG + Serial.println(); + Serial.println("XWING KEYGEN MESSAGE RECEIVED"); + #endif + if (!CRYPTO_AUTH) { + pending_operation=CTAP2_ERR_USER_ACTION_PENDING; + return; + } + + // Generate 32-byte seed, store via existing ECC slot infrastructure + RNG2(buffer + 7, XWING_SEED_SIZE); + buffer[6] = (KEYTYPE_XWING & 0x0F) | 0x20; // type with decrypt feature (bit 5) + ecc_priv_flash(buffer, false); + + // Expand seed: SHAKE256(seed, 96) + uint8_t expanded[96]; + xwing_shake256(expanded, 96, buffer + 7, XWING_SEED_SIZE); + + // ML-KEM-768 deterministic keygen from expanded[0:64] + uint8_t *sk_M = ctap_buffer; + uint8_t *pk_M = ctap_buffer + MLKEM_SK_SIZE; + if (crypto_kem_keypair_derand(pk_M, sk_M, expanded) != 0) { + hidprint("Error X-Wing ML-KEM keygen failed"); + fadeoff(0); + memset(expanded, 0, 96); + memset(ctap_buffer, 0, MLKEM_SK_SIZE + MLKEM_PK_SIZE); + return; + } + + // X25519: pk_X = X25519(expanded[64:96], BASE) + uint8_t pk_X[32]; + crypto_scalarmult_base(pk_X, expanded + 64); + + // Build PK: pk_M(1184) || pk_X(32) + memcpy(pk_M + MLKEM_PK_SIZE, pk_X, 32); + + #ifdef DEBUG + Serial.println("X-Wing keypair generated"); + Serial.print("PK_M first 16: "); + byteprint(pk_M, 16); + Serial.print("PK_X: "); + byteprint(pk_X, 32); + #endif + + pending_operation=CTAP2_ERR_DATA_READY; + send_transport_response(pk_M, XWING_PK_SIZE, true, true); + + memset(expanded, 0, 96); + memset(pk_X, 0, 32); + memset(ctap_buffer, 0, MLKEM_SK_SIZE + XWING_PK_SIZE); + fadeoff(85); +} + +void okcrypto_xwing_decaps (uint8_t *buffer) { + extern uint8_t ctap_buffer[CTAPHID_BUFFER_SIZE]; + uint8_t ss_M[32]; + uint8_t ss_X[32]; + uint8_t ss[XWING_SS_SIZE]; + #ifdef DEBUG + Serial.println(); + Serial.println("XWING DECAPS MESSAGE RECEIVED"); + #endif + if (!CRYPTO_AUTH) { + process_packets(buffer, 0, 0); + pending_operation=OKDECRYPT_ERR_USER_ACTION_PENDING; + } + else if (CRYPTO_AUTH == 4) { + okcore_aes_gcm_decrypt(large_buffer, 
packet_buffer_details[0], packet_buffer_details[1], profilekey, large_buffer_offset); + if (large_buffer_offset != XWING_CT_SIZE) { + hidprint("Error X-Wing CT wrong size"); + fadeoff(0); + memset(large_buffer, 0, LARGE_BUFFER_SIZE); + return; + } + + uint8_t *ct_M = large_buffer; + uint8_t *ct_X = large_buffer + MLKEM_CT_SIZE; + + // Seed already loaded into ecc_private_key by okcore_flashget_ECC in dispatch + uint8_t expanded[96]; + xwing_shake256(expanded, 96, ecc_private_key, XWING_SEED_SIZE); + + // Reconstruct ML-KEM keypair from expanded[0:64] + uint8_t *sk_M = ctap_buffer; + uint8_t *pk_M = ctap_buffer + MLKEM_SK_SIZE; + if (crypto_kem_keypair_derand(pk_M, sk_M, expanded) != 0) { + hidprint("Error X-Wing key expansion failed"); + memset(expanded, 0, 96); + memset(ctap_buffer, 0, MLKEM_SK_SIZE + MLKEM_PK_SIZE); + memset(large_buffer, 0, LARGE_BUFFER_SIZE); + fadeoff(0); + return; + } + + // X25519 keys from expanded[64:96] + uint8_t *sk_X = expanded + 64; + uint8_t pk_X[32]; + crypto_scalarmult_base(pk_X, sk_X); + + // ML-KEM-768 decapsulation + if (crypto_kem_dec(ss_M, ct_M, sk_M) != 0) { + hidprint("Error X-Wing ML-KEM decaps failed"); + memset(ss_M, 0, 32); + memset(expanded, 0, 96); + memset(ctap_buffer, 0, MLKEM_SK_SIZE + MLKEM_PK_SIZE); + memset(large_buffer, 0, LARGE_BUFFER_SIZE); + fadeoff(0); + return; + } + + // X25519 ECDH: ss_X = X25519(sk_X, ct_X) + crypto_scalarmult(ss_X, sk_X, ct_X); + + // X-Wing Combiner + xwing_combiner(ss, ss_M, ss_X, ct_X, pk_X); + + #ifdef DEBUG + Serial.print("ss_M: "); byteprint(ss_M, 32); + Serial.print("ss_X: "); byteprint(ss_X, 32); + Serial.print("X-Wing SS: "); byteprint(ss, XWING_SS_SIZE); + #endif + + pending_operation=CTAP2_ERR_DATA_READY; + outputmode=packet_buffer_details[2]; + send_transport_response(ss, XWING_SS_SIZE, true, true); + if (outputmode != WEBAUTHN) { + wipetasks(); + } + + memset(ss_M, 0, 32); + memset(ss_X, 0, 32); + memset(ss, 0, XWING_SS_SIZE); + memset(expanded, 0, 96); + memset(pk_X, 0, 
32); + memset(ctap_buffer, 0, MLKEM_SK_SIZE + MLKEM_PK_SIZE); + memset(large_buffer, 0, LARGE_BUFFER_SIZE); + fadeoff(85); + } else { + #ifdef DEBUG + Serial.println("Waiting for challenge buttons to be pressed"); + #endif + } +} + +void okcrypto_xwing_getpubkey (uint8_t *buffer) { + extern uint8_t ctap_buffer[CTAPHID_BUFFER_SIZE]; + #ifdef DEBUG + Serial.println(); + Serial.println("XWING GETPUBKEY MESSAGE RECEIVED"); + #endif + + // Seed already loaded into ecc_private_key by okcore_flashget_ECC in dispatch + uint8_t expanded[96]; + xwing_shake256(expanded, 96, ecc_private_key, XWING_SEED_SIZE); + + uint8_t *sk_M = ctap_buffer; + uint8_t *pk_M = ctap_buffer + MLKEM_SK_SIZE; + crypto_kem_keypair_derand(pk_M, sk_M, expanded); + + uint8_t pk_X[32]; + crypto_scalarmult_base(pk_X, expanded + 64); + + memcpy(pk_M + MLKEM_PK_SIZE, pk_X, 32); + + send_transport_response(pk_M, XWING_PK_SIZE, true, true); + + memset(expanded, 0, 96); + memset(pk_X, 0, 32); + memset(ctap_buffer, 0, MLKEM_SK_SIZE + XWING_PK_SIZE); +} + void okcrypto_compute_pubkey() { memset(ecc_public_key, 0, sizeof(ecc_public_key)); + // PQ key types store seeds, not traditional ECC keys β€” pubkey is + // derived on demand via SHAKE256 expansion, not from ecc_private_key + if (type == KEYTYPE_MLKEM768 || type == KEYTYPE_XWING) return; + if (type == KEYTYPE_ED25519) { Ed25519::derivePublicKey(ecc_public_key, ecc_private_key); } diff --git a/onlykey/okcrypto.h b/onlykey/okcrypto.h index 44dc14f..a044470 100644 --- a/onlykey/okcrypto.h +++ b/onlykey/okcrypto.h @@ -134,6 +134,13 @@ extern void okcrypto_aes_gcm_decrypt2 (uint8_t * state, uint8_t * iv1, const uin extern void okcrypto_aes_cbc_encrypt (uint8_t * state, uint8_t * iv, const uint8_t * key, int len); extern void okcrypto_aes_cbc_decrypt (uint8_t * state, uint8_t * iv, const uint8_t * key, int len); +extern void okcrypto_mlkem_keygen (uint8_t *buffer); +extern void okcrypto_mlkem_decaps (uint8_t *buffer); +extern void okcrypto_mlkem_getpubkey (uint8_t 
*buffer); +extern void okcrypto_xwing_keygen (uint8_t *buffer); +extern void okcrypto_xwing_decaps (uint8_t *buffer); +extern void okcrypto_xwing_getpubkey (uint8_t *buffer); + #ifdef __cplusplus } diff --git a/readme.md b/readme.md index efee056..7b81569 100644 --- a/readme.md +++ b/readme.md @@ -18,6 +18,7 @@ The following cryptographic software is included in this distribution: "MICRO-ECC PROJECT" - https://github.com/kmackay/micro-ecc "ARDUINOLIBS PROJECT" - https://rweather.github.io/arduinolibs/crypto.html "YUBICO-C PROJECT" - https://github.com/Yubico/yubico-c + "MLKEM-NATIVE PROJECT" - https://github.com/pq-code-package/mlkem-native For more information on export restrictions see: http://www.apache.org/licenses/exports/