Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 137 additions & 0 deletions .github/workflows/avx512-sde.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# Copyright 2026 The OpenSSL Project Authors. All Rights Reserved.
# Copyright (c) 2026 Intel Corporation. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

# Run AVX512VL-specific tests under Intel SDE.
#
# GitHub Actions runners currently do not have AVX512 hardware.
# Intel SDE emulates AVX512 instructions and spoofs CPUID,
# so AVX512 code paths are exercised.
#
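# To update Intel SDE: find the new version, mirror ID and file date from
# https://www.intel.com/content/www/us/en/download/684897
# and update the three env vars below, together with the pinned checksums
# (SDE_SHA256 in the Linux job, $expected in the Windows job) in the
# "install Intel SDE" steps.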

name: AVX512 tests via Intel SDE

on: [pull_request, push]

permissions:
contents: read

# Components of the Intel SDE download URL and archive name; they are read by
# the "install Intel SDE" step of both the linux and the windows job.
env:
SDE_VERSION: 10.8.0
SDE_DATE: 2026-03-15
SDE_MIRROR_ID: 915934

jobs:
# Linux job: builds a static, FIPS-enabled OpenSSL and then runs selected
# test binaries and fipsinstall under Intel SDE.
linux:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
with:
persist-credentials: false

- name: install NASM
run: sudo apt-get install -y nasm

# Downloads the SDE archive, checks it against the pinned SHA-256, unpacks it,
# moves it to /opt/sde and appends that directory to GITHUB_PATH.
- name: install Intel SDE
run: |
SDE_URL="https://downloadmirror.intel.com/${SDE_MIRROR_ID}/sde-external-${SDE_VERSION}-${SDE_DATE}-lin.tar.xz"
SDE_SHA256="50b320cd226acef7a491f5b321fc1be3c3c7984f9e27a456e64894b5b0979dd3"
curl -fsSL -o /tmp/sde.tar.xz "$SDE_URL"
echo "$SDE_SHA256 /tmp/sde.tar.xz" | sha256sum -c -
mkdir /tmp/sde
tar -xf /tmp/sde.tar.xz -C /tmp/sde/
sudo mv /tmp/sde/sde-external-${SDE_VERSION}-${SDE_DATE}-lin /opt/sde
echo "/opt/sde" >> "$GITHUB_PATH"

- name: config
run: |
./config --banner=Configured --strict-warnings no-shared enable-fips

- name: build
run: make -j4

# Prints the host CPU model, then the OpenSSL version/CPU settings as seen
# from inside the emulator.
# NOTE(review): -skx presumably selects an emulated Skylake-server CPU with
# AVX512 - confirm against the Intel SDE documentation.
- name: show CPU and OpenSSL build info
run: |
cat /proc/cpuinfo | grep -m1 "model name"
sde64 -skx -- ./apps/openssl version -c

- name: ml_dsa_internal_test (AVX512VL via SDE)
run: sde64 -skx -- ./test/ml_dsa_internal_test

- name: sha3_x4_internal_test (AVX512VL via SDE)
run: sde64 -skx -- ./test/sha3_x4_internal_test

# Runs fipsinstall against the freshly built FIPS provider under SDE; the
# generated configuration is written to /tmp/fipsmodule.cnf.
- name: fipsinstall (FIPS KAT via SDE)
run: sde64 -skx -- ./apps/openssl fipsinstall -module ./providers/fips.so -out /tmp/fipsmodule.cnf -provider_name fips

# Windows job: same sequence as the linux job, built out-of-tree in _build
# with the MSVC environment loaded from the VCVARS batch file.
windows:
runs-on: windows-2022
env:
VCVARS: C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat
steps:
- uses: actions/checkout@v6
with:
persist-credentials: false

- name: install NASM
run: |
choco install nasm
"C:\Program Files\NASM" | Out-File -FilePath "$env:GITHUB_PATH" -Append

- name: install JOM
run: choco install jom

# Downloads the SDE archive, compares its SHA-256 with the pinned value,
# unpacks it with 7-Zip (xz stream piped into a tar extraction) under C:\sde,
# fails if sde.exe is missing, and appends the SDE directory to GITHUB_PATH.
- name: install Intel SDE
run: |
$url = "https://downloadmirror.intel.com/$env:SDE_MIRROR_ID/sde-external-$env:SDE_VERSION-$env:SDE_DATE-win.tar.xz"
$expected = "176F87C80EB42BB91B73E1428F4A0FD067DF322F901F9B4359B20B86B92C2BAE"
curl.exe -fsSL -o sde-win.tar.xz $url
$actual = (Get-FileHash sde-win.tar.xz -Algorithm SHA256).Hash
if ($actual -ne $expected) { throw "SDE SHA256 mismatch: got $actual" }
& "C:\Program Files\7-Zip\7z.exe" x sde-win.tar.xz -so | & "C:\Program Files\7-Zip\7z.exe" x -si -ttar -o"C:\sde"
$sdeRoot = "C:\sde\sde-external-$env:SDE_VERSION-$env:SDE_DATE-win"
if (-not (Test-Path "$sdeRoot\sde.exe")) { throw "sde.exe not found in $sdeRoot" }
"$sdeRoot" | Out-File -FilePath $env:GITHUB_PATH -Append

- name: prepare build directory
run: mkdir _build

# vcvars64.bat is called again in the build step: each step runs in its own
# shell, so the compiler environment does not carry over.
- name: config
working-directory: _build
shell: cmd
run: |
call "%VCVARS%"
perl ..\Configure --banner=Configured --strict-warnings no-shared enable-fips no-makedepend

- name: build
working-directory: _build
shell: cmd
run: |
call "%VCVARS%"
jom /j4 /S

# NOTE(review): unlike the SDE steps below, this step sets no `shell: cmd`,
# so it runs in the runner's default shell - confirm that is intended.
- name: show CPU and OpenSSL build info
working-directory: _build
run: sde -skx -- apps\openssl.exe version -c

- name: ml_dsa_internal_test (AVX512VL via SDE)
working-directory: _build
shell: cmd
run: sde -skx -- test\ml_dsa_internal_test.exe

- name: sha3_x4_internal_test (AVX512VL via SDE)
working-directory: _build
shell: cmd
run: sde -skx -- test\sha3_x4_internal_test.exe

# Runs fipsinstall against the built FIPS provider DLL under SDE; the
# generated configuration is written to fipsmodule.cnf inside _build.
- name: fipsinstall (FIPS KAT via SDE)
working-directory: _build
shell: cmd
run: sde -skx -- apps\openssl.exe fipsinstall -module providers\fips.dll -out fipsmodule.cnf -provider_name fips
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ OpenSSL Releases

### Changes between 4.0 and 4.1 [xx XXX xxxx]

* Added AVX512 optimized SHAKE x4 operations for ML-DSA on x86_64.

*Marcel Cornu and Tomasz Kantecki*

* Added test framework for testing function memory allocation failures.

*Jakub Zelenka*
Expand Down
5 changes: 5 additions & 0 deletions crypto/ml_dsa/ml_dsa_hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
* https://www.openssl.org/source/license.html
*/

#ifndef OSSL_CRYPTO_ML_DSA_HASH_H
#define OSSL_CRYPTO_ML_DSA_HASH_H

#include <openssl/evp.h>

static ossl_inline ossl_unused int
Expand Down Expand Up @@ -39,3 +42,5 @@ shake_xof_3(EVP_MD_CTX *ctx, const EVP_MD *md, const uint8_t *in1, size_t in1_le
&& EVP_DigestUpdate(ctx, in3, in3_len)
&& EVP_DigestSqueeze(ctx, out, out_len);
}

#endif /* OSSL_CRYPTO_ML_DSA_HASH_H */
16 changes: 10 additions & 6 deletions crypto/ml_dsa/ml_dsa_key.c
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ int ossl_ml_dsa_key_has(const ML_DSA_KEY *key, int selection)
* @returns 1 on success, or 0 on failure.
*/
static int public_from_private(const ML_DSA_KEY *key, EVP_MD_CTX *md_ctx,
VECTOR *t1, VECTOR *t0)
const OSSL_ML_DSA_SAMPLE_OPS *sample_ops, VECTOR *t1, VECTOR *t0)
{
int ret = 0;
const ML_DSA_PARAMS *params = key->params;
Expand All @@ -351,7 +351,7 @@ static int public_from_private(const ML_DSA_KEY *key, EVP_MD_CTX *md_ctx,
matrix_init(&a_ntt, s1_ntt.poly + l, k, l);

/* Using rho generate A' = A in NTT form */
if (!matrix_expand_A(md_ctx, key->shake128_md, key->rho, &a_ntt))
if (!sample_ops->matrix_expand_A(md_ctx, key->shake128_md, key->rho, &a_ntt))
goto err;

/* t = NTT_inv(A' * NTT(s1)) + s2 */
Expand All @@ -376,14 +376,15 @@ static int public_from_private(const ML_DSA_KEY *key, EVP_MD_CTX *md_ctx,
int ossl_ml_dsa_key_public_from_private(ML_DSA_KEY *key)
{
int ret = 0;
const OSSL_ML_DSA_SAMPLE_OPS *sample_ops = ossl_ml_dsa_sample_ops();
VECTOR t0;
EVP_MD_CTX *md_ctx = NULL;

if (!vector_alloc(&t0, key->params->k)) /* t0 is already in the private key */
return 0;
ret = ((md_ctx = EVP_MD_CTX_new()) != NULL)
&& ossl_ml_dsa_key_pub_alloc(key) /* allocate space for t1 */
&& public_from_private(key, md_ctx, &key->t1, &t0)
&& public_from_private(key, md_ctx, sample_ops, &key->t1, &t0)
&& vector_equal(&t0, &key->t0) /* compare the generated t0 to the expected */
&& ossl_ml_dsa_pk_encode(key)
&& shake_xof(md_ctx, key->shake256_md,
Expand All @@ -397,6 +398,7 @@ int ossl_ml_dsa_key_public_from_private(ML_DSA_KEY *key)
int ossl_ml_dsa_key_pairwise_check(const ML_DSA_KEY *key)
{
int ret = 0;
const OSSL_ML_DSA_SAMPLE_OPS *sample_ops = ossl_ml_dsa_sample_ops();
VECTOR t1, t0;
POLY *polys = NULL;
uint32_t k = (uint32_t)key->params->k;
Expand All @@ -414,7 +416,7 @@ int ossl_ml_dsa_key_pairwise_check(const ML_DSA_KEY *key)

vector_init(&t1, polys, k);
vector_init(&t0, polys + k, k);
if (!public_from_private(key, md_ctx, &t1, &t0))
if (!public_from_private(key, md_ctx, sample_ops, &t1, &t0))
goto err;

ret = vector_equal(&t1, &key->t1) && vector_equal(&t0, &key->t0);
Expand All @@ -435,6 +437,7 @@ int ossl_ml_dsa_key_pairwise_check(const ML_DSA_KEY *key)
static int keygen_internal(ML_DSA_KEY *out)
{
int ret = 0;
const OSSL_ML_DSA_SAMPLE_OPS *sample_ops = ossl_ml_dsa_sample_ops();
uint8_t augmented_seed[ML_DSA_SEED_BYTES + 2];
uint8_t expanded_seed[ML_DSA_RHO_BYTES + ML_DSA_PRIV_SEED_BYTES + ML_DSA_K_BYTES];
const uint8_t *const rho = expanded_seed; /* p = Public Random Seed */
Expand All @@ -461,8 +464,9 @@ static int keygen_internal(ML_DSA_KEY *out)
memcpy(out->rho, rho, sizeof(out->rho));
memcpy(out->K, K, sizeof(out->K));

ret = vector_expand_S(md_ctx, out->shake256_md, params->eta, priv_seed, &out->s1, &out->s2)
&& public_from_private(out, md_ctx, &out->t1, &out->t0)
ret = sample_ops->vector_expand_S(md_ctx, out->shake256_md, params->eta,
priv_seed, &out->s1, &out->s2)
&& public_from_private(out, md_ctx, sample_ops, &out->t1, &out->t0)
&& ossl_ml_dsa_pk_encode(out)
&& shake_xof(md_ctx, out->shake256_md, out->pub_encoding, out->params->pk_len,
out->tr, sizeof(out->tr))
Expand Down
19 changes: 16 additions & 3 deletions crypto/ml_dsa/ml_dsa_local.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,23 @@ typedef struct vector_st VECTOR;
typedef struct matrix_st MATRIX;
typedef struct ml_dsa_sig_st ML_DSA_SIG;

int ossl_ml_dsa_matrix_expand_A(EVP_MD_CTX *g_ctx, const EVP_MD *md,
typedef int(ML_DSA_MATRIX_EXPAND_A_FN)(EVP_MD_CTX *g_ctx, const EVP_MD *md,
const uint8_t *rho, MATRIX *out);
int ossl_ml_dsa_vector_expand_S(EVP_MD_CTX *h_ctx, const EVP_MD *md, int eta,
const uint8_t *seed, VECTOR *s1, VECTOR *s2);
typedef int(ML_DSA_VECTOR_EXPAND_S_FN)(EVP_MD_CTX *h_ctx, const EVP_MD *md,
int eta, const uint8_t *seed, VECTOR *s1, VECTOR *s2);
/*
 * Signature of a routine that fills the polynomials of |out| from the seed
 * |rho_prime| and the starting counter |kappa|, using the supplied digest
 * context and digest. The type returns void, so an implementation has no way
 * to report a failure to its caller.
 */
typedef void(ML_DSA_VECTOR_EXPAND_MASK_FN)(VECTOR *out, const uint8_t *rho_prime,
size_t rho_prime_len, uint32_t kappa, uint32_t gamma1,
EVP_MD_CTX *h_ctx, const EVP_MD *md);

/*
 * Table of ML-DSA sampling routines. Callers fetch a table with
 * ossl_ml_dsa_sample_ops() and call the samplers through these pointers, which
 * lets a build substitute a platform-specific implementation for the scalar
 * one. Tables are initialised positionally, so keep the member order stable.
 */
typedef struct ossl_ml_dsa_sample_ops_st {
ML_DSA_MATRIX_EXPAND_A_FN *matrix_expand_A; /* generates the matrix A from rho */
ML_DSA_VECTOR_EXPAND_S_FN *vector_expand_S; /* generates s1 and s2 from a seed */
ML_DSA_VECTOR_EXPAND_MASK_FN *vector_expand_mask; /* fills a vector from rho_prime and kappa */
} OSSL_ML_DSA_SAMPLE_OPS;

/* Returns the sampling table chosen for this build (x86_64 asm or generic). */
const OSSL_ML_DSA_SAMPLE_OPS *ossl_ml_dsa_sample_ops(void);
/* Returns the table of portable scalar samplers. */
const OSSL_ML_DSA_SAMPLE_OPS *ossl_ml_dsa_sample_generic_ops(void);
/*
 * Returns the x86_64 sampling table; in builds without the x86_64 Keccak
 * assembly this hands back the generic table instead.
 */
const OSSL_ML_DSA_SAMPLE_OPS *ossl_ml_dsa_sample_x86_64_ops(void);
void ossl_ml_dsa_matrix_mult_vector(const MATRIX *matrix_kl, const VECTOR *vl,
VECTOR *vk);
int ossl_ml_dsa_poly_expand_mask(POLY *out, const uint8_t *seed, size_t seed_len,
Expand Down
7 changes: 0 additions & 7 deletions crypto/ml_dsa/ml_dsa_matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,3 @@ matrix_mult_vector(const MATRIX *a, const VECTOR *s, VECTOR *t)
{
ossl_ml_dsa_matrix_mult_vector(a, s, t);
}

/*
 * Short-name inline wrapper: forwards every argument unchanged to
 * ossl_ml_dsa_matrix_expand_A() and returns its result.
 */
static ossl_inline ossl_unused int
matrix_expand_A(EVP_MD_CTX *g_ctx, const EVP_MD *md, const uint8_t *rho,
MATRIX *out)
{
return ossl_ml_dsa_matrix_expand_A(g_ctx, md, rho, out);
}
64 changes: 61 additions & 3 deletions crypto/ml_dsa/ml_dsa_sample.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
*/

#include <openssl/byteorder.h>
#include <openssl/crypto.h>
#include "ml_dsa_local.h"
#include "ml_dsa_vector.h"
#include "ml_dsa_matrix.h"
Expand Down Expand Up @@ -35,6 +36,10 @@ typedef int(COEFF_FROM_NIBBLE_FUNC)(uint32_t nibble, uint32_t *out);
static COEFF_FROM_NIBBLE_FUNC coeff_from_nibble_4;
static COEFF_FROM_NIBBLE_FUNC coeff_from_nibble_2;

/*
 * Forward declarations of the scalar samplers defined in this file; declaring
 * them through the function typedefs keeps their signatures in step with the
 * members of OSSL_ML_DSA_SAMPLE_OPS.
 */
static ML_DSA_MATRIX_EXPAND_A_FN matrix_expand_A_scalar;
static ML_DSA_VECTOR_EXPAND_S_FN vector_expand_S_scalar;
static ML_DSA_VECTOR_EXPAND_MASK_FN vector_expand_mask_scalar;

/**
* @brief Combine 3 bytes to form a coefficient.
* See FIPS 204, Algorithm 14, CoeffFromThreeBytes()
Expand Down Expand Up @@ -198,7 +203,7 @@ static int rej_bounded_poly(EVP_MD_CTX *h_ctx, const EVP_MD *md,
* in the range of 0..q-1.
* @returns 1 if the matrix was generated, or 0 on error.
*/
int ossl_ml_dsa_matrix_expand_A(EVP_MD_CTX *g_ctx, const EVP_MD *md,
static int matrix_expand_A_scalar(EVP_MD_CTX *g_ctx, const EVP_MD *md,
const uint8_t *rho, MATRIX *out)
{
int ret = 0;
Expand All @@ -208,7 +213,6 @@ int ossl_ml_dsa_matrix_expand_A(EVP_MD_CTX *g_ctx, const EVP_MD *md,

/* The seed used for each matrix element is rho + column_index + row_index */
memcpy(derived_seed, rho, ML_DSA_RHO_BYTES);

for (i = 0; i < out->k; i++) {
for (j = 0; j < out->l; j++) {
derived_seed[ML_DSA_RHO_BYTES + 1] = (uint8_t)i;
Expand Down Expand Up @@ -241,7 +245,7 @@ int ossl_ml_dsa_matrix_expand_A(EVP_MD_CTX *g_ctx, const EVP_MD *md,
* the range (q-eta)..0..eta
* @returns 1 if s1 and s2 were successfully generated, or 0 otherwise.
*/
int ossl_ml_dsa_vector_expand_S(EVP_MD_CTX *h_ctx, const EVP_MD *md, int eta,
static int vector_expand_S_scalar(EVP_MD_CTX *h_ctx, const EVP_MD *md, int eta,
const uint8_t *seed, VECTOR *s1, VECTOR *s2)
{
int ret = 0;
Expand Down Expand Up @@ -376,3 +380,57 @@ int ossl_ml_dsa_poly_sample_in_ball(POLY *out_c, const uint8_t *seed, int seed_l
}
return 1;
}

/**
 * @brief Scalar expansion of a mask vector, one polynomial at a time.
 *
 * For the polynomial at position n of |out|, the seed handed to
 * poly_expand_mask() is the first ML_DSA_RHO_PRIME_BYTES bytes of |rho_prime|
 * followed by the low 16 bits of (kappa + n), least significant byte first.
 *
 * @param out Vector whose polynomials are written.
 * @param rho_prime Seed; exactly ML_DSA_RHO_PRIME_BYTES bytes are read.
 * @param rho_prime_len Unused by this implementation.
 * @param kappa Counter value used for the first polynomial.
 * @param gamma1 Passed through unchanged to poly_expand_mask().
 * @param h_ctx Digest context passed through to poly_expand_mask().
 * @param md Digest passed through to poly_expand_mask().
 */
static void vector_expand_mask_scalar(VECTOR *out, const uint8_t *rho_prime,
    size_t rho_prime_len, uint32_t kappa, uint32_t gamma1,
    EVP_MD_CTX *h_ctx, const EVP_MD *md)
{
    uint8_t seed_buf[ML_DSA_RHO_PRIME_BYTES + 2];
    uint8_t *const counter = seed_buf + ML_DSA_RHO_PRIME_BYTES;
    size_t n;

    (void)rho_prime_len;

    /* The rho_prime prefix is shared; only the 2 counter bytes change. */
    memcpy(seed_buf, rho_prime, ML_DSA_RHO_PRIME_BYTES);

    for (n = 0; n < out->num_poly; n++) {
        const size_t nonce = kappa + n;

        counter[0] = nonce & 0xFF;
        counter[1] = (nonce >> 8) & 0xFF;
        poly_expand_mask(&out->poly[n], seed_buf, sizeof(seed_buf),
            gamma1, h_ctx, md);
    }
}

/*
 * Generic sampling table made of the scalar implementations in this file.
 * The initialisers are positional: matrix_expand_A, vector_expand_S,
 * vector_expand_mask, matching the member order of OSSL_ML_DSA_SAMPLE_OPS.
 */
static const OSSL_ML_DSA_SAMPLE_OPS ml_dsa_sample_generic_meth = {
matrix_expand_A_scalar,
vector_expand_S_scalar,
vector_expand_mask_scalar
};

/**
 * @brief Select the ML-DSA sampling table for this build.
 *
 * The choice is made entirely by the preprocessor: the x86_64 table is used
 * only when KECCAK1600_ASM is defined, the target is x86_64 and assembly has
 * not been disabled with OPENSSL_NO_ASM. Every other build gets the generic
 * table.
 *
 * @returns A pointer to the selected sampling table.
 */
const OSSL_ML_DSA_SAMPLE_OPS *ossl_ml_dsa_sample_ops(void)
{
#if !defined(KECCAK1600_ASM) \
    || defined(OPENSSL_NO_ASM) \
    || !(defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64))
    return ossl_ml_dsa_sample_generic_ops();
#else
    return ossl_ml_dsa_sample_x86_64_ops();
#endif
}

/**
 * @brief Accessor for the generic (scalar) sampling table.
 *
 * @returns The address of the file-scope constant ml_dsa_sample_generic_meth.
 */
const OSSL_ML_DSA_SAMPLE_OPS *ossl_ml_dsa_sample_generic_ops(void)
{
return &ml_dsa_sample_generic_meth;
}

/*
 * This condition must stay identical to the one in ossl_ml_dsa_sample_ops().
 * When it holds, ossl_ml_dsa_sample_x86_64_ops() is expected to be defined by
 * the textually included file below (NOTE(review): presumably included here
 * so it can use this file's static helpers - confirm). Otherwise a stub is
 * provided so the symbol always exists and simply returns the generic table.
 */
#if defined(KECCAK1600_ASM) \
&& (defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)) \
&& !defined(OPENSSL_NO_ASM)
#include "ml_dsa_sample_hw_x86_64.inc"
#else
const OSSL_ML_DSA_SAMPLE_OPS *ossl_ml_dsa_sample_x86_64_ops(void)
{
return ossl_ml_dsa_sample_generic_ops();
}
#endif
Loading
Loading