Skip to content

Commit

Permalink
AWS-LC s2n-bignum update 2024-07-22 (#1718)
Browse files Browse the repository at this point in the history
  • Loading branch information
dkostic authored Jul 24, 2024
2 parents f723a0c + a4f3e5a commit b7d7a99
Show file tree
Hide file tree
Showing 41 changed files with 26,177 additions and 4,269 deletions.
2 changes: 1 addition & 1 deletion third_party/s2n-bignum/arm/curve25519/curve25519_x25519.S
Original file line number Diff line number Diff line change
Expand Up @@ -1242,7 +1242,7 @@ curve25519_x25519_scalarloop:
usra v20.2d, v25.2d, #25
and v27.16b, v25.16b, v23.16b // ubignum_of_hreglist 1 + ubignum_of_lreglist 1 // INTERMEDIATE H|L = x4|z5
bfi x17, x7, #32, #25 // ubignum_of_preglist 1 // INTERMEDIATE z4
mov v5.d[0], x3 // depth 86
mov v5.d[0], x3
mov v1.d[0], x5 // FINAL z2
usra v26.2d, v20.2d, #26 // ubignum_of_hreglist 3 + ubignum_of_lreglist 3 // INTERMEDIATE H|L = x4|z5
and v28.16b, v20.16b, v30.16b // ubignum_of_hreglist 2 + ubignum_of_lreglist 2 // INTERMEDIATE H|L = x4|z5
Expand Down
36 changes: 10 additions & 26 deletions third_party/s2n-bignum/arm/curve25519/curve25519_x25519_alt.S
Original file line number Diff line number Diff line change
Expand Up @@ -593,8 +593,7 @@ curve25519_x25519_alt_scalarloop:

// Multiplex directly into (xn,zn) then do three pure doubling steps;
// this accounts for the implicit zeroing of the three lowest bits
// of the scalar. On the very last doubling we *fully* reduce zn mod
// p_25519 to ease checking for degeneracy below.
// of the scalar.

cmp swap, xzr
mux_4(xn,xm,xn)
Expand Down Expand Up @@ -631,20 +630,20 @@ curve25519_x25519_alt_scalarloop:
orr x1, x1, 0x10000
cmadd_4(e,p,d)
mul_4(xn,s,d)
mul_p25519(zn,p,e)
mul_4(zn,p,e)

// The projective result of the scalar multiplication is now (xn,zn).
// Prepare to call the modular inverse function to get xm = 1/zn
// Prepare to call the modular inverse function to get zn' = 1/zn

add x0, xm
add x0, zn
add x1, zn

// Inline copy of bignum_inv_p25519, identical except for stripping out
// the prologue and epilogue saving and restoring registers and making
// and reclaiming room on the stack. For more details and explanations see
// "arm/curve25519/bignum_inv_p25519.S". Note that the stack it uses for
// its own temporaries is 128 bytes, so it has no effect on variables
// that are needed in the rest of our computation here: res, xm and zn.
// that are needed in the rest of our computation here: res, xn and zn.

mov x20, x0
mov x10, #0xffffffffffffffed
Expand Down Expand Up @@ -1675,28 +1674,13 @@ curve25519_x25519_alt_invmidloop:
stp x0, x1, [x4]
stp x2, x5, [x4, #16]

// Since we eventually want to return 0 when the result is the point at
// infinity, we force xn = 0 whenever zn = 0. This avoids building in a
// dependency on the behavior of modular inverse in out-of-scope cases.

ldp x0, x1, [zn]
ldp x2, x3, [zn+16]
orr x0, x0, x1
orr x2, x2, x3
orr x4, x0, x2
cmp x4, xzr
ldp x0, x1, [xn]
csel x0, x0, xzr, ne
csel x1, x1, xzr, ne
ldp x2, x3, [xn+16]
stp x0, x1, [xn]
csel x2, x2, xzr, ne
csel x3, x3, xzr, ne
stp x2, x3, [xn+16]

// Now the result is xn * (1/zn), fully reduced modulo p.
// Note that in the degenerate case zn = 0 (mod p_25519), the
// modular inverse code above will produce 1/zn = 0, giving
// the correct overall X25519 result of zero for the point at
// infinity.

mul_p25519(resx,xn,xm)
mul_p25519(resx,xn,zn)

// Restore stack and registers

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1360,7 +1360,7 @@ curve25519_x25519_byte_scalarloop:
usra v20.2d, v25.2d, #25
and v27.16b, v25.16b, v23.16b // ubignum_of_hreglist 1 + ubignum_of_lreglist 1 // INTERMEDIATE H|L = x4|z5
bfi x17, x7, #32, #25 // ubignum_of_preglist 1 // INTERMEDIATE z4
mov v5.d[0], x3 // depth 86
mov v5.d[0], x3
mov v1.d[0], x5 // FINAL z2
usra v26.2d, v20.2d, #26 // ubignum_of_hreglist 3 + ubignum_of_lreglist 3 // INTERMEDIATE H|L = x4|z5
and v28.16b, v20.16b, v30.16b // ubignum_of_hreglist 2 + ubignum_of_lreglist 2 // INTERMEDIATE H|L = x4|z5
Expand Down
36 changes: 10 additions & 26 deletions third_party/s2n-bignum/arm/curve25519/curve25519_x25519_byte_alt.S
Original file line number Diff line number Diff line change
Expand Up @@ -711,8 +711,7 @@ curve25519_x25519_byte_alt_scalarloop:

// Multiplex directly into (xn,zn) then do three pure doubling steps;
// this accounts for the implicit zeroing of the three lowest bits
// of the scalar. On the very last doubling we *fully* reduce zn mod
// p_25519 to ease checking for degeneracy below.
// of the scalar.

cmp swap, xzr
mux_4(xn,xm,xn)
Expand Down Expand Up @@ -749,20 +748,20 @@ curve25519_x25519_byte_alt_scalarloop:
orr x1, x1, 0x10000
cmadd_4(e,p,d)
mul_4(xn,s,d)
mul_p25519(zn,p,e)
mul_4(zn,p,e)

// The projective result of the scalar multiplication is now (xn,zn).
// Prepare to call the modular inverse function to get xm = 1/zn
// Prepare to call the modular inverse function to get zn' = 1/zn

add x0, xm
add x0, zn
add x1, zn

// Inline copy of bignum_inv_p25519, identical except for stripping out
// the prologue and epilogue saving and restoring registers and making
// and reclaiming room on the stack. For more details and explanations see
// "arm/curve25519/bignum_inv_p25519.S". Note that the stack it uses for
// its own temporaries is 128 bytes, so it has no effect on variables
// that are needed in the rest of our computation here: res, xm and zn.
// that are needed in the rest of our computation here: res, xn and zn.

mov x20, x0
mov x10, #0xffffffffffffffed
Expand Down Expand Up @@ -1793,28 +1792,13 @@ curve25519_x25519_byte_alt_invmidloop:
stp x0, x1, [x4]
stp x2, x5, [x4, #16]

// Since we eventually want to return 0 when the result is the point at
// infinity, we force xn = 0 whenever zn = 0. This avoids building in a
// dependency on the behavior of modular inverse in out-of-scope cases.

ldp x0, x1, [zn]
ldp x2, x3, [zn+16]
orr x0, x0, x1
orr x2, x2, x3
orr x4, x0, x2
cmp x4, xzr
ldp x0, x1, [xn]
csel x0, x0, xzr, ne
csel x1, x1, xzr, ne
ldp x2, x3, [xn+16]
stp x0, x1, [xn]
csel x2, x2, xzr, ne
csel x3, x3, xzr, ne
stp x2, x3, [xn+16]

// Now the result is xn * (1/zn), fully reduced modulo p.
// Note that in the degenerate case zn = 0 (mod p_25519), the
// modular inverse code above will produce 1/zn = 0, giving
// the correct overall X25519 result of zero for the point at
// infinity.

mul_p25519(zn,xn,xm)
mul_p25519(zn,xn,zn)

ldp x10, x11, [zn]
strb w10, [resx]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC
// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0

// ----------------------------------------------------------------------------
// Given table: uint64_t[height*width], copy table[idx*width...(idx+1)*width-1]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC
// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0

// ----------------------------------------------------------------------------
// Given table: uint64_t[height*16], copy table[idx*16...(idx+1)*16-1]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC
// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0

// ----------------------------------------------------------------------------
// Given table: uint64_t[height*32], copy table[idx*32...(idx+1)*32-1]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC
// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0

// ----------------------------------------------------------------------------
// Given table: uint64_t[height*width], copy table[idx*width...(idx+1)*width-1]
// into z[0..width-1]. width must be a mutiple of 8.
// into z[0..width-1]. width must be a multiple of 8.
// This function is constant-time with respect to the value of `idx`. This is
// achieved by reading the whole table and using the bit-masking to get the
// `idx`-th row.
Expand Down
7 changes: 6 additions & 1 deletion third_party/s2n-bignum/arm/p384/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,10 @@ OBJ = bignum_add_p384.o \
bignum_mod_p384_6.o \
bignum_montmul_p384.o \
bignum_montmul_p384_alt.o \
bignum_montmul_p384_neon.o \
bignum_montsqr_p384.o \
bignum_montsqr_p384_alt.o \
bignum_montsqr_p384_neon.o \
bignum_mux_6.o \
bignum_neg_p384.o \
bignum_nonzero_6.o \
Expand All @@ -45,8 +47,11 @@ OBJ = bignum_add_p384.o \
bignum_tomont_p384.o \
bignum_triple_p384.o \
p384_montjadd.o \
p384_montjadd_alt.o \
p384_montjdouble.o \
p384_montjmixadd.o
p384_montjdouble_alt.o \
p384_montjmixadd.o \
p384_montjmixadd_alt.o

%.o : %.S ; $(CC) -E -I../../include $< | $(GAS) -o $@ -

Expand Down
Loading

0 comments on commit b7d7a99

Please sign in to comment.