Skip to content

Commit

Permalink
add the KT256
Browse files Browse the repository at this point in the history
  • Loading branch information
hadielyakhni committed Feb 28, 2024
1 parent 239d0ac commit 9d54251
Showing 1 changed file with 131 additions and 21 deletions.
152 changes: 131 additions & 21 deletions Standalone/KangarooTwelve/Python/K12-SIMD-simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
With SIMD instructions applying to N×64-bit words, one can efficiently hash N leaves in parallel.
"""

from TurboSHAKE import ROL64, load64, store64, KeccakP1600, KeccakP1600onLanes, TurboSHAKE128
from TurboSHAKE import ROL64, load64, store64, KeccakP1600, KeccakP1600onLanes, TurboSHAKE128, TurboSHAKE256
from K12 import right_encode

"""
Expand All @@ -42,7 +42,7 @@
lanes[x][y][i] for 0 ≤ x,y < 5 and 0 ≤ i < N contains the lane at coordinates (x, y) of the i-th state.
nrRounds: integer
The number of rounds to perform, must be 12 for Keccak-p[1600, 12] used in K12.
The number of rounds to perform, must be 12 for Keccak-p[1600, 12] used in KT128 and KT256.
Returns
-------
Expand Down Expand Up @@ -89,7 +89,8 @@ def KeccakP1600timesN_SIMD(N, states, nrRounds):
For the first state, we take the 8*laneCount bytes at data[0 : 8*laneCount] and add them to it.
For the second state, we take 8*laneCount bytes 8*laneOffset bytes further, so at data[8*laneOffset : 8*laneOffset + 8*laneCount], and add them to it.
For the third state, we do the same, but again 8*laneOffset bytes further, etc.
For K12, 8*laneOffset is 8192 so that the first state gets data from the first leaf, the second state from the second leaf, etc.
Important: laneOffset is counted in 64-bit lanes, not in bytes (hence the factor 8 in 8*laneOffset).
For KT128 (and KT256), 8*laneOffset is 8192 so that the first state gets data from the first leaf, the second state from the second leaf, etc.
Parameters
----------
Expand Down Expand Up @@ -136,7 +137,7 @@ def KeccakP1600timesN_AddLanesAll(N, states, data, laneCount, laneOffset):
return states


# Global variables for K12
# Global variables for KT128 and KT256
B = 8192

"""
Expand All @@ -159,7 +160,7 @@ def KeccakP1600timesN_AddLanesAll(N, states, data, laneCount, laneOffset):
byte array
The N chaining values.
"""
def KangarooTwelve_ProcessLeaves(N, data):
def KangarooTwelve_ProcessLeaves_128(N, data):
assert(len(data) >= N*B)
rateInLanes = 21
rateInBytes = rateInLanes*8
Expand All @@ -172,7 +173,8 @@ def KangarooTwelve_ProcessLeaves(N, data):
# - add bytes data[j : j + 168] to the first state
# - add bytes data[8KiB + j : 8KiB + j + 168] to the second state
# - add bytes data[16KiB + j : 16KiB + j + 168] to the third state, etc.
KeccakP1600timesN_AddLanesAll(N, A, data[j:], rateInLanes, B//8)
# Note: the last param is offset in lanes, not in bytes, thus the //8
KeccakP1600timesN_AddLanesAll(N, A, data[j:], rateInLanes, B//8)
# Apply Keccak-p[1600, 12 rounds] to all states
A = KeccakP1600timesN_SIMD(N, A, 12)

Expand All @@ -191,6 +193,40 @@ def KangarooTwelve_ProcessLeaves(N, data):
CVs = bytearray().join([store64(A[0][0][i]) + store64(A[1][0][i]) + store64(A[2][0][i]) + store64(A[3][0][i]) for i in range(N)])
return CVs

def KangarooTwelve_ProcessLeaves_256(N, data):
    """
    Compute the KT256 chaining values of N leaves of B bytes each, updating the N states in lockstep.

    Parameters
    ----------
    N: integer
        The number of leaves, and hence of states, processed together.
    data: byte array
        The leaves stored back to back, B bytes apart; must hold at least N*B bytes.

    Returns
    -------
    byte array
        The N chaining values, concatenated in leaf order.
        Each one is made of lanes (0,0), (1,0), (2,0), (3,0), (4,0), (0,1), (1,1), (2,1) of its state.
    """
    assert len(data) >= N * B
    lanesPerBlock = 17
    bytesPerBlock = 8 * lanesPerBlock
    # Distance between two consecutive leaves, expressed in lanes (not in bytes)
    leafStrideInLanes = B // 8

    # One all-zero state per leaf, indexed as state[x][y][leaf]
    state = [[[0 for leaf in range(N)] for y in range(5)] for x in range(5)]

    # Absorb every complete block of 17 lanes (= 136 bytes):
    # leaf number k receives data[k*8KiB + offset : k*8KiB + offset + 136]
    offset = 0
    while offset < B - bytesPerBlock:
        KeccakP1600timesN_AddLanesAll(N, state, data[offset:], lanesPerBlock, leafStrideInLanes)
        # Keccak-p[1600, 12 rounds] on all the states at once
        state = KeccakP1600timesN_SIMD(N, state, 12)
        offset += bytesPerBlock

    # Absorb the last, incomplete, block: the 4 lanes (= 32 bytes) left at the end of each leaf
    tailStart = (B // bytesPerBlock) * bytesPerBlock
    tailLanes = (B - tailStart) // 8
    KeccakP1600timesN_AddLanesAll(N, state, data[tailStart:], tailLanes, leafStrideInLanes)

    # 0x0B carries the suffix 110 followed by the first bit of padding;
    # it goes into lane (4, 0), right after the 4 lanes just absorbed
    state[4][0] = [state[4][0][i] ^ 0x0B for i in range(N)]
    # The second bit of padding is the most significant bit of lane (1, 3)
    state[1][3] = [state[1][3][i] ^ (1 << 63) for i in range(N)]
    state = KeccakP1600timesN_SIMD(N, state, 12)

    # Serialize the 8 chaining-value lanes of each state, leaf after leaf
    cvLanes = [(0, 0), (1, 0), (2, 0), (3, 0), (4, 0), (0, 1), (1, 1), (2, 1)]
    CVs = bytearray()
    for i in range(N):
        for (x, y) in cvLanes:
            CVs += store64(state[x][y][i])
    return CVs


"""
Evaluates K12 on the given input message and customization string, and returns the desired number of output bytes.
Expand All @@ -213,7 +249,7 @@ def KangarooTwelve_ProcessLeaves(N, data):
byte array
The first outputByteLen bytes of K12(inputMessage, customizationString).
"""
def KangarooTwelve(inputMessage, customizationString, outputByteLen):
def KT128(inputMessage, customizationString, outputByteLen):
c = 256
# Concatenate the input message, the customization string and the length of the latter
S = bytearray(inputMessage) + bytearray(customizationString) + right_encode(len(customizationString))
Expand All @@ -229,17 +265,17 @@ def KangarooTwelve(inputMessage, customizationString, outputByteLen):
n = 0
# Process 8 leaves in parallel if possible
while(j + 8*B <= len(S)):
CVs = CVs + KangarooTwelve_ProcessLeaves(8, S[j:])
CVs = CVs + KangarooTwelve_ProcessLeaves_128(8, S[j:])
j = j + 8*B
n = n + 8
# Process 4 leaves in parallel if possible
while(j + 4*B <= len(S)):
CVs = CVs + KangarooTwelve_ProcessLeaves(4, S[j:])
CVs = CVs + KangarooTwelve_ProcessLeaves_128(4, S[j:])
j = j + 4*B
n = n + 4
# Process 2 leaves in parallel if possible
while(j + 2*B <= len(S)):
CVs = CVs + KangarooTwelve_ProcessLeaves(2, S[j:])
CVs = CVs + KangarooTwelve_ProcessLeaves_128(2, S[j:])
j = j + 2*B
n = n + 2
# Process the remaining leaf
Expand All @@ -251,6 +287,46 @@ def KangarooTwelve(inputMessage, customizationString, outputByteLen):
NodeStar = S[0:B] + bytearray([3,0,0,0,0,0,0,0]) + CVs \
+ right_encode(n) + b'\xFF\xFF'
return TurboSHAKE128(NodeStar, 0x06, outputByteLen)

def KT256(inputMessage, customizationString, outputByteLen):
    """
    Evaluate KT256 on the given input message and customization string.

    Parameters
    ----------
    inputMessage: byte array
        The message to hash.
    customizationString: byte array
        The customization string; it is appended to the message together with its right-encoded length.
    outputByteLen: integer
        The number of output bytes requested.

    Returns
    -------
    byte array
        The output of the final TurboSHAKE256 call, asked for outputByteLen bytes.
    """
    # Each leaf contributes a chaining value of c/8 bytes, with c = 512
    cvByteLen = 512 // 8
    S = bytearray(inputMessage) + bytearray(customizationString) + right_encode(len(customizationString))

    # A single chunk: the tree reduces to the final node alone
    if len(S) <= B:
        return TurboSHAKE256(S, 0x07, outputByteLen)

    # Several chunks: kangaroo hopping. The first chunk stays in the final node,
    # so the leaves start at offset 8KiB.
    CVs = bytearray()
    j = B
    n = 0  # number of leaves processed so far
    # Take the leaves 8 at a time while possible, then 4 at a time, then 2 at a time
    for width in (8, 4, 2):
        while j + width * B <= len(S):
            CVs += KangarooTwelve_ProcessLeaves_256(width, S[j:])
            j += width * B
            n += width
    # Whatever remains (possibly a partial chunk) is hashed as a single leaf
    while j < len(S):
        CVs += TurboSHAKE256(S[j:j + B], 0x0B, cvByteLen)
        j += B
        n += 1

    # Final node: first chunk, 8-byte marker, chaining values, leaf count, 0xFF 0xFF
    NodeStar = S[0:B] + bytearray([3, 0, 0, 0, 0, 0, 0, 0]) + CVs \
        + right_encode(n) + b'\xFF\xFF'
    return TurboSHAKE256(NodeStar, 0x06, outputByteLen)



# Test that KeccakP1600timesN_SIMD does what it is supposed to
Expand All @@ -266,14 +342,14 @@ def Test_KeccakP1600timesN_SIMD():
return

# Test that KangarooTwelve_ProcessLeaves_128 does what it is supposed to
def Test_KangarooTwelve_ProcessLeaves():
def Test_KangarooTwelve_ProcessLeaves_128():
c = 256
for N in range(1, 5):
print("Testing KangarooTwelve_ProcessLeaves for N =", N)
S = bytearray([(i%247) for i in range(B*N)])
Si = [bytearray(S[i*B:(i+1)*B]) for i in range(N)]
ref_CVs = bytearray().join([TurboSHAKE128(Si[i], 0x0B, c//8) for i in range(N)])
test_CVs = KangarooTwelve_ProcessLeaves(N, S)
test_CVs = KangarooTwelve_ProcessLeaves_128(N, S)
assert(test_CVs == ref_CVs)

def outputHex(s):
Expand All @@ -285,22 +361,56 @@ def outputHex(s):
# Produce test vectors
def printK12TestVectors():
print("KangarooTwelve(M=empty, C=empty, 32 output bytes):")
outputHex(KangarooTwelve(b'', b'', 32))
outputHex(KT128(b'', b'', 32))
print("KangarooTwelve(M=empty, C=empty, 64 output bytes):")
outputHex(KangarooTwelve(b'', b'', 64))
outputHex(KT128(b'', b'', 64))
print("KangarooTwelve(M=empty, C=empty, 10032 output bytes), last 32 bytes:")
outputHex(KangarooTwelve(b'', b'', 10032)[10000:])
outputHex(KT128(b'', b'', 10032)[10000:])
for i in range(6):
C = b''
M = bytearray([(j % 251) for j in range(17**i)])
print("KangarooTwelve(M=pattern 0x00 to 0xFA for 17^{0:d} bytes, C=empty, 32 output bytes):".format(i))
outputHex(KangarooTwelve(M, C, 32))
outputHex(KT128(M, C, 32))
for i in range(4):
M = bytearray([0xFF for j in range(2**i-1)])
C = bytearray([(j % 251) for j in range(41**i)])
print("KangarooTwelve(M={0:d} times byte 0xFF, C=pattern 0x00 to 0xFA for 41^{1:d} bytes, 32 output bytes):".format(2**i-1, i))
outputHex(KangarooTwelve(M, C, 32))

Test_KeccakP1600timesN_SIMD()
Test_KangarooTwelve_ProcessLeaves()
printK12TestVectors()
outputHex(KT128(M, C, 32))

def ptn(n):
    """
    Return the first n bytes of the repeating pattern 0x00, 0x01, ..., 0xFA.

    Parameters
    ----------
    n: integer
        The number of bytes to produce, must be non-negative.

    Returns
    -------
    bytes
        The 251-byte pattern repeated as often as needed and truncated to n bytes.

    Raises
    ------
    ValueError
        If n is negative; floor division and modulo would otherwise silently
        yield a non-empty result (e.g., 250 bytes for n = -1).
    """
    if n < 0:
        raise ValueError("ptn() requires a non-negative length, got {0!r}".format(n))
    # One period of the pattern: 0x00 up to and including 0xFA (251 bytes)
    pattern = bytes(range(0xFA + 1))
    repetitions, remainder = divmod(n, len(pattern))
    return pattern * repetitions + pattern[:remainder]

# outputHex(KT256(b'', b'', 64))
# print()
# outputHex(KT256(b'', b'', 128))

# outputHex(KT256(ptn(17), b'', 64))
# print()
# outputHex(KT256(ptn(17**2), b'', 64))
# print()
# outputHex(KT256(ptn(17**3), b'', 64))
# print()
# outputHex(KT256(ptn(17**4), b'', 64))
# print()
# outputHex(KT256(ptn(17**5), b'', 64))
# print()
# outputHex(KT256(ptn(17**6), b'', 64))
# print()
# outputHex(KT256(b'\xFF\xFF\xFF\xFF\xFF\xFF\xFF', ptn(41**3), 64))
# print()
# outputHex(KT256(ptn(8191), b'', 64))
# print()
# outputHex(KT256(ptn(8192), b'', 64))
# print()
# outputHex(KT256(ptn(8192), ptn(8189), 64))
# print()
# outputHex(KT256(ptn(8192), ptn(8190), 64))


# Test_KeccakP1600timesN_SIMD()
# Test_KangarooTwelve_ProcessLeaves_128()
# printK12TestVectors()

0 comments on commit 9d54251

Please sign in to comment.