Merge pull request #13 from staticfloat/sf/sha3

Fairly big cleanup, SHA3 and bytes instead of strings!
JuliaLang · Apr 20, 2016 · e3a9f84 · e3a9f84
2 parents 90144b2 + 54b1fc1
commit e3a9f84
Show file tree

Hide file tree

Showing 10 changed files with 373 additions and 136 deletions.
diff --git a/README.md b/README.md
@@ -10,21 +10,41 @@ Usage is very straightforward:
 ```
 julia> using SHA
 
-julia> sha256("test")
+julia> bytes2hex(sha256("test"))
 "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08"
 ```
 
-Each exported function (at the time of this writing, only SHA-1, SHA-2 224, 256, 384 and 512 functions are implemented) takes in either an `Array{UInt8}`, a `ByteString` or an `IO` object.  This makes it trivial to checksum a file:
+Each exported function (at the time of this writing, SHA-1, SHA-2 224, 256, 384 and 512, and SHA-3 224, 256, 384 and 512 functions are implemented) takes in either an `Array{UInt8}`, a `ByteString` or an `IO` object.  This makes it trivial to checksum a file:
 
 ```
 shell> cat /tmp/test.txt
 test
 julia> using SHA
 
 julia> open("/tmp/test.txt") do f
-           sha256(f)
+           sha2_256(f)
        end
-"9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08"
+32-element Array{UInt8,1}:
+ 0x9f
+ 0x86
+ 0xd0
+ 0x81
+ 0x88
+ 0x4c
+ 0x7d
+ 0x65
+    ⋮
+ 0x5d
+ 0x6c
+ 0x15
+ 0xb0
+ 0xf0
+ 0x0a
+ 0x08
 ```
 
 Note the lack of a newline at the end of `/tmp/test.txt`.  Julia automatically inserts a newline before the `julia>` prompt.
+
+Due to the colloquial usage of `sha256` to refer to `sha2_256`, convenience functions are provided, mapping `shaxxx()` function calls to `sha2_xxx()`.  For SHA-3, no such colloquialisms exist and the user must use the full `sha3_xxx()` names.
+
+Note that, at the time of this writing, the SHA3 code is not optimized, and as such is roughly an order of magnitude slower than SHA2.  Pull requests are welcome.
diff --git a/src/SHA.jl b/src/SHA.jl
@@ -1,39 +1,57 @@
-isdefined(Base, :__precompile__) && __precompile__()
+#isdefined(Base, :__precompile__) && __precompile__()
 
 module SHA
 
 using Compat
 
-export sha1, sha224, sha256, sha384, sha512
+# Export convenience functions, context types, update!() and digest!() functions
+export sha1, SHA1_CTX, update!, digest!
+export sha224, sha256, sha384, sha512
+export sha2_224, sha2_256, sha2_384, sha2_512
+export sha3_224, sha3_256, sha3_384, sha3_512
+export SHA224_CTX, SHA256_CTX, SHA384_CTX, SHA512_CTX
+export SHA2_224_CTX, SHA2_256_CTX, SHA2_384_CTX, SHA2_512_CTX
+export SHA3_224_CTX, SHA3_256_CTX, SHA3_384_CTX, SHA3_512_CTX
+
 
 include("constants.jl")
 include("types.jl")
 include("base_functions.jl")
 include("sha1.jl")
 include("sha2.jl")
+include("sha3.jl")
 include("common.jl")
 
-
 # Create data types and convenience functions for each hash implemented
 for (f, ctx) in [(:sha1, :SHA1_CTX),
                  (:sha224, :SHA224_CTX),
                  (:sha256, :SHA256_CTX),
                  (:sha384, :SHA384_CTX),
-                 (:sha512, :SHA512_CTX)]
+                 (:sha512, :SHA512_CTX),
+                 (:sha2_224, :SHA2_224_CTX),
+                 (:sha2_256, :SHA2_256_CTX),
+                 (:sha2_384, :SHA2_384_CTX),
+                 (:sha2_512, :SHA2_512_CTX),
+                 (:sha3_224, :SHA3_224_CTX),
+                 (:sha3_256, :SHA3_256_CTX),
+                 (:sha3_384, :SHA3_384_CTX),
+                 (:sha3_512, :SHA3_512_CTX),]
     @eval begin
-        # Allows things like:
-        # open("test.txt") do f
-        #     sha256(f)
-        # done
-        function $f(io::IO)
+        # Our basic function is to process arrays of bytes
+        function $f(data::Array{UInt8,1})
             ctx = $ctx()
-            update!(ctx, readbytes(io));
-            return bytes2hex(digest!(ctx))
+            update!(ctx, data);
+            return digest!(ctx)
         end
 
-        # Allows the same as above, but on ByteStrings and Arrays
-        $f(str::ByteString) = $f(IOBuffer(str))
-        $f(arr::Array{UInt8,1}) = $f(IOBuffer(arr))
+        # ByteStrings are a pretty handy thing to be able to crunch through
+        $f(str::ByteString) = $f(str.data)
+
+        # Convenience function for IO devices, allows for things like:
+        # open("test.txt") do f
+        #     sha256(f)
+        # end
+        $f(io::IO) = $f(readbytes(io))
     end
 end
 

diff --git a/src/base_functions.jl b/src/base_functions.jl
@@ -17,6 +17,8 @@ R(b,x)          = (x >> b)
 S32(b,x)        = rrot(b,x,32)
 # 64-bit Rotate-right (used in SHA-384 and SHA-512):
 S64(b,x)        = rrot(b,x,64)
+# 64-bit Rotate-left (used in SHA3)
+L64(b,x)        = lrot(b,x,64)
 
 # Two of six logical functions used in SHA-256, SHA-384, and SHA-512:
 Ch(x,y,z)  = ((x & y) $ (~x & z))

diff --git a/src/common.jl b/src/common.jl
@@ -2,66 +2,39 @@
 
 # update! takes in variable-length data, buffering it into blocklen()-sized pieces,
 # calling transform!() when necessary to update the internal hash state.
-function update!{T<:SHA_CTX}(context::T, data::Array{UInt8,1})
-    if length(data) == 0
-        return
-    end
+function update!{T<:Union{SHA1_CTX,SHA2_CTX,SHA3_CTX}}(context::T, data::Array{UInt8,1})
+    # We need to do all our arithmetic in the proper bitwidth
+    UIntXXX = typeof(context.bytecount)
 
-    data_idx = 0
-    len = convert(typeof(context.bytecount), length(data))
+    # Process as many complete blocks as possible
+    len = UIntXXX(length(data))
+    data_idx = UIntXXX(0)
     usedspace = context.bytecount % blocklen(T)
-    if usedspace > 0
-        # Calculate how much free space is available in the buffer
-        freespace = blocklen(T) - usedspace
-
-        if len >= freespace
-            # Fill the buffer completely and process it
-            for i in 1:freespace
-                context.buffer[usedspace + i] = data[data_idx + i]
-            end
-
-            # Round bytecount up to the nearest blocklen
-            context.bytecount += freespace
-            data_idx += freespace
-            len -= freespace
-            transform!(context)
-        else
-            # The buffer is not yet full
-            for i = 1:len
-                context.buffer[usedspace + i] = data[data_idx + i]
-            end
-            context.bytecount += len
-            return
+    while len - data_idx + usedspace >= blocklen(T)
+        # Fill up as much of the buffer as we can with the data given us
+        for i in 1:(blocklen(T) - usedspace)
+            context.buffer[usedspace + i] = data[data_idx + i]
         end
-    end
 
-
-    # Process as many complete blocks as possible, now that the buffer is full
-    data_idx = one(len)
-    while len - (data_idx - 1) >= blocklen(T)
-        for i in 1:blocklen(T)
-            context.buffer[i] = data[data_idx + i - 1]
-        end
         transform!(context)
-        data_idx += blocklen(T)
+        context.bytecount += blocklen(T) - usedspace
+        data_idx += blocklen(T) - usedspace
+        usedspace = UIntXXX(0)
     end
-    context.bytecount += (data_idx - 1)
 
-    # If there are leftovers, save them in buffer until next update!() or digest!()
-    if data_idx < len
-        # There's left-overs, so save 'em
-        for i = 1:(len - data_idx + 1)
-            context.buffer[i] = data[data_idx + i - 1]
+    # There is less than a complete block left, but we need to save the leftovers into context.buffer:
+    if len > data_idx
+        for i = 1:(len - data_idx)
+            context.buffer[usedspace + i] = data[data_idx + i]
         end
-        context.bytecount += (len - data_idx + 1)
+        context.bytecount += len - data_idx
     end
 end
 
 
 # Clear out any saved data in the buffer, append total bitlength, and return our precious hash!
-function digest!{T<:SHA_CTX}(context::T)
+function digest!{T<:Union{SHA1_CTX,SHA2_CTX}}(context::T)
     usedspace = context.bytecount % blocklen(T)
-
     # If we have anything in the buffer still, pad and transform that data
     if usedspace > 0
         # Begin padding with a 1 bit:

diff --git a/src/constants.jl b/src/constants.jl
@@ -33,13 +33,13 @@ const K256 = UInt32[
 ]
 
 # Initial hash value H for SHA-224:
-const SHA224_initial_hash_value = UInt32[
+const SHA2_224_initial_hash_value = UInt32[
     0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
     0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4
 ]
 
 
-const SHA256_initial_hash_value = UInt32[
+const SHA2_256_initial_hash_value = UInt32[
     0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
     0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
 ]
@@ -89,17 +89,41 @@ const K512 = UInt64[
 ]
 
 # Initial hash value H for SHA-384
-const SHA384_initial_hash_value = UInt64[
+const SHA2_384_initial_hash_value = UInt64[
     0xcbbb9d5dc1059ed8, 0x629a292a367cd507,
     0x9159015a3070dd17, 0x152fecd8f70e5939,
     0x67332667ffc00b31, 0x8eb44a8768581511,
     0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4
 ]
 
 # Initial hash value H for SHA-512
-const SHA512_initial_hash_value = UInt64[
+const SHA2_512_initial_hash_value = UInt64[
     0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
     0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
     0x510e527fade682d1, 0x9b05688c2b3e6c1f,
     0x1f83d9abfb41bd6b, 0x5be0cd19137e2179
 ]
+
+# Round constants for SHA3 rounds
+const SHA3_ROUND_CONSTS = UInt64[
+    0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
+    0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
+    0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
+    0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
+    0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
+    0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
+    0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
+    0x8000000000008080, 0x0000000080000001, 0x8000000080008008
+]
+
+# Rotation constants for SHA3 rounds
+const SHA3_ROTC = UInt64[
+    1,  3,  6,  10, 15, 21, 28, 36, 45, 55, 2,  14,
+    27, 41, 56, 8,  25, 43, 62, 18, 39, 61, 20, 44
+]
+
+# Permutation indices for SHA3 rounds (+1'ed so as to work with julia's 1-based indexing)
+const SHA3_PILN = UInt64[
+    11, 8,  12, 18, 19, 4, 6,  17, 9,  22, 25, 5,
+    16, 24, 20, 14, 13, 3, 21, 15, 23, 10,  7,  2
+]
diff --git a/src/sha2.jl b/src/sha2.jl
@@ -1,4 +1,4 @@
-function transform!{T<:SHA2_CTX_SMALL}(context::T)
+function transform!{T<:Union{SHA2_224_CTX,SHA2_256_CTX}}(context::T)
     buffer = reinterpret(eltype(context.state), context.buffer)
     # Initialize registers with the previous intermediate values (our state)
     a = context.state[1]
@@ -65,7 +65,7 @@ function transform!{T<:SHA2_CTX_SMALL}(context::T)
 end
 
 
-function transform!(context::SHA2_CTX_BIG)
+function transform!(context::Union{SHA2_384_CTX,SHA2_512_CTX})
     buffer = reinterpret(eltype(context.state), context.buffer)
     # Initialize registers with the prev. intermediate value
     a = context.state[1]

diff --git a/src/sha3.jl b/src/sha3.jl
@@ -0,0 +1,76 @@
+# Mix the current contents of `context.buffer` into `context.state`, then run 24
+# rounds of the Theta / Rho Pi / Chi / Iota steps over the state in place.
+# Returns `context.state` (the same array that was mutated).
+function transform!{T<:SHA3_CTX}(context::T)
+    # First, update state with buffer
+    # The byte buffer is viewed as words of the state's element type, and the first
+    # blocklen(T)/8 of them are XORed into the state (assumes 8-byte state words, as
+    # the division by 8 and the UInt64 scratch array below imply).
+    buffer_as_uint64 = reinterpret(eltype(context.state), context.buffer)
+    for idx in 1:div(blocklen(T),8)
+        context.state[idx] $= buffer_as_uint64[idx]
+    end
+    # Five-word scratch array shared by the Theta, Rho Pi and Chi steps below
+    bc = Array{UInt64,1}(5)
+
+    # We always assume 24 rounds
+    for round in 0:23
+        # Theta function
+        # bc[i] collects the XOR of every fifth state word starting at i
+        for i in 1:5
+            bc[i] = context.state[i] $ context.state[i + 5] $ context.state[i + 10] $ context.state[i + 15] $ context.state[i + 20]
+        end
+
+        # Each of those five-word groups is then XORed with a combination of the two
+        # neighbouring bc entries (indices wrap around via mod1), one rotated left by 1
+        for i in 1:5
+            temp = bc[mod1(i + 4, 5)] $ L64(1, bc[mod1(i + 1, 5)])
+            for j in 0:5:20
+                context.state[i + j] $= temp
+            end
+        end
+
+        # Rho Pi
+        # Starting from state[2], walk the 24 positions listed in SHA3_PILN: each
+        # position receives the previously displaced word rotated left by the
+        # matching SHA3_ROTC amount, and its old value is carried to the next step.
+        # bc[1] only serves as a temporary here.
+        temp = context.state[2]
+        for i in 1:24
+            j = SHA3_PILN[i]
+            bc[1] = context.state[j]
+            context.state[j] = L64(SHA3_ROTC[i], temp)
+            temp = bc[1]
+        end
+
+        # Chi
+        # Work on five consecutive words at a time; bc keeps an unmodified copy of
+        # the group so every update reads the original neighbouring values.
+        for j in 0:5:20
+            for i in 1:5
+                bc[i] = context.state[i + j]
+            end
+            for i in 1:5
+                context.state[j + i] $= (~bc[mod1(i + 1, 5)] & bc[mod1(i + 2, 5)])
+            end
+        end
+
+        # Iota
+        # `round` counts from 0, hence the +1 for 1-based indexing of the constants
+        context.state[1] $= SHA3_ROUND_CONSTS[round+1]
+    end
+
+    return context.state
+end
+
+
+
+# Pad the data remaining in the buffer, run the final transform!(), and return the
+# first digestlen(T) bytes of the state as our precious hash.
+#
+# The padding starts with a 0x06 byte, continues with zeros and ends with a 0x80 byte
+# in the last position of the block.  Because update!() never leaves a full block in
+# the buffer, there is always at least one free byte, so the padding always fits in
+# the current block: when only a single byte is free, the opening 0x06 and the closing
+# 0x80 are merged into one 0x86 byte (see FIPS 202, padding of byte-aligned messages).
+# No extra block is ever appended.
+function digest!{T<:SHA3_CTX}(context::T)
+    usedspace = context.bytecount % blocklen(T)
+    if usedspace < blocklen(T) - 1
+        # Two or more bytes are free: begin padding with a 0x06
+        context.buffer[usedspace+1] = 0x06
+        # Fill with zeros up until the last byte (empty range if exactly two are free)
+        context.buffer[usedspace+2:end-1] = 0x00
+        # Finish it off with a 0x80
+        context.buffer[end] = 0x80
+    else
+        # Exactly one byte is free: start and end of the padding share it
+        context.buffer[end] = 0x86
+    end
+
+    # Final transform:
+    transform!(context)
+
+    # Return the digest
+    return reinterpret(UInt8, context.state)[1:digestlen(T)]
+end