-
Notifications
You must be signed in to change notification settings - Fork 2
/
Hash.hs
66 lines (58 loc) · 2.28 KB
/
Hash.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
{-# LANGUAGE TemplateHaskell #-}
-- | The hash algorithm used for Candid field names
--
-- Also includes a function that tries to reverse the hash, first using an
-- English word list, and then a brute force approach.
module Codec.Candid.Hash
( candidHash
, invertHash
) where
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.ByteString.Lazy as BS
import qualified Data.IntMap as M
import Data.Maybe
import Data.Char
import Data.Word
import Data.FileEmbed
-- | The Candid field label hashing algorithm
candidHash :: T.Text -> Word32
candidHash s = BS.foldl (\h c -> h * 223 + fromIntegral c) 0 $ BS.fromStrict $ T.encodeUtf8 s
-- | Inversion of the Candid field label hash
invertHash :: Word32 -> Maybe T.Text
invertHash w32 | w32 < 32 = Nothing
-- leave small numbers alone, tend to be tuple indicies
invertHash w32 | Just t <- M.lookup (fromIntegral w32) m = Just t
-- try the word list
invertHash w32 = listToMaybe guesses
where
x = fromIntegral w32 :: Word64
chars = ['a'..'z'] ++ ['_']
ords = 0 : map (fromIntegral . ord) chars
init_chars = chars ++ [ 'A'..'Z' ]
init_ords = 0 : map (fromIntegral . ord) init_chars
non_mod x = x - (x `mod` 2^(32::Int))
guesses =
[ T.pack $ reverse guess
| c8 <- init_ords, c7 <- ords, c6 <- ords, c5 <- ords
-- It seems that 8 characters are enough to invert anything
-- (based on quickchecking)
-- Set up so that short guesses come first
, let high_chars = c5 * 223^(4::Int) + c6 * 223^(5::Int) + c7 * 223^(6::Int) + c8 * 223^(7::Int)
, let guess = simple $ x + non_mod high_chars
, all (`elem` init_chars) (take 1 guess)
, all (`elem` chars) (drop 1 guess)
]
-- inverts the Hash if the hash was created without modulos
-- returns string in reverse order
simple :: Word64 -> String
simple 0 = ""
simple x = chr (fromIntegral b) : simple a
where (a, b) = x `divMod` 223
-- Word list obtained from https://github.com/dwyl/english-words
wordFile :: T.Text
wordFile = $(makeRelativeToProject "words.txt" >>= embedStringFile)
m :: M.IntMap T.Text
m = M.fromList [ (fromIntegral (candidHash w), w) | w <- word_list ]
where
word_list = T.lines wordFile ++ map T.toTitle (T.lines wordFile)