Skip to content

Commit

Permalink
Simpler:
Browse files Browse the repository at this point in the history
ref: [Simpler · Issue #11 · sqids/sqids-spec](sqids/sqids-spec#11)
  • Loading branch information
antimon2 committed Sep 6, 2023
1 parent f7adbb1 commit d97d19b
Show file tree
Hide file tree
Showing 8 changed files with 215 additions and 187 deletions.
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,31 +26,31 @@ Simple encode & decode:

```julia
config = Sqids.configure()
id = Sqids.encode(config, [1, 2, 3]) #> "8QRLaD"
id = Sqids.encode(config, [1, 2, 3]) #> "86Rf07"
numbers = Sqids.decode(config, id) #> [1, 2, 3]
```

Randomize IDs by providing a custom alphabet:

```julia
config = Sqids.configure(alphabet="FxnXM1kBN6cuhsAvjW3Co7l2RePyY8DwaU04Tzt9fHQrqSVKdpimLGIJOgb5ZE")
id = Sqids.encode(config, [1, 2, 3]) #> "B5aMa3"
id = Sqids.encode(config, [1, 2, 3]) #> "B4aajs"
numbers = Sqids.decode(config, id) #> [1, 2, 3]
```

Enforce a *minimum* length for IDs:

```julia
config = Sqids.configure(minLength=10)
id = Sqids.encode(config, [1, 2, 3]) #> "75JT1cd0dL"
id = Sqids.encode(config, [1, 2, 3]) #> "86Rf07xd4z"
numbers = Sqids.decode(config, id) #> [1, 2, 3]
```

Prevent specific words from appearing anywhere in the auto-generated IDs:

```julia
config = Sqids.configure(blocklist=["word1","word2"])
id = Sqids.encode(config, [1, 2, 3]) #> "8QRLaD"
id = Sqids.encode(config, [1, 2, 3]) #> "86Rf07"
numbers = Sqids.decode(config, id) #> [1, 2, 3]
```

Expand All @@ -60,7 +60,7 @@ If `strict=false` is set when configuring, it enables handling of limitless valu

```julia
config = Sqids.configure(strict=false) # not-strict mode
id = Sqids.encode(config, Int128[9223372036854775808]) #> "piF3yT7tOtoO"
id = Sqids.encode(config, Int128[9223372036854775808]) #> "pXFNc5r689z6"
numbers = Sqids.decode(config, id) #> Int128[9223372036854775808]
```

Expand Down
130 changes: 52 additions & 78 deletions src/Sqids.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
module Sqids

export encode, decode, minValue, maxValue
export encode, decode

using Base.Checked: mul_with_overflow, add_with_overflow

include("Blocklists.jl")

const DEFAULT_ALPHABET = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
const MIN_VALUE = 0
const MIN_LENGTH_LIMIT = 1_000

_shuffle(alphabet::AbstractString) = String(_shuffle!(collect(alphabet)))
function _shuffle!(chars::Vector{Char})
Expand All @@ -24,7 +25,7 @@ end
Sqids.Configuration
Sqids' parameter-configuration.
Be sure to place the instance as the 1st argument of [`encode`](@ref), [`decode`](@ref), [`minValue`](@ref) (and [`maxValue`](@ref)).
Be sure to place the instance as the 1st argument of [`encode`](@ref) and [`decode`](@ref).
See also: [`configure`](@ref)
"""
Expand All @@ -34,14 +35,15 @@ struct Configuration{S}
blocklist::Set{String}
function Configuration(alphabet::AbstractString, minLength::Int, blocklist, strict::Bool = true)
# @assert blocklist isa Union{AbstractSet{<:AbstractString}, AbstractArray{<:AbstractString}}
length(alphabet) < 5 && throw(ArgumentError("Alphabet length must be at least 5."))
sizeof(alphabet) == length(alphabet) || throw(ArgumentError("Alphabet cannot contain multibyte characters."))
length(alphabet) < 3 && throw(ArgumentError("Alphabet length must be at least 3."))
length(unique(alphabet)) == length(alphabet) || throw(ArgumentError("Alphabet must contain unique characters."))
MIN_VALUE ≤ minLength ≤ length(alphabet) || throw(ArgumentError("Minimum length has to be between $(MIN_VALUE) and $(length(alphabet))."))
0 ≤ minLength ≤ MIN_LENGTH_LIMIT || throw(ArgumentError("Minimum length has to be between 0 and $(MIN_LENGTH_LIMIT)."))

# clean up blocklist:
# 1. all blocklist words should be lowercase
# 2. no words less than 3 chars
# 3. if some words contain chars that are not in the alphabet, remove those
# clean up blocklist:
# 1. all blocklist words should be lowercase
# 2. no words less than 3 chars
# 3. if some words contain chars that are not in the alphabet, remove those
alphabet_chars = Set(lowercase(alphabet))
filteredBlocklist = Set(filter(blocklist .|> lowercase) do word
length(word) ≥ 3 && issetequal(word ∩ alphabet_chars, word)
Expand Down Expand Up @@ -93,88 +95,78 @@ Encode the passed `numbers` to an id.
# Example
```julia-repl
julia> encode(Sqids.configure(), [1, 2, 3])
"8QRLaD"
"86Rf07"
```
"""
function encode(config::Configuration, numbers::AbstractArray{<:Integer})
isempty(numbers) && return ""
# don't allow out-of-range numbers [might be lang-specific]
all(≥(minValue(config)), numbers) || throw(ArgumentError("Encoding supports numbers greater than or equal to $(minValue(config))"))
_encode_numbers(config, numbers, false)
all(≥(MIN_VALUE), numbers) || throw(ArgumentError("Encoding supports numbers greater than or equal to $(MIN_VALUE)."))
_encode_numbers(config, numbers, 0)
end
function encode(config::Configuration{true}, numbers::AbstractArray{<:Integer})
isempty(numbers) && return ""
# don't allow out-of-range numbers [might be lang-specific]
all(numbers) do num
minValue(config) ≤ num ≤ maxValue(config)
end || throw(ArgumentError("Encoding supports numbers between $(minValue(config)) and $(maxValue(config))"))
_encode_numbers(config, numbers, false)
MIN_VALUE ≤ num ≤ maxValue(config)
end || throw(ArgumentError("Encoding supports numbers between $(MIN_VALUE) and $(maxValue(config))."))
_encode_numbers(config, numbers, 0)
end
function _encode_numbers(config::Configuration, numbers::AbstractArray{<:Integer}, partitioned::Bool = false)
function _encode_numbers(config::Configuration, numbers::AbstractArray{<:Integer}, increment::Int = 0)
# if increment is greater than alphabet length, we've reached max attempts
if increment > length(config.alphabet)
throw(ArgumentError("Reached max attempts to re-generate the ID."))
end

# get a semi-random offset from input numbers
# offset = foldl((a, (i, v)) -> a + Int(config.alphabet[v % length(config.alphabet) + 1]) + i, enumerate(numbers), init=0) % length(config.alphabet)
# offset = foldl((a, (i, v)) -> a + Int(config.alphabet[v % length(config.alphabet) + 1]) + i, enumerate(numbers), init=increment) % length(config.alphabet)
# ↓ a little faster
offset = 0
offset = increment
for (i, v) in pairs(numbers)
offset += Int(config.alphabet[v % length(config.alphabet) + 1]) + i
end
offset %= length(config.alphabet)

# prefix is the first character in the generated ID, used for randomization
# partition is the character used instead of the first separator to indicate that the first number in the input array is a throwaway number. this character is used only once to handle blocklist and/or padding. it's omitted completely in all other cases
# alphabet should not contain `prefix` or `partition` reserved characters
# reverse alphabet (otherwise for [0, x] `offset` and `separator` will be the same char)
alphabet_chars = collect(config.alphabet)[[offset+1:end; begin:offset]]
prefix = popfirst!(alphabet_chars)
partition = popfirst!(alphabet_chars)
prefix = alphabet_chars[begin]
reverse!(alphabet_chars)

id = sprint(sizehint=2*length(numbers)) do io
print(io, prefix)
# encode input array
for (i, num) in pairs(numbers)
# the last character of the alphabet is going to be reserved for the `separator`
alphabetWithoutSeparator = @view alphabet_chars[begin:end-1]
# the first character of the alphabet is going to be reserved for the `separator`
alphabetWithoutSeparator = @view alphabet_chars[begin+1:end]
print(io, _to_id(num, alphabetWithoutSeparator))
if i < length(numbers)
# prefix is used only for the first number
# separator = alphabet[end]
# for the barrier use the `separator` unless this is the first iteration and the first number is a throwaway number - then use the `partition` character
print(io, partitioned && i == 1 ? partition : alphabet_chars[end])
# `separator` character is used to isolate numbers within the ID
print(io, alphabet_chars[begin])

# shuffle on every iteration
_shuffle!(alphabet_chars)
end
end
end

# if `minLength` is used and the ID is too short, add a throwaway number
# handle `minLength` requirement, if the ID is too short
if config.minLength > length(id)
# partitioning is required so we can safely throw away chunk of the ID during decoding
if !partitioned
partitioned_numbers = [zero(eltype(numbers)); numbers]
id = _encode_numbers(config, partitioned_numbers, true)
end

# if adding a `partition` number did not make the length meet the `minLength` requirement, then make the new id this format: `prefix` character + a slice of the alphabet to make up the missing length + the rest of the ID without the `prefix` character
if config.minLength > length(id)
id = id[begin] * join(alphabet_chars[begin:config.minLength - length(id)]) * id[2:end]
# append a separator
id *= alphabet_chars[begin]

# keep appending `separator` + however much alphabet is needed
# for decoding: two separators next to each other is what tells us the rest are junk characters
while length(id) < config.minLength
_shuffle!(alphabet_chars)
id *= join(alphabet_chars[begin:min(config.minLength - length(id), length(alphabet_chars))])
end
end

# if ID has a blocked word anywhere, add a throwaway number & start over
# if ID has a blocked word anywhere, restart with a +1 increment
if _is_blocked_id(config, id)
if partitioned
# c8 ignore next 2
if isstrict(config) && numbers[1] == maxValue(config)
throw(ArgumentError("Ran out of range checking against the blocklist"))
else
numbers[1] += 1
id = _encode_numbers(config, numbers, true)
end
else
partitioned_numbers = [zero(eltype(numbers)); numbers]
id = _encode_numbers(config, partitioned_numbers, true)
end
id = _encode_numbers(config, numbers, increment + 1)
end

return id
Expand Down Expand Up @@ -220,7 +212,7 @@ Restore a numbers list from the passed `id`.
# Example
```julia-repl
julia> decode(Sqids.configure(), "8QRLaD")
julia> decode(Sqids.configure(), "86Rf07")
3-element Array{Int64,1}:
1
2
Expand All @@ -246,29 +238,23 @@ function decode(config::Configuration, id::AbstractString)
offset = findfirst(==(prefix), config.alphabet)

# re-arrange alphabet back into its original form
# `partition` character is in second position
# alphabet has to be without reserved `prefix` & `partition` characters
alphabet_chars = collect(config.alphabet)[[offset+1:end; begin:offset-1]]
partition = popfirst!(alphabet_chars)
# reverse alphabet
alphabet_chars = collect(config.alphabet)[[offset:end; begin:offset-1]]
reverse!(alphabet_chars)

# now it's safe to remove the prefix character from ID, it's not needed anymore
id_wk = @view id[begin+1:end]

# if this ID contains the `partition` character (between 1st position and non-last position), throw away everything to the left of it, include the `partition` character
partition_index = findfirst(==(partition), id_wk)
if !isnothing(partition_index) && partition_index > 1 && partition_index < length(id_wk)
id_wk = @view id_wk[partition_index+1:end]
alphabet_chars = _shuffle!(alphabet_chars)
end

# decode
while !isempty(id_wk)
separator = alphabet_chars[end]
separator = alphabet_chars[begin]
chunks = split(id_wk, separator, limit=2)
# if chunk is empty, we are done (the rest are junk characters)
isempty(chunks[1]) && return ret
# decode the number without using the `separator` character
# but also check that ID can be decoded (eg: does not contain any non-alphabet characters)
alphabetWithoutSeparator = @view alphabet_chars[begin:end-1]
chunks[1] ⊆ alphabetWithoutSeparator || return Int[]
# # but also check that ID can be decoded (eg: does not contain any non-alphabet characters)
alphabetWithoutSeparator = @view alphabet_chars[begin+1:end]
# chunks[1] ⊆ alphabetWithoutSeparator || return Int[]
# push!(ret, _to_number(config, chunks[1], alphabetWithoutSeparator))
num = _to_number(config, chunks[1], alphabetWithoutSeparator)
if !isstrict(config)
Expand Down Expand Up @@ -317,23 +303,11 @@ function _to_number(config::Configuration, id::AbstractString, init::I, alphabet
result
end

"""
minValue(config::Sqids.Configuration)
Return the minimum value available with Sqids.
Always returns `0`.
See also: [`maxValue`](@ref)
"""
minValue(::Configuration) = MIN_VALUE

"""
maxValue(config::Sqids.Configuration)
Return the maximum value available with Sqids.
Returns `typemax(Int)` if Strict mode, or throws an `MethodError` otherwise.
See also: [`minValue`](@ref)
"""
maxValue(::Configuration{true}) = typemax(Int)

Expand Down
27 changes: 24 additions & 3 deletions test/alphabet.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@ using Test
config = Sqids.configure(alphabet="0123456789abcdef")

numbers = [1, 2, 3]
id = "4d9fd2"
id = "489158"

@test Sqids.encode(config, numbers) == id
@test Sqids.decode(config, id) == numbers
end

@testset "short alphabet" begin
config = Sqids.configure(alphabet="abcde")
config = Sqids.configure(alphabet="abc")

numbers = [1, 2, 3]
@test Sqids.decode(config, Sqids.encode(config, numbers)) == numbers
Expand All @@ -29,15 +29,36 @@ using Test
@test Sqids.decode(config, Sqids.encode(config, numbers)) == numbers
end

@testset "multibyte characters" begin
@test_throws ArgumentError begin
Sqids.configure(alphabet="ë1092")
end
@static if VERSION ≥ v"1.8.0"
@test_throws "Alphabet cannot contain multibyte characters" begin
Sqids.configure(alphabet="ë1092")
end
end
end

@testset "repeating alphabet characters" begin
@test_throws ArgumentError begin
Sqids.configure(alphabet="aabcdefg")
end
@static if VERSION ≥ v"1.8.0"
@test_throws "Alphabet must contain unique characters" begin
Sqids.configure(alphabet="aabcdefg")
end
end
end

@testset "too short of an alphabet" begin
@test_throws ArgumentError begin
Sqids.configure(alphabet="abcd")
Sqids.configure(alphabet="ab")
end
@static if VERSION ≥ v"1.8.0"
@test_throws "Alphabet length must be at least 3" begin
Sqids.configure(alphabet="ab")
end
end
end

Expand Down
Loading

0 comments on commit d97d19b

Please sign in to comment.