Skip to content

Commit

Permalink
Merge pull request #16 from oxinabox/ox/eq
Browse files Browse the repository at this point in the history
Make equality checks faster and fix hash
  • Loading branch information
xiaodaigh authored Oct 31, 2020
2 parents f7d4f41 + 3296160 commit 0f4f72c
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 99 deletions.
31 changes: 25 additions & 6 deletions src/base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ end

# Build a regular `String` from the bytes packed inside a `ShortString`.
function String(s::ShortString)
    # Reorder the packed integer with `ntoh`, view it as raw bytes, and keep
    # only the first `sizeof(s)` of them.
    packed = ntoh(s.size_content)
    raw = reinterpret(UInt8, [packed])
    return String(raw[1:sizeof(s)])
end

Base.codeunit(s::ShortString) = codeunit(String(s))
# The code unit type is reported as `UInt8` directly; no `String` is built.
Base.codeunit(s::ShortString) = UInt8
# Fallback for indices that are not `Integer`s. `codeunits` only accepts a single
# argument, so the indexed lookup has to go through `codeunit`.
Base.codeunit(s::ShortString, i) = codeunit(String(s), i)
# Indexed code unit lookup, delegated to the equivalent `String`.
Base.codeunit(s::ShortString, i::Integer) = codeunit(String(s), i)
# All code units, obtained from the equivalent `String`.
Base.codeunits(s::ShortString) = codeunits(String(s))
Expand All @@ -52,18 +52,19 @@ Base.isvalid(s::ShortString, i::Integer) = isvalid(String(s), i)
# Iteration is delegated to the equivalent `String`.
function Base.iterate(s::ShortString)
    return iterate(String(s))
end

function Base.iterate(s::ShortString, i::Integer)
    return iterate(String(s), i)
end

# `lastindex` is reported as the stored byte length.
function Base.lastindex(s::ShortString)
    return sizeof(s)
end
Base.ncodeunits(s::ShortString) = ncodeunits(String(s))
# The number of code units is the stored byte length.
function Base.ncodeunits(s::ShortString)
    return sizeof(s)
end

# Printing and showing both go through a temporary `String`.
function Base.print(s::ShortString)
    return print(String(s))
end

function Base.show(io::IO, str::ShortString)
    return show(io, String(str))
end
Base.sizeof(s::ShortString{T}) where T = Int(s.size_content & size_mask(T))
# Byte length of the string, read from the masked low bits of `size_content`.
function Base.sizeof(s::ShortString{T}) where T
    # The mask is converted to `UInt` before it is applied.
    mask = size_mask(s) % UInt
    return Int(s.size_content & mask)
end

# Number of nibbles (4-bit groups) used for the length field, per backing integer type.
# The 16- to 128-bit integers, signed or unsigned, use a single nibble.
function size_nibbles(
    ::Type{<:Union{UInt16, UInt32, UInt64, UInt128, Int16, Int32, Int64, Int128}},
)
    return 1
end

# The 256- to 1024-bit integers, signed or unsigned, use two nibbles.
function size_nibbles(
    ::Type{<:Union{UInt256, UInt512, UInt1024, Int256, Int512, Int1024}},
)
    return 2
end
# Generic fallback for any other backing type: the nibble count is derived from the
# type's size in bytes. The result is rounded straight to an `Int` so that every
# `size_nibbles` method returns an integer instead of a `Float64`.
size_nibbles(::Type{T}) where T = ceil(Int, log2(sizeof(T)) / 4)

size_mask(T) = UInt(exp2(4*size_nibbles(T)) - 1)
# Bit mask, expressed in the backing type `T`, with the lowest
# `4 * size_nibbles(T)` bits set.
function size_mask(T)
    nbits = 4 * size_nibbles(T)
    return T(exp2(nbits) - 1)
end

# Convenience method: look the mask up from a `ShortString` instance.
function size_mask(s::ShortString{T}) where T
    return size_mask(T)
end


# function Base.getindex(s::ShortString, i::Integer)
Expand All @@ -76,9 +77,27 @@ size_mask(T) = UInt(exp2(4*size_nibbles(T)) - 1)

Base.collect(s::ShortString) = collect(String(s))

==(s::ShortString, b::AbstractString) = begin
String(s) == b
# Comparison against `String` / `SubString{String}`: bail out early when the byte
# counts differ, otherwise convert `b` to the same `ShortString` type and compare.
function ==(s::ShortString{S}, b::Union{String, SubString{String}}) where S
    if ncodeunits(b) != ncodeunits(s)
        return false
    end
    converted = ShortString{S}(b)
    return s == converted
end
function ==(s::ShortString, b::AbstractString)
    # `b` may be a string type that does not use UTF-8 encoding and for which no
    # `ShortString` constructor exists, so compare through `String` and rely on the
    # equality that type probably defines against `String`.
    as_string = String(s)
    return as_string == b
end

# Swap the arguments so the comparison reuses the `ShortString`-first methods.
function ==(a::AbstractString, b::ShortString)
    return b == a
end
# Same backing type: the packed integers can be compared directly.
==(a::ShortString{S}, b::ShortString{S}) where S = a.size_content == b.size_content
# Different backing types: equal only when the byte counts match and the content
# matches once the size bits are removed.
function ==(a::ShortString{A}, b::ShortString{B}) where {A,B}
    if ncodeunits(a) != ncodeunits(b)
        return false
    end
    # Drop the size bits, then flip with `ntoh` so that the empty bytes are at the
    # start and values of different widths can be compared.
    # NOTE(review): `ntoh` does not swap bytes on big-endian hosts — confirm this
    # comparison is only expected to run on little-endian machines.
    lhs = ntoh(a.size_content & ~size_mask(A))
    rhs = ntoh(b.size_content & ~size_mask(B))
    return lhs == rhs
end


function Base.cmp(a::ShortString{S}, b::ShortString{S}) where S
return cmp(a.size_content, b.size_content)
Expand Down
4 changes: 1 addition & 3 deletions src/hash.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,4 @@ export hash

import Base.hash

Base.hash(x::ShortString, args...; kwargs...) = hash(x.size_content, args...; kwargs...)

Base.hash(x::ShortString, h::UInt) = hash(x.size_content, h)
# Hash through the `String` form, mixing in the seed `h`.
function Base.hash(x::ShortString, h::UInt)
    return hash(String(x), h)
end
5 changes: 0 additions & 5 deletions test/hash.jl

This file was deleted.

192 changes: 107 additions & 85 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,85 +1,107 @@
using ShortStrings
using BitIntegers: UInt256, UInt512, UInt1024, @define_integers
using Test, Random

include("getindex.jl")
include("hash.jl")

function basic_test(constructor, max_len)
@testset "$constructor" begin
for string_type in (String, SubString{String})
@testset "$string_type" begin
basic_test(string_type, constructor, max_len)
end
end
end
end

function basic_test(string_type, constructor, max_len)
r = string_type.(randstring.(1:max_len))
@test all(constructor.(r) .== r)
a = constructor.(r)
@test fsort(a) |> issorted

@test collect(constructor("z"^max_len)) == fill('z', max_len)
@test_throws ErrorException constructor("a"^(max_len+1))
end


basic_test(ShortString3, 3)
basic_test(ShortString7, 7)
basic_test(ShortString15, 15)
basic_test(ShortString30, 30)
basic_test(ShortString62, 62)
basic_test(ShortString126, 126)

basic_test(ShortString{UInt16}, 1)
basic_test(ShortString{UInt32}, 3)
basic_test(ShortString{UInt64}, 7)
basic_test(ShortString{UInt128}, 15)
basic_test(ShortString{UInt256}, 30)
basic_test(ShortString{UInt512}, 62)
basic_test(ShortString{UInt1024}, 126)

@define_integers 2048 MyInt2048 MyUInt2048
basic_test(ShortString{MyUInt2048}, 254)

@test ss126"Be honest, do you actually need a string longer than this. Seriously. C'mon this is pretty long." === ShortString126("Be honest, do you actually need a string longer than this. Seriously. C'mon this is pretty long.")
@test ss62"Basically a failly long string really" === ShortString62("Basically a failly long string really")
@test ss30"A Longer String!!!" === ShortString30("A Longer String!!!")

@test ss15"Short String!!!" === ShortString15("Short String!!!")
@test ss7"ShrtStr" === ShortString7("ShrtStr")
@test ss3"ss3" === ShortString3("ss3")


@testset "cmp" begin
@test cmp(ShortString3("abc"), ShortString3("abc")) == 0
@test cmp(ShortString3("ab"), ShortString3("abc")) == -1
@test cmp(ShortString3("abc"), ShortString3("ab")) == 1
@test cmp(ShortString3("ab"), ShortString3("ac")) == -1
@test cmp(ShortString3("ac"), ShortString3("ab")) == 1
@test cmp(ShortString3("α"), ShortString3("a")) == 1
@test cmp(ShortString3("b"), ShortString3("β")) == -1

@test cmp(ShortString3("abc"), "abc") == 0
@test cmp(ShortString3("ab"), "abc") == -1
@test cmp(ShortString3("abc"), "ab") == 1
@test cmp(ShortString3("ab"), "ac") == -1
@test cmp(ShortString3("ac"), "ab") == 1
@test cmp(ShortString3("α"), "a") == 1
@test cmp(ShortString3("b"), "β") == -1
end

@testset "Construction from other ShortStrings" begin
@test ShortString7(ShortString3("ab")) == "ab"
@test ShortString7(ShortString3("ab")) isa ShortString7

@test ShortString3(ShortString7("ab")) == "ab"
@test ShortString3(ShortString7("ab")) isa ShortString3

@test ShortString7(ShortString7("ab")) == "ab"
@test ShortString7(ShortString7("ab")) isa ShortString7

@test_throws ErrorException ShortString3(ShortString7("123456"))
end
using ShortStrings
using BitIntegers: UInt256, UInt512, UInt1024, @define_integers
using Test, Random

include("getindex.jl")

# Run the basic checks for `constructor` once per supported source string type,
# with one nested test set per type.
function basic_test(constructor, max_len)
    source_types = (String, SubString{String})
    @testset "$constructor" begin
        @testset "$string_type" for string_type in source_types
            basic_test(string_type, constructor, max_len)
        end
    end
end

# Core checks for one (source string type, ShortString constructor) combination.
#
# - `string_type`: type the random source strings are converted to before use.
# - `constructor`: the ShortString type/constructor under test.
# - `max_len`: longest string (in bytes) the constructor is expected to accept.
function basic_test(string_type, constructor, max_len)
    # One random string for every length from 1 up to `max_len`.
    r = string_type.(randstring.(1:max_len))
    a = constructor.(r)
    @test all(a .== r)
    # Compare hashes element by element, so each ShortString is checked against its
    # own source string rather than hashing the two vectors as a whole.
    @test all(hash.(a) .== hash.(r))
    @test fsort(a) |> issorted

    @test collect(constructor("z"^max_len)) == fill('z', max_len)
    # One byte over the limit must be rejected.
    @test_throws ErrorException constructor("a"^(max_len+1))

    # equality
    @test constructor("c"^max_len) == "c"^max_len
    @test "c"^max_len == constructor("c"^max_len)
    @test constructor("c"^max_len) == constructor("c"^max_len)
    @test constructor("c"^max_len) != constructor("d"^max_len)
    @test constructor("c"^max_len) != constructor("c"^(max_len-1))
    @test constructor("c"^(max_len-1)) != constructor("c"^max_len)
    @test constructor("c"^max_len) != "c"^(max_len-1)
    @test constructor("c"^(max_len-1)) != "c"^max_len
end


# Named ShortString aliases, each paired with the longest string it should accept.
for (constructor, max_len) in (
    (ShortString3, 3),
    (ShortString7, 7),
    (ShortString15, 15),
    (ShortString30, 30),
    (ShortString62, 62),
    (ShortString126, 126),
)
    basic_test(constructor, max_len)
end

# The same checks, parameterised directly on the backing unsigned integer type.
for (backing, max_len) in (
    (UInt16, 1),
    (UInt32, 3),
    (UInt64, 7),
    (UInt128, 15),
    (UInt256, 30),
    (UInt512, 62),
    (UInt1024, 126),
)
    basic_test(ShortString{backing}, max_len)
end

# A user-defined 2048-bit integer exercises the generic code path.
@define_integers 2048 MyInt2048 MyUInt2048
basic_test(ShortString{MyUInt2048}, 254)

# Each `ssN"..."` string macro must produce a value identical (`===`) to calling the
# matching `ShortStringN` constructor on the same literal.
@test ss126"Be honest, do you actually need a string longer than this. Seriously. C'mon this is pretty long." === ShortString126("Be honest, do you actually need a string longer than this. Seriously. C'mon this is pretty long.")
@test ss62"Basically a failly long string really" === ShortString62("Basically a failly long string really")
@test ss30"A Longer String!!!" === ShortString30("A Longer String!!!")

# The 15- and 7-byte literals below are exactly as long as their type's name suggests.
@test ss15"Short String!!!" === ShortString15("Short String!!!")
@test ss7"ShrtStr" === ShortString7("ShrtStr")
@test ss3"ss3" === ShortString3("ss3")


@testset "equality of different sized ShortStrings" begin
    # Same content in two differently sized types, checked in both argument orders.
    short_ab = ShortString3("ab")
    long_ab = ShortString15("ab")
    @test long_ab == short_ab
    @test short_ab == long_ab

    # Different content of the same length.
    @test ShortString30("x") != ShortString3("y")
    @test ShortString30("y") != ShortString3("x")

    # this one is too big to fit in the other
    long_abcd = ShortString15("abcd")
    @test long_abcd != short_ab
    @test short_ab != long_abcd
end

@testset "cmp" begin
    # Each row is (left, right, expected result of `cmp`).
    cases = (
        ("abc", "abc", 0),
        ("ab", "abc", -1),
        ("abc", "ab", 1),
        ("ab", "ac", -1),
        ("ac", "ab", 1),
        ("α", "a", 1),
        ("b", "β", -1),
    )

    # ShortString compared with ShortString.
    for (lhs, rhs, expected) in cases
        @test cmp(ShortString3(lhs), ShortString3(rhs)) == expected
    end

    # ShortString compared with a plain String.
    for (lhs, rhs, expected) in cases
        @test cmp(ShortString3(lhs), rhs) == expected
    end
end

@testset "Construction from other ShortStrings" begin
    # Each (target, source) pair must keep the content and yield the target type.
    for (target, source) in (
        (ShortString7, ShortString3),
        (ShortString3, ShortString7),
        (ShortString7, ShortString7),
    )
        @test target(source("ab")) == "ab"
        @test target(source("ab")) isa target
    end

    # Content that does not fit in the target type must be rejected.
    @test_throws ErrorException ShortString3(ShortString7("123456"))
end

0 comments on commit 0f4f72c

Please sign in to comment.