Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make equality checks faster and fix hash #16

Merged
merged 7 commits into from
Oct 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions src/base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ end

# Convert `s` to a `String`: byte-swap the backing integer with `ntoh`, view it as
# bytes, and keep only the leading `sizeof(s)` bytes.
function String(s::ShortString)
    swapped = ntoh(s.size_content)
    bytes = reinterpret(UInt8, [swapped])
    return String(bytes[1:sizeof(s)])
end

# The code unit type is `UInt8`; answering directly avoids building a `String`
# just to ask it the same question.
Base.codeunit(s::ShortString) = UInt8
# Return the `i`-th code unit by delegating to the equivalent `String`.
# (The single-unit accessor is `codeunit`; `codeunits` has no two-argument method.)
Base.codeunit(s::ShortString, i::Integer) = codeunit(String(s), i)
# Return all code units of the equivalent `String`.
Base.codeunits(s::ShortString) = codeunits(String(s))
Expand All @@ -52,18 +52,19 @@ Base.isvalid(s::ShortString, i::Integer) = isvalid(String(s), i)
# Iteration is delegated to the equivalent `String`; the iteration state is whatever
# `iterate` on that `String` returns.
Base.iterate(s::ShortString) = iterate(String(s))
Base.iterate(s::ShortString, i::Integer) = iterate(String(s), i)
# The last valid index is the start of the last character, which is smaller than the
# byte length whenever that character is multi-byte (e.g. "α"), so ask the `String`.
Base.lastindex(s::ShortString) = lastindex(String(s))
# The number of code units is the stored byte length; no `String` needs to be built.
Base.ncodeunits(s::ShortString) = sizeof(s)
xiaodaigh marked this conversation as resolved.
Show resolved Hide resolved
xiaodaigh marked this conversation as resolved.
Show resolved Hide resolved
# Printing and display are delegated to the equivalent `String`.
Base.print(s::ShortString) = print(String(s))
Base.show(io::IO, str::ShortString) = show(io, String(str))
# The byte length is read from `size_content` by and-ing it with the size mask.
# The mask is converted to `UInt` with `%` before the `&`, and the result is
# returned as an `Int`.
Base.sizeof(s::ShortString{T}) where T = Int(s.size_content & (size_mask(s) % UInt))
xiaodaigh marked this conversation as resolved.
Show resolved Hide resolved

# Number of 4-bit nibbles used for the size field, keyed on the storage integer type.
# Signed and unsigned integers of 16 to 128 bits use one nibble.
size_nibbles(
    ::Type{<:Union{UInt16, UInt32, UInt64, UInt128, Int16, Int32, Int64, Int128}},
) = 1
# Signed and unsigned integers of 256 to 1024 bits use two nibbles.
size_nibbles(
    ::Type{<:Union{UInt256, UInt512, UInt1024, Int256, Int512, Int1024}},
) = 2
# Fallback for any other storage type: `log2(sizeof(T))` bits, rounded up to whole
# 4-bit nibbles. The result is an `Int`, matching the specialised methods, instead of
# the `Float64` that `ceil(x / 4)` would produce.
size_nibbles(::Type{T}) where T = cld(ceil(Int, log2(sizeof(T))), 4)

# Mask with the low `4 * size_nibbles(T)` bits set, returned in the storage type `T`
# so it can be combined with a `size_content` value of that type.
size_mask(T) = T(exp2(4*size_nibbles(T)) - 1)
# Convenience method: look the mask up from a `ShortString` instance.
size_mask(s::ShortString{T}) where T = size_mask(T)


# function Base.getindex(s::ShortString, i::Integer)
Expand All @@ -76,9 +77,27 @@ size_mask(T) = UInt(exp2(4*size_nibbles(T)) - 1)

# Collect `s` by collecting the equivalent `String`.
function Base.collect(s::ShortString)
    return collect(String(s))
end

# Fast path for `String` and `SubString{String}`: strings with different byte lengths
# cannot be equal. Otherwise `b` has the same byte length as `s`, so it is converted
# to the same `ShortString` type and the two are compared as `ShortString`s.
function ==(s::ShortString{S}, b::Union{String, SubString{String}}) where S
    ncodeunits(b) == ncodeunits(s) || return false
    return s == ShortString{S}(b)
end

# Generic fallback for every other `AbstractString`.
function ==(s::ShortString, b::AbstractString)
    # Could be a string type that might not use UTF8 encoding and that we don't have a
    # constructor for. Defer to equality that type probably has defined on `String`
    return String(s) == b
end

# `AbstractString == ShortString` is answered by swapping the operands, so the
# `ShortString`-first methods do the work.
function ==(a::AbstractString, b::ShortString)
    return b == a
end
# Same storage type on both sides: compare the raw `size_content` integers directly.
==(a::ShortString{S}, b::ShortString{S}) where S = a.size_content == b.size_content
# Different storage types on each side.
function ==(a::ShortString{A}, b::ShortString{B}) where {A,B}
    # Strings with different byte lengths can never be equal.
    if ncodeunits(a) != ncodeunits(b)
        return false
    end
    # Clear the size bits, then byte-swap so that the empty bytes are at the start;
    # the two values can then be compared even though their integer types differ.
    content_a = ntoh(a.size_content & ~size_mask(A))
    content_b = ntoh(b.size_content & ~size_mask(B))
    return content_a == content_b
end


function Base.cmp(a::ShortString{S}, b::ShortString{S}) where S
return cmp(a.size_content, b.size_content)
Expand Down
4 changes: 1 addition & 3 deletions src/hash.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,4 @@ export hash

import Base.hash

# A `ShortString` compares equal to a `String` with the same contents, so it must hash
# the same as that `String`. Hashing the raw `size_content` integer instead would make
# `Dict`/`Set` lookups that mix the two types miss.
Base.hash(x::ShortString, h::UInt) = hash(String(x), h)
xiaodaigh marked this conversation as resolved.
Show resolved Hide resolved
5 changes: 0 additions & 5 deletions test/hash.jl

This file was deleted.

192 changes: 107 additions & 85 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,85 +1,107 @@
using ShortStrings
using BitIntegers: UInt256, UInt512, UInt1024, @define_integers
using Test, Random

include("getindex.jl")
include("hash.jl")

# Run the basic checks for `constructor` once per source string type, inside a test
# set named after the constructor with one nested test set per string type.
function basic_test(constructor, max_len)
    source_types = (String, SubString{String})
    @testset "$constructor" begin
        @testset "$string_type" for string_type in source_types
            basic_test(string_type, constructor, max_len)
        end
    end
end

# Basic checks for one constructor / source string type pair.
function basic_test(string_type, constructor, max_len)
    # One random string of every length from 1 to `max_len`.
    originals = string_type.(randstring.(1:max_len))
    shorts = constructor.(originals)

    # Each constructed value compares equal to the string it was built from.
    @test all(shorts .== originals)
    @test issorted(fsort(shorts))

    # A string of exactly `max_len` bytes fits; one byte more must throw.
    @test collect(constructor("z"^max_len)) == fill('z', max_len)
    @test_throws ErrorException constructor("a"^(max_len+1))
end


# Named aliases, each with the maximum length passed to `basic_test`.
for (constructor, max_len) in (
    (ShortString3, 3),
    (ShortString7, 7),
    (ShortString15, 15),
    (ShortString30, 30),
    (ShortString62, 62),
    (ShortString126, 126),
)
    basic_test(constructor, max_len)
end

# The same checks through the parametric type with explicit storage integers.
for (storage, max_len) in (
    (UInt16, 1),
    (UInt32, 3),
    (UInt64, 7),
    (UInt128, 15),
    (UInt256, 30),
    (UInt512, 62),
    (UInt1024, 126),
)
    basic_test(ShortString{storage}, max_len)
end

# A user-defined 2048-bit integer as storage.
@define_integers 2048 MyInt2048 MyUInt2048
basic_test(ShortString{MyUInt2048}, 254)

# The string macros must produce exactly what the matching constructors produce.
@test ss126"Be honest, do you actually need a string longer than this. Seriously. C'mon this is pretty long." === ShortString126("Be honest, do you actually need a string longer than this. Seriously. C'mon this is pretty long.")
@test ss62"Basically a failly long string really" === ShortString62("Basically a failly long string really")
@test ss30"A Longer String!!!" === ShortString30("A Longer String!!!")

@test ss15"Short String!!!" === ShortString15("Short String!!!")
@test ss7"ShrtStr" === ShortString7("ShrtStr")
@test ss3"ss3" === ShortString3("ss3")


@testset "cmp" begin
    # (left, right, expected value of `cmp(left, right)`)
    cases = (
        ("abc", "abc", 0),
        ("ab", "abc", -1),
        ("abc", "ab", 1),
        ("ab", "ac", -1),
        ("ac", "ab", 1),
        ("α", "a", 1),
        ("b", "β", -1),
    )

    # ShortString compared with ShortString
    for (lhs, rhs, expected) in cases
        @test cmp(ShortString3(lhs), ShortString3(rhs)) == expected
    end

    # ShortString compared with String
    for (lhs, rhs, expected) in cases
        @test cmp(ShortString3(lhs), rhs) == expected
    end
end

@testset "Construction from other ShortStrings" begin
    # Widening, narrowing and same-size conversion keep the contents and produce
    # the target type.
    conversions = (
        (ShortString7, ShortString3),
        (ShortString3, ShortString7),
        (ShortString7, ShortString7),
    )
    for (target, source) in conversions
        @test target(source("ab")) == "ab"
        @test target(source("ab")) isa target
    end

    # Contents that do not fit the narrower type must throw.
    @test_throws ErrorException ShortString3(ShortString7("123456"))
end
using ShortStrings
using BitIntegers: UInt256, UInt512, UInt1024, @define_integers
using Test, Random

include("getindex.jl")

# Run the basic checks for `constructor` once per source string type, inside a test
# set named after the constructor with one nested test set per string type.
function basic_test(constructor, max_len)
    source_types = (String, SubString{String})
    @testset "$constructor" begin
        @testset "$string_type" for string_type in source_types
            basic_test(string_type, constructor, max_len)
        end
    end
end

# Basic checks for one constructor / source string type pair.
function basic_test(string_type, constructor, max_len)
    # One random string of every length from 1 to `max_len`.
    r = string_type.(randstring.(1:max_len))
    a = constructor.(r)

    @test all(a .== r)
    # Hashes are compared element by element; `hash(a) .== hash(r)` would only compare
    # the hashes of the two vectors as wholes.
    @test all(hash.(a) .== hash.(r))
    @test fsort(a) |> issorted

    # A string of exactly `max_len` bytes fits; one byte more must throw.
    @test collect(constructor("z"^max_len)) == fill('z', max_len)
    @test_throws ErrorException constructor("a"^(max_len+1))

    # equality
    @test constructor("c"^max_len) == "c"^max_len
    @test "c"^max_len == constructor("c"^max_len)
    @test constructor("c"^max_len) == constructor("c"^max_len)
    @test constructor("c"^max_len) != constructor("d"^max_len)
    @test constructor("c"^max_len) != constructor("c"^(max_len-1))
    @test constructor("c"^(max_len-1)) != constructor("c"^max_len)
    @test constructor("c"^max_len) != "c"^(max_len-1)
    @test constructor("c"^(max_len-1)) != "c"^max_len
end


# Named aliases, each with the maximum length passed to `basic_test`.
for (constructor, max_len) in (
    (ShortString3, 3),
    (ShortString7, 7),
    (ShortString15, 15),
    (ShortString30, 30),
    (ShortString62, 62),
    (ShortString126, 126),
)
    basic_test(constructor, max_len)
end

# The same checks through the parametric type with explicit storage integers.
for (storage, max_len) in (
    (UInt16, 1),
    (UInt32, 3),
    (UInt64, 7),
    (UInt128, 15),
    (UInt256, 30),
    (UInt512, 62),
    (UInt1024, 126),
)
    basic_test(ShortString{storage}, max_len)
end

# A user-defined 2048-bit integer as storage.
@define_integers 2048 MyInt2048 MyUInt2048
basic_test(ShortString{MyUInt2048}, 254)

# The string macros must produce exactly what the matching constructors produce.
@test ss126"Be honest, do you actually need a string longer than this. Seriously. C'mon this is pretty long." === ShortString126("Be honest, do you actually need a string longer than this. Seriously. C'mon this is pretty long.")
@test ss62"Basically a failly long string really" === ShortString62("Basically a failly long string really")
@test ss30"A Longer String!!!" === ShortString30("A Longer String!!!")

@test ss15"Short String!!!" === ShortString15("Short String!!!")
@test ss7"ShrtStr" === ShortString7("ShrtStr")
@test ss3"ss3" === ShortString3("ss3")


@testset "equality of different sized ShortStrings" begin
    # Same contents held in differently sized types compare equal in both directions.
    small = ShortString3("ab")
    large = ShortString15("ab")
    @test large == small
    @test small == large

    # Different contents compare unequal regardless of the types involved.
    @test ShortString30("x") != ShortString3("y")
    @test ShortString30("y") != ShortString3("x")

    # this one is too big to fit in the other
    too_big = ShortString15("abcd")
    @test too_big != small
    @test small != too_big
end

@testset "cmp" begin
    # (left, right, expected value of `cmp(left, right)`)
    cases = (
        ("abc", "abc", 0),
        ("ab", "abc", -1),
        ("abc", "ab", 1),
        ("ab", "ac", -1),
        ("ac", "ab", 1),
        ("α", "a", 1),
        ("b", "β", -1),
    )

    # ShortString compared with ShortString
    for (lhs, rhs, expected) in cases
        @test cmp(ShortString3(lhs), ShortString3(rhs)) == expected
    end

    # ShortString compared with String
    for (lhs, rhs, expected) in cases
        @test cmp(ShortString3(lhs), rhs) == expected
    end
end

@testset "Construction from other ShortStrings" begin
    # Widening, narrowing and same-size conversion keep the contents and produce
    # the target type.
    conversions = (
        (ShortString7, ShortString3),
        (ShortString3, ShortString7),
        (ShortString7, ShortString7),
    )
    for (target, source) in conversions
        @test target(source("ab")) == "ab"
        @test target(source("ab")) isa target
    end

    # Contents that do not fit the narrower type must throw.
    @test_throws ErrorException ShortString3(ShortString7("123456"))
end