Skip to content

Commit

Permalink
Merge pull request #22 from oxinabox/ox/iteratehash
Browse files Browse the repository at this point in the history
Optimized isascii and length
  • Loading branch information
ScottPJones authored Nov 3, 2020
2 parents a3ab08e + 5d9f970 commit 14bf784
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 9 deletions.
1 change: 0 additions & 1 deletion src/ShortStrings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,5 @@ export fsort, fsort!, ShortString,
@ss3_str, @ss7_str, @ss15_str, @ss30_str, @ss62_str, @ss126_str

include("base.jl")
include("hash.jl")

end # module
51 changes: 43 additions & 8 deletions src/base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ Base.show(io::IO, str::ShortString) = show(io, String(str))
end
end


# Width of the size field for the built-in 16- to 128-bit integer types.
# `size_mask` reserves 4 bits per unit and `isascii` shifts by 8 bits per unit.
# Unsigned backing types:
size_nibbles(::Type{T}) where {T<:Union{UInt16, UInt32, UInt64, UInt128}} = 1
# Signed backing types of the same widths:
size_nibbles(::Type{T}) where {T<:Union{Int16, Int32, Int64, Int128}} = 1
# The wide (256- to 1024-bit) unsigned backing types use a size field of 2 units.
size_nibbles(::Type{T}) where {T<:Union{UInt256, UInt512, UInt1024}} = 2
Expand All @@ -92,16 +93,50 @@ size_nibbles(::Type{T}) where {T} = ceil(log2(sizeof(T))/4)
# Bit mask, converted to `T`, with the lowest `4*size_nibbles(T)` bits set.
function size_mask(T)
    nbits = 4 * size_nibbles(T)
    return T(exp2(nbits) - 1)
end

# Convenience method: the mask for the integer type backing a `ShortString`.
size_mask(::ShortString{T}) where {T} = size_mask(T)

# Return `true` when no content byte of `s` has its high bit (0x80) set,
# i.e. every code unit is a 7-bit ASCII character.
#
# The low-order `8*size_nibbles(T)` bits are shifted out first so that the
# size field is not inspected. The remaining bytes are then examined one at a
# time from the least significant end. Shifting must go to the RIGHT: a left
# shift leaves the low byte permanently zero, which makes every string look
# like ASCII.
#
# NOTE(review): assumes bytes past the end of the string are zero so that they
# never trip the 0x80 test; confirm against the constructor.
@inline function Base.isascii(s::ShortString{T}) where T
    val = s.size_content >> (8*size_nibbles(T))
    for _ in 1:sizeof(T)
        iszero(val & 0x80) || return false
        val >>= 8  # bring the next content byte into the low position
    end
    return true
end

# function Base.getindex(s::ShortString, i::Integer)
# getindex(String(s), i)
# end

# function Base.getindex(s::ShortString, args...; kwargs...)
# getindex(String(s), args...; kwargs...)
# end
# Number of characters in `s`.
#
# Code units are numbered from 1, with code unit `k` stored `8*(sizeof(T) - k)`
# bits above the bottom of `s.size_content` (most significant byte first).
# Characters are grouped the way Julia's `String` groups UTF-8 bytes: a lead
# byte announces up to three continuation bytes (`10xxxxxx`), and the lead
# together with the continuation bytes actually present forms one character.
# Any other byte (ASCII, stray continuation, invalid lead) counts as a
# character on its own. For pure-ASCII content this equals `ncodeunits(s)`.
function Base.length(s::ShortString{T}) where T
    n = ncodeunits(s)
    # k-th code unit of the content, 1-based
    byteat(k) = (s.size_content >> (8*(sizeof(T) - k))) % UInt8

    len = 0
    i = 1
    while i <= n
        lead = byteat(i)
        # number of continuation bytes the lead byte announces
        expected = lead < 0xc0 ? 0 :
                   lead < 0xe0 ? 1 :
                   lead < 0xf0 ? 2 :
                   lead < 0xf8 ? 3 : 0
        i += 1
        # swallow the continuation bytes that really follow the lead byte
        while expected > 0 && i <= n && (byteat(i) & 0xc0) == 0x80
            i += 1
            expected -= 1
        end
        len += 1
    end
    return len
end

# Collect the characters of `s` by converting it to a `String` first and
# delegating to `collect` on that `String`.
function Base.collect(s::ShortString)
    str = String(s)
    return collect(str)
end
# Iterate over the characters of `s`. `i` is the 1-based index of the code unit
# at which the next character starts. Returns `(char, next_index)`, or
# `nothing` once `i` is outside `1:ncodeunits(s)`.
#
# Code unit `k` is stored `8*(sizeof(T) - k)` bits above the bottom of
# `s.size_content`. Decoding follows the same rules Base uses for `String`:
# the lead byte and the continuation bytes (`10xxxxxx`) it announces are packed
# left-aligned into a `UInt32`, which is exactly the in-memory representation
# of `Char`, so the value is reinterpreted rather than converted from a code
# point. Malformed sequences therefore yield malformed `Char`s instead of
# throwing, as they do for `String`.
@inline function Base.iterate(s::ShortString{T}, i::Integer=1) where T
    n = ncodeunits(s)
    1 <= i <= n || return nothing

    lead = (s.size_content >> (8*(sizeof(T) - i))) % UInt8
    u = UInt32(lead) << 24
    # number of continuation bytes the lead byte announces
    expected = lead < 0xc0 ? 0 :
               lead < 0xe0 ? 1 :
               lead < 0xf0 ? 2 :
               lead < 0xf8 ? 3 : 0

    j = i + 1
    shift = 16
    while expected > 0 && j <= n
        b = (s.size_content >> (8*(sizeof(T) - j))) % UInt8
        (b & 0xc0) == 0x80 || break  # not a continuation byte: stop here
        u |= UInt32(b) << shift
        shift -= 8
        j += 1
        expected -= 1
    end
    return reinterpret(Char, u), j
end

function ==(s::ShortString{S}, b::Union{String, SubString{String}}) where {S}
ncodeunits(b) == ncodeunits(s) || return false
Expand Down
2 changes: 2 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ function basic_test(string_type, constructor, max_len)
r = string_type.(randstring.(1:max_len))
@test all(constructor.(r) .== r)
@test all(hash(constructor.(r)) .== hash(r))

a = constructor.(r)
@test fsort(a) |> issorted

Expand Down Expand Up @@ -113,3 +114,4 @@ end

# Iterations: a string mixing 1-, 2- and 3-byte UTF-8 characters must collect
# to the same characters it was built from.
@test collect(ShortString15("x∫yâz")) == ['x','∫','y','â','z']

0 comments on commit 14bf784

Please sign in to comment.