Skip to content

Commit

Permalink
In string search, replace unsafe_wrap with codeunits (#48275)
Browse files Browse the repository at this point in the history
Currently, `findfirst(::String, ::String)` will eventually end up calling
`unsafe_wrap` on both arguments, in order to use Julia's generic vector search
functions. This causes unnecessary allocations.

This PR replaces use of `unsafe_wrap` with `codeunits`, removing the allocation.

See also: #45393
  • Loading branch information
jakobnissen authored Jan 14, 2023
1 parent 65e6919 commit 0371bf4
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions base/strings/search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ function _search(a::ByteArray, b::AbstractChar, i::Integer = 1)
if isascii(b)
_search(a,UInt8(b),i)
else
_search(a,unsafe_wrap(Vector{UInt8},string(b)),i).start
_search(a,codeunits(string(b)),i).start
end
end

Expand Down Expand Up @@ -98,7 +98,7 @@ function _rsearch(a::ByteArray, b::AbstractChar, i::Integer = length(a))
if isascii(b)
_rsearch(a,UInt8(b),i)
else
_rsearch(a,unsafe_wrap(Vector{UInt8},string(b)),i).start
_rsearch(a,codeunits(string(b)),i).start
end
end

Expand Down Expand Up @@ -207,7 +207,7 @@ _nthbyte(t::AbstractVector, index) = t[index + (firstindex(t)-1)]
function _searchindex(s::String, t::String, i::Integer)
# Check for fast case of a single byte
lastindex(t) == 1 && return something(findnext(isequal(t[1]), s, i), 0)
_searchindex(unsafe_wrap(Vector{UInt8},s), unsafe_wrap(Vector{UInt8},t), i)
_searchindex(codeunits(s), codeunits(t), i)
end

function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}},
Expand Down Expand Up @@ -521,7 +521,7 @@ function _rsearchindex(s::String, t::String, i::Integer)
return something(findprev(isequal(t[1]), s, i), 0)
elseif lastindex(t) != 0
j = i ncodeunits(s) ? nextind(s, i)-1 : i
return _rsearchindex(unsafe_wrap(Vector{UInt8}, s), unsafe_wrap(Vector{UInt8}, t), j)
return _rsearchindex(codeunits(s), codeunits(t), j)
elseif i > sizeof(s)
return 0
elseif i == 0
Expand Down

9 comments on commit 0371bf4

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily package evaluation, I will reply here when finished:

@nanosoldier runtests(isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your package evaluation job has completed - possible new issues were detected.
A full report can be found here.

@vtjnash
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here.

@vtjnash
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A lot of noise there. Many apparent inference improvements and regressions, as well as BLAS slowdowns and sparse multiply speed ups :/

@vtjnash
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nanosoldier runbenchmarks("blas" || ("sparse" && "matmul"), vs="@d2c270960015deb92c19189ea64b2af26467501f")

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - no performance regressions were detected. A full report can be found here.

@vtjnash
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nanosoldier runbenchmarks("inference" || "blas" || ("sparse" && "matmul"), vs="@d2c270960015deb92c19189ea64b2af26467501f")

Only the sparse improvement reproduces, so I don't think it is necessary to bisect this. But confirming.

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here.

Please sign in to comment.