# Patch overview. Everything from the first `diff --git` header onward is unchanged.
# NOTE(review): the hunks below have had their newlines collapsed into spaces, so the
# patch cannot be applied in this form; its line structure must be restored first.
#
# base/io.jl
#   * AbstractPipe forwarding: the `readuntil` forwarders for `UInt8` and `Char` are
#     removed and re-added, the `AbstractString` and untyped forwarders are removed,
#     and a `readuntil_indexable` forwarder to `pipe_reader(io)` is added.
#   * readuntil(s::IO, delim::T): the output buffer changes from `T[]` to
#     `StringVector(0)` when `T === UInt8`, and `Vector{T}()` otherwise.
#   * The old readuntil(s::IO, t::AbstractString) is removed. It kept a window `m` of the
#     last `l` chars, compared `m == t` after each read, and warned for targets longer
#     than 40.
#   * New readuntil_indexable(io, target, out):
#       - returns immediately when `target` is empty;
#       - reads items of type `eltype(target)` from `io` until `eof(io)` or until
#         `done(target, pos)`, where `pos` is the index of the next target item to match;
#       - appends every item read to `out` (`write` when `out isa IO`, else `push!`);
#       - on a mismatch: stays put when `pos == first`, resets to `first` when
#         `pos == second`, otherwise jumps to `cache[pos] + first`;
#       - `cache` is allocated as `zeros(Int, len)` on first use and filled up to `pos`
#         on demand (`max_pos` marks how far it has been filled);
#       - has no explicit return value; the result is whatever was appended to `out`.
#     NOTE(review): while filling `cache`, only one candidate (`cache[max_pos] + first`)
#     is compared against `target[max_pos]`; on inequality the entry is left at 0 and no
#     further fallback links are followed. Tracing by hand with target "aabaaab" and
#     input "aabaaaabaaabX" leaves `cache[7]` at 0 although "aa" is both a prefix and a
#     suffix of "aabaaa", and the match ending at the 12th input item is then missed.
#     TODO: confirm with a test and, if reproduced, follow the fallback chain when
#     filling the table.
#   * New readuntil(io::IO, target::AbstractString):
#       - returns "" for an empty target;
#       - a one-character target below `Char(0x80)` is delegated to
#         `readuntil_string(io, c % UInt8)`, which is not defined in this patch
#         (presumably elsewhere in Base; verify);
#       - a `String` target is converted to `Vector{UInt8}`, a `SubString{String}` is
#         used as is, and any other string type is `collect`ed;
#       - the output buffer is `StringVector(0)` for `UInt8` elements, otherwise an
#         `IOBuffer` that is `take!`n, and the result is wrapped in `String`.
#   * New readuntil(io::IO, target::AbstractVector{T}): same buffer choice as the
#     single-delimiter method; returns the buffer filled by `readuntil_indexable`.
#   * New fallback `read(s::IO, T::Type)` that calls `error` with a message naming `T`.
#
# base/strings/types.jl
#   * Adds `SubString(s::AbstractString) = SubString(s, 1, endof(s))`.
#   * `SubString{T}(s::T)` now calls `SubString{T}(s, 1, endof(s))` instead of
#     `SubString(s, 1, endof(s))`.
#
# src/jl_uv.c
#   * jl_fs_read_byte: the one-byte buffer `c` changes from `char` to `unsigned char`,
#     with a `(char*)` cast when it is assigned to `buf[0].base`. The function's return
#     statement is outside this hunk; presumably this avoids sign extension of bytes
#     >= 0x80 when `c` is returned as `int` (confirm against the full function).
#
# test/read.jl
#   * Deletes one line that is empty in this collapsed view, next to the FIXME comment.
#   * Replaces the zero-argument `io` lambda with a local function `io(text=text)`, so
#     tests can open a stream over custom text while still registering it in
#     `open_streams`.
#   * Adds a `readuntil` test table of `(t, s, m)` tuples (stream text, target, expected
#     result), checked with the target passed as `String`, `SubString`,
#     `GenericString`, `Vector{UInt8}` and `Vector{Char}`.
diff --git a/base/io.jl b/base/io.jl index 63e32dc6e77ec..6f4fe34b5ab86 100644 --- a/base/io.jl +++ b/base/io.jl @@ -152,10 +152,10 @@ flush(io::AbstractPipe) = flush(pipe_writer(io)) read(io::AbstractPipe, byte::Type{UInt8}) = read(pipe_reader(io), byte) unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader(io), p, nb) read(io::AbstractPipe) = read(pipe_reader(io)) -readuntil(io::AbstractPipe, arg::UInt8) = readuntil(pipe_reader(io), arg) -readuntil(io::AbstractPipe, arg::Char) = readuntil(pipe_reader(io), arg) -readuntil(io::AbstractPipe, arg::AbstractString) = readuntil(pipe_reader(io), arg) -readuntil(io::AbstractPipe, arg) = readuntil(pipe_reader(io), arg) +readuntil(io::AbstractPipe, arg::UInt8) = readuntil(pipe_reader(io), arg) +readuntil(io::AbstractPipe, arg::Char) = readuntil(pipe_reader(io), arg) +readuntil_indexable(io::AbstractPipe, target#=::Indexable{T}=#, out) = readuntil_indexable(pipe_reader(io), target, out) + readavailable(io::AbstractPipe) = readavailable(pipe_reader(io)) isreadable(io::AbstractPipe) = isreadable(pipe_reader(io)) @@ -499,7 +499,7 @@ function readuntil(s::IO, delim::Char) end function readuntil(s::IO, delim::T) where T - out = T[] + out = (T === UInt8 ? 
StringVector(0) : Vector{T}()) while !eof(s) c = read(s, T) push!(out, c) @@ -510,39 +510,89 @@ function readuntil(s::IO, delim::T) where T return out end -# based on code by Glen Hertz -function readuntil(s::IO, t::AbstractString) - l = length(t) - if l == 0 - return "" - end - if l > 40 - warn("readuntil(IO,AbstractString) will perform poorly with a long string") +# requires that indices for target are small ordered integers bounded by start and endof +function readuntil_indexable(io::IO, target#=::Indexable{T}=#, out) + T = eltype(target) + first = start(target) + if done(target, first) + return end - out = IOBuffer() - m = Vector{Char}(l) # last part of stream to match - t = collect(t) - i = 0 - while !eof(s) - i += 1 - c = read(s, Char) - write(out, c) - if i <= l - m[i] = c + len = endof(target) + local cache # will be lazy initialized when needed + second = next(target, first)[2] + max_pos = second + pos = first + while !eof(io) + c = read(io, T) + # Backtrack until the next target character matches what was found + if out isa IO + write(out, c) else - # shift to last part of s - for j = 2:l - m[j-1] = m[j] - end - m[l] = c + push!(out, c) end - if i >= l && m == t - break + while true + c1, pos1 = next(target, pos) + if c == c1 + pos = pos1 + break + elseif pos == first + break + elseif pos == second + pos = first + else + # grow cache to contain up to `pos` + if !@isdefined(cache) + cache = zeros(Int, len) + end + while max_pos < pos + b = cache[max_pos] + first + cb, b1 = next(target, b) + ci, max_pos1 = next(target, max_pos) + if ci == cb + cache[max_pos1] = b1 - first + end + max_pos = max_pos1 + end + pos = cache[pos] + first + end end + done(target, pos) && break end - return String(take!(out)) end +function readuntil(io::IO, target::AbstractString) + # small-string target optimizations + i = start(target) + done(target, i) && return "" + c, i = next(target, start(target)) + if done(target, i) && c < Char(0x80) + return readuntil_string(io, c % UInt8) 
+ end + # decide how we can index target + if target isa String + # convert String to a utf8-byte-iterator + target = Vector{UInt8}(target) + #elseif applicable(codeunit, target) + # TODO: a more general version of above optimization + # would be to permit accessing any string via codeunit + # target = CodeUnitVector(target) + elseif !(target isa SubString{String}) + # type with unknown indexing behavior: convert to array + target = collect(target) + end + out = (eltype(target) === UInt8 ? StringVector(0) : IOBuffer()) + readuntil_indexable(io, target, out) + out = isa(out, IO) ? take!(out) : out + return String(out) +end + +function readuntil(io::IO, target::AbstractVector{T}) where T + out = (T === UInt8 ? StringVector(0) : Vector{T}()) + readuntil_indexable(io, target, out) + return out +end + + """ readchomp(x) @@ -592,6 +642,7 @@ function read(s::IO, nb::Integer = typemax(Int)) end read(s::IO, ::Type{String}) = String(read(s)) +read(s::IO, T::Type) = error("The IO stream does not support reading objects of type $T.") ## high-level iterator interfaces ## diff --git a/base/strings/types.jl b/base/strings/types.jl index 38f3a2b521112..9e74a83a39d6a 100644 --- a/base/strings/types.jl +++ b/base/strings/types.jl @@ -30,7 +30,8 @@ SubString(s::T, i::Int, j::Int) where {T<:AbstractString} = SubString{T}(s, i, j SubString(s::SubString, i::Int, j::Int) = SubString(s.string, s.offset+i, s.offset+j) SubString(s::AbstractString, i::Integer, j::Integer) = SubString(s, Int(i), Int(j)) SubString(s::AbstractString, i::Integer) = SubString(s, i, endof(s)) -SubString{T}(s::T) where {T<:AbstractString} = SubString(s, 1, endof(s)) +SubString(s::AbstractString) = SubString(s, 1, endof(s)) +SubString{T}(s::T) where {T<:AbstractString} = SubString{T}(s, 1, endof(s)) String(p::SubString{String}) = unsafe_string(pointer(p.string, p.offset+1), nextind(p, p.endof)-1) diff --git a/src/jl_uv.c b/src/jl_uv.c index 20a1372c44e68..77719693eb943 100644 --- a/src/jl_uv.c +++ b/src/jl_uv.c @@ 
-382,9 +382,9 @@ JL_DLLEXPORT int jl_fs_read(int handle, char *data, size_t len) JL_DLLEXPORT int jl_fs_read_byte(int handle) { uv_fs_t req; - char c; + unsigned char c; uv_buf_t buf[1]; - buf[0].base = &c; + buf[0].base = (char*)&c; buf[0].len = 1; int ret = uv_fs_read(jl_io_loop, &req, handle, buf, 1, -1, NULL); uv_fs_req_cleanup(&req); diff --git a/test/read.jl b/test/read.jl index ba82a26aaaef6..dfe38094e4598 100644 --- a/test/read.jl +++ b/test/read.jl @@ -94,7 +94,6 @@ s = io(text) close(s) push!(l, ("PipeEndpoint", io)) - #FIXME See https://github.com/JuliaLang/julia/issues/14747 # Reading from open(::Command) seems to deadlock on Linux/Travis #= @@ -136,10 +135,38 @@ end verbose = false - for (name, f) in l local f - io = ()->(s=f(text); push!(open_streams, s); s) + local function io(text=text) + local s = f(text) + push!(open_streams, s) + return s + end + + verbose && println("$name readuntil...") + for (t, s, m) in [ + ("a", "ab", "a"), + ("b", "ab", "b"), + ("α", "αγ", "α"), + ("ab", "abc", "ab"), + ("bc", "abc", "bc"), + ("αβ", "αβγ", "αβ"), + ("aaabc", "ab", "aaab"), + ("aaabc", "ac", "aaabc"), + ("aaabc", "aab", "aaab"), + ("aaabc", "aac", "aaabc"), + ("αααβγ", "αβ", "αααβ"), + ("αααβγ", "ααβ", "αααβ"), + ("αααβγ", "αγ", "αααβγ"), + ("barbarbarians", "barbarian", "barbarbarian")] + local t, s, m + @test readuntil(io(t), s) == m + @test readuntil(io(t), SubString(s, start(s), endof(s))) == m + @test readuntil(io(t), GenericString(s)) == m + @test readuntil(io(t), Vector{UInt8}(s)) == Vector{UInt8}(m) + @test readuntil(io(t), collect(s)::Vector{Char}) == Vector{Char}(m) + end + cleanup() write(filename, text)