diff --git a/base/io.jl b/base/io.jl index 39479084f28c3..12480a9dd1848 100644 --- a/base/io.jl +++ b/base/io.jl @@ -154,8 +154,6 @@ unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader read(io::AbstractPipe) = read(pipe_reader(io)) readuntil(io::AbstractPipe, arg::UInt8) = readuntil(pipe_reader(io), arg) readuntil(io::AbstractPipe, arg::Char) = readuntil(pipe_reader(io), arg) -readuntil(io::AbstractPipe, arg::AbstractString) = readuntil(pipe_reader(io), arg) -readuntil(io::AbstractPipe, arg) = readuntil(pipe_reader(io), arg) readavailable(io::AbstractPipe) = readavailable(pipe_reader(io)) isreadable(io::AbstractPipe) = isreadable(pipe_reader(io)) @@ -499,7 +497,7 @@ function readuntil(s::IO, delim::Char) end function readuntil(s::IO, delim::T) where T - out = T[] + out = (T === UInt8 ? StringVector(0) : Vector{T}()) while !eof(s) c = read(s, T) push!(out, c) @@ -510,41 +508,80 @@ function readuntil(s::IO, delim::T) where T return out end +function _rfind_sub_c(target, endc, endstate) + # find the largest i (< endstate) such that "target[(end - i + 1):(end - 1)] * c" == "target[1:i]" + # (equivalently, find the smallest tii' such that "target[tii':(end - 1)] * c" == "target[1:(end - tii' + 1)]") + # this assumes that the `target` iterator is pure and supports equality comparison for its state + # and that endstate is a valid state for target + tii = start(target) + i = start(target) + ti = tii + while tii != endstate + c, i = next(target, i) + tc, ti = next(target, ti) + if ti == endstate + if c == endc + return i + end + else + if tc == c + continue + end + end + tii = next(target, tii)[2] + i = start(target) + ti = tii + end + return start(target) +end + function readuntil(s::IO, target::AbstractString) - l = length(target) - if l == 0 + ti = start(target) + if done(target, ti) return "" end - t = collect(target) - backtrack = zeros(Int, l) - for i = 2:l - b = backtrack[i - 1] + 1 - if t[i] == t[b] - backtrack[i] = b - end + c1, tc1 = next(target, ti) + if done(target, tc1) && c1 < Char(0x80) + return readuntil_string(s, c1 % UInt8) end out = IOBuffer() - i = 0 while !eof(s) c = read(s, Char) write(out, c) - while i != 0 && c != t[i + 1] - if i > 0 - i = backtrack[i] - else - i = 0 - end - end - if c == t[i + 1] - i += 1 - end - if i == l - break + tc, ti = next(target, ti) + if c == tc + done(target, ti) && break + else + ti = _rfind_sub_c(target, c, ti) end end return String(take!(out)) end +function readuntil(s::IO, target::String) + ti = start(target) + if done(target, ti) + return "" + end + c1, tc1 = next(target, ti) + if done(target, tc1) && c1 < Char(0x80) + return readuntil_string(s, c1 % UInt8) + end + targetb = Vector{UInt8}(target) # convert String to a utf8-byte-iterator + out = StringVector(0) + while !eof(s) + c = read(s, UInt8) + push!(out, c) + tc, ti = next(targetb, ti) + if c == tc + done(targetb, ti) && break + else + ti = _rfind_sub_c(targetb, c, ti) + end + end + return String(out) +end + """ readchomp(x) @@ -594,6 +631,7 @@ function read(s::IO, nb::Integer = typemax(Int)) end read(s::IO, ::Type{String}) = String(read(s)) +read(s::IO, T::Type) = error("The IO stream does not support reading objects of type $T.") ## high-level iterator interfaces ## diff --git a/test/read.jl b/test/read.jl index 155c187e2c0b1..07bce1054968b 100644 --- a/test/read.jl +++ b/test/read.jl @@ -94,12 +94,6 @@ s = io(text) close(s) push!(l, ("PipeEndpoint", io)) - -# readuntil -@test readuntil(IOBuffer("aaabc"), "aab") == "aaab" -@test readuntil(IOBuffer("assassassinass"), "assassin") == "assassassin" - - #FIXME See https://github.com/JuliaLang/julia/issues/14747 # Reading from open(::Command) seems to deadlock on Linux/Travis #= @@ -141,10 +135,24 @@ end verbose = false - for (name, f) in l local f - io = ()->(s=f(text); push!(open_streams, s); s) + local function io(text=text) + local s = f(text) + push!(open_streams, s) + return s + end + + verbose && println("$name readuntil...") + for (t, s, m) in [ + ("aaabc", "aab", "aaab"), + ("assassassinass", "assassin", "assassassin")] + local t, s, m + @test readuntil(io(t), s) == m + + s = SubString(s, start(s), endof(s)) + @test readuntil(io(t), s) == m + end write(filename, text)