Skip to content

Commit

Permalink
Improve performance of readuntil using strings
Browse files Browse the repository at this point in the history
Heavily inspired by omus and #20621
  • Loading branch information
vtjnash committed Feb 17, 2017
1 parent ded2d87 commit 2e4f28e
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 27 deletions.
89 changes: 65 additions & 24 deletions base/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,6 @@ unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader
# Read-side operations on an AbstractPipe are forwarded unchanged to the
# stream returned by `pipe_reader(io)`.
read(io::AbstractPipe) = read(pipe_reader(io))
readuntil(io::AbstractPipe, arg::UInt8) = readuntil(pipe_reader(io), arg)
readuntil(io::AbstractPipe, arg::Char) = readuntil(pipe_reader(io), arg)
readuntil(io::AbstractPipe, arg::AbstractString) = readuntil(pipe_reader(io), arg)
readuntil(io::AbstractPipe, arg) = readuntil(pipe_reader(io), arg)
readavailable(io::AbstractPipe) = readavailable(pipe_reader(io))

# Readability of the pipe is likewise whatever `pipe_reader(io)` reports.
isreadable(io::AbstractPipe) = isreadable(pipe_reader(io))
Expand Down Expand Up @@ -434,7 +432,7 @@ function readuntil(s::IO, delim::Char)
end

function readuntil{T}(s::IO, delim::T)
out = T[]
out = (T === UInt8 ? StringVector(0) : Vector{T}())
while !eof(s)
c = read(s, T)
push!(out, c)
Expand All @@ -445,39 +443,80 @@ function readuntil{T}(s::IO, delim::T)
return out
end

# based on code by Glen Hertz
function readuntil(s::IO, t::AbstractString)
l = length(t)
if l == 0
# Fallback step used after a mismatch while scanning for `target`.
#
# `endstate` is the iteration state just past the element of `target` that
# failed to match, and `endc` is the element that was read in its place.
# The result is the state that follows the longest proper prefix of `target`
# which is also a suffix of "the elements before the failed one, then `endc`".
# When no such prefix exists, `start(target)` is returned.
#
# Requirements: iterating `target` must be pure, its iteration states must
# support equality comparison, and `endstate` must be a valid state of `target`.
function _rfind_sub_c(target, endc, endstate)
    origin = start(target)
    shift = origin
    while shift != endstate
        # Line the head of `target` up against the window beginning at `shift`.
        head = origin
        tail = shift
        while true
            c, head = next(target, head)
            tc, tail = next(target, tail)
            if tail == endstate
                # `tc` is the element that failed to match; `endc` stands in for it.
                c == endc && return head
                break
            else
                tc == c || break
            end
        end
        # This alignment failed: slide the window one element to the right.
        shift = next(target, shift)[2]
    end
    return origin
end

function readuntil(s::IO, target::AbstractString)
ti = start(target)
if done(target, ti)
return ""
end
if l > 40
warn("readuntil(IO,AbstractString) will perform poorly with a long string")
c1, tc1 = next(target, ti)
if done(target, tc1) && c1 < Char(0x80)
return readuntil_string(s, c1 % UInt8)
end
out = IOBuffer()
m = Array{Char}(l) # last part of stream to match
t = collect(t)
i = 0
while !eof(s)
i += 1
c = read(s, Char)
write(out, c)
if i <= l
m[i] = c
tc, ti = next(target, ti)
if c == tc
done(target, ti) && break
else
# shift to last part of s
for j = 2:l
m[j-1] = m[j]
end
m[l] = c
end
if i >= l && m == t
break
ti = _rfind_sub_c(target, c, ti)
end
end
return String(take!(out))
end

# Read bytes from `s` until the UTF-8 bytes of `target` have been seen, or
# until `eof(s)`, and return everything read (including the delimiter when it
# was found) as a `String`.
#
# An empty `target` returns "" without reading anything. A `target` made of a
# single character below 0x80 is delegated to `readuntil_string`.
function readuntil(s::IO, target::String)
    origin = start(target)
    done(target, origin) && return ""
    c1, after1 = next(target, origin)
    if done(target, after1) && c1 < Char(0x80)
        return readuntil_string(s, c1 % UInt8)
    end
    # Match on raw bytes rather than characters.
    needle = Vector{UInt8}(target)
    collected = StringVector(0)
    pos = origin
    while !eof(s)
        byte = read(s, UInt8)
        push!(collected, byte)
        expected, pos = next(needle, pos)
        if byte == expected
            # Whole needle matched: stop right after the delimiter.
            done(needle, pos) && break
        else
            # Mismatch: fall back to the longest partial match still possible.
            pos = _rfind_sub_c(needle, byte, pos)
        end
    end
    return String(collected)
end

"""
readchomp(x)
Expand Down Expand Up @@ -521,11 +560,13 @@ Read at most `nb` bytes from `s`, returning a `Vector{UInt8}` of the bytes read.
function read(s::IO, nb=typemax(Int))
# Let readbytes! grow the array progressively by default
# instead of taking the risk of over-allocating
# (when no explicit limit is given, start from a 1024-element buffer)
b = Array{UInt8}(nb == typemax(Int) ? 1024 : nb)
b = Vector{UInt8}(nb == typemax(Int) ? 1024 : nb)
# `nr` is the value returned by readbytes!; the buffer is cut down to that length
nr = readbytes!(s, b, nb)
return resize!(b, nr)
end

# Generic fallback for `read(s, T)`: raises an error naming the requested type.
function read(s::IO, T::Type)
    error("The IO stream does not support reading objects of type $T.")
end

"""
readstring(stream::IO)
readstring(filename::AbstractString)
Expand Down
20 changes: 17 additions & 3 deletions test/read.jl
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ s = io(text)
close(s)
push!(l, ("PipeEndpoint", io))


#FIXME See https://github.com/JuliaLang/julia/issues/14747
# Reading from open(::Command) seems to deadlock on Linux/Travis
#=
Expand Down Expand Up @@ -136,9 +135,24 @@ end

verbose = false


for (name, f) in l
io = ()->(s=f(text); push!(open_streams, s); s)
local function io(text=text)
local s = f(text)
push!(open_streams, s)
return s
end

verbose && println("$name readuntil...")
for (t, s, m) in [
("aaabc", "aab", "aaab"),
("assassassinass", "assassin", "assassassin")]
local t, s, m
@test readuntil(io(t), s) == m

s = SubString(s, start(s), endof(s))
@test readuntil(io(t), s) == m
end


write(filename, text)

Expand Down

0 comments on commit 2e4f28e

Please sign in to comment.