Skip to content

Commit

Permalink
add output buffer argument to deplane_simd (#145)
Browse files Browse the repository at this point in the history
* add output buffer argument to deplane_simd

* use deplane_slow for unaligned arrays

* use deplane_slow for x64 mac or windows

* update test

* reduce allocations
  • Loading branch information
chrstphrbrns authored Mar 9, 2024
1 parent 24605fa commit c7003a7
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 17 deletions.
46 changes: 34 additions & 12 deletions src/ifds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -327,10 +327,7 @@ function Base.read!(target::AbstractArray{T, N}, tf::TiffFile{O, S}, ifd::IFD{O}

if isplanar(ifd)
samplesv = vec(samples)
temp = deplane(samplesv, spp)
GC.@preserve samplesv temp target begin
memcpy(pointer(samplesv), pointer(temp), sizeof(target))
end
deplane!(samplesv, spp)
end
end

Expand Down Expand Up @@ -421,7 +418,12 @@ function reverse_prediction!(ifd::IFD, arr::AbstractArray{T,N}) where {T, N}

columns = columns * spp * sizeof(T)

buffer::Vector{UInt8} = Vector{UInt8}(undef, columns)

valn = Val(sizeof(T))

temp2::Ptr{UInt8} = pointer(reinterpret(UInt8, arr))

for row in 1:rows
start = (row - 1) * columns
for plane in 1:spp
Expand All @@ -433,15 +435,38 @@ function reverse_prediction!(ifd::IFD, arr::AbstractArray{T,N}) where {T, N}
end
end
vw = view(reinterpret(UInt8, arr), start+1:start+columns)
vw .= deplane(vw, sizeof(T))
deplane!(buffer, vw, valn)
end

arr .= bswap.(arr)
end
end
end

# Allocating entry point: lifts the runtime plane count `n` into the type domain
# with `Val(n)` and hands `arr` to `deplane_simd`, returning that call's result.
function deplane(arr::AbstractVector, n::Integer)
    return deplane_simd(arr, Val(n))
end
# {AAA...BBB...CCC...} => {ABCABCABC...}
#
# Convenience method for callers that have no scratch space of their own: it
# allocates an uninitialised `Vector{T}` with the same length as `arr`, lifts the
# runtime plane count `n` to `Val(n)`, and forwards to the three-argument
# `deplane!`. The return value is whatever that method returns.
function deplane!(arr::AbstractVector{T}, n::Integer) where T
    scratch = Vector{T}(undef, length(arr))
    return deplane!(scratch, arr, Val(n))
end

# `true` only when the architecture is x86_64 and the OS is Windows or macOS.
# `deplane!` consults this flag and takes the `deplane_slow` path whenever it is set.
const is_mac_or_windows_x64 = Sys.ARCH == :x86_64 && (Sys.isapple() || Sys.iswindows())

# {AAA...BBB...CCC...} => {ABCABCABC...}
#
# Rearranges `arr` in place from planar to interleaved order for `N` planes.
#
# `buffer` must have the same length as `arr`, and `length(arr)` must be a multiple
# of `N`; both are checked with `@assert`. `buffer` is only written on the SIMD path,
# where `deplane_simd!` fills it before the bytes are copied back over `arr`. The
# fallback path ignores `buffer` and copies the vector returned by `deplane_slow`
# instead. The function's value is the result of the final `memcpy` call.
#
# NOTE(review): `deplane_simd!` is declared with `out::Vector{T}` while `buffer` is
# only constrained to `AbstractVector{T}` here — confirm callers always pass a `Vector`.
function deplane!(buffer::AbstractVector{T}, arr::AbstractVector{T}, n::Val{N}) where {T, N}
    @assert length(buffer) == length(arr)
    @assert length(arr) % N == 0

    GC.@preserve arr buffer begin
        # Low six address bits set => `arr` does not start on a 64-byte boundary.
        misaligned = Int(pointer(arr)) & 0x3f > 0
        # Fall back when misaligned, when there are fewer than 64 elements, or on
        # x86_64 Windows/macOS.
        use_fallback = misaligned || length(arr) < 64 || is_mac_or_windows_x64

        if !use_fallback
            deplane_simd!(buffer, arr, n)
            memcpy(pointer(arr), pointer(buffer), sizeof(buffer))
        else
            interleaved = deplane_slow(arr, N)
            GC.@preserve interleaved begin
                memcpy(pointer(arr), pointer(interleaved), sizeof(interleaved))
            end
        end
    end
end

# {AAA...BBB...CCC...} => {ABCABCABC...}
function deplane_slow(arr::AbstractVector{T}, n) where T
Expand All @@ -450,7 +475,7 @@ function deplane_slow(arr::AbstractVector{T}, n) where T
end

# {AAA...BBB...CCC...} => {ABCABCABC...}
@generated function deplane_simd(arr::AbstractVector{T}, ::Val{N}) where {T, N}
@generated function deplane_simd!(out::Vector{T}, arr::AbstractVector{T}, ::Val{N}) where {T, N}
width = cld(sizeof(T) * N, 64) * 64
count = fld(width, sizeof(T) * N) # pixels per iteration

Expand Down Expand Up @@ -493,7 +518,6 @@ end

GC.@preserve arr begin
ptrA = pointer(arr)
out = Vector{T}(undef, length(arr) + $count)
num_pixels = fld(length(arr), N)
iterations = fld(num_pixels, $count) - 1
out_index = 1 # output index
Expand All @@ -515,8 +539,6 @@ end
@inbounds for i in 0:remaining-1
$(finish...)
end

resize!(out, length(out) - $count)
end
end
end
Expand Down
2 changes: 1 addition & 1 deletion src/load.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ function load(tf::TiffFile; verbose=true, mmap = false, lazyio = false)
end
end

if tf.need_bswap && !is_irregular_bps(ifd)
if (tf.need_bswap && !is_irregular_bps(ifd)) || predictor(ifd) == 3
@debug "bswap'ing data"
loaded .= bswap.(loaded)
end
Expand Down
13 changes: 9 additions & 4 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -234,18 +234,23 @@ end

for typ in [Int8,UInt16,Float32]
for planes in 1:33
for size in 100:164
for size in 64:164
out = Vector{typ}(undef, size * planes)
a=reduce(vcat,[fill(typ(x),size) for x in 1:planes])
@test TiffImages.deplane_simd(a, Val(planes)) == TiffImages.deplane_slow(a, planes)
b=copy(a)
TiffImages.deplane!(out, a, Val(planes))
@test a == TiffImages.deplane_slow(b, planes)
end
end
end

for typ in [Int8,UInt16,Float32]
for planes in 1:33
for size in 100:164
for size in 1:164
a=reduce(vcat,[fill(typ(x),size) for x in 1:planes])
@test TiffImages.deplane(a, planes) == TiffImages.deplane_slow(a, planes)
b=copy(a)
TiffImages.deplane!(a, planes)
@test a == TiffImages.deplane_slow(b, planes)
end
end
end
Expand Down

0 comments on commit c7003a7

Please sign in to comment.