Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Constructing a pyramid with additional dimension which are not pyramided #42

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
/Manifest.toml
test/data
test/data
docs/.quarto
docs/presentation_files
2 changes: 1 addition & 1 deletion docs/Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -1785,7 +1785,7 @@ uuid = "43287f4e-b6f4-7ad1-bb20-aadabca52c3d"
version = "1.2.0"

[[deps.PyramidScheme]]
deps = ["Colors", "DimensionalData", "DiskArrayEngine", "DiskArrayTools", "DiskArrays", "Extents", "FillArrays", "GeoInterface", "Makie", "MakieCore", "Observables", "OffsetArrays", "Proj", "Statistics", "WGLMakie", "YAXArrayBase", "YAXArrays", "Zarr"]
deps = ["Colors", "DimensionalData", "DiskArrayEngine", "DiskArrayTools", "DiskArrays", "Extents", "FillArrays", "GeoInterface", "Makie", "MakieCore", "Observables", "OffsetArrays", "Proj", "Statistics", "YAXArrayBase", "YAXArrays", "Zarr"]
path = ".."
uuid = "ec211b67-1c2c-4319-878f-eaee078ee145"
version = "0.1.0"
Expand Down
27 changes: 21 additions & 6 deletions docs/juliacon.qmd → docs/presentation.qmd
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
---
engine: julia
title: "PyramidScheme.jl \n interactively plotting large raster data"
author: "Felix Cremer, Fabian Gans"
format: revealjs
engine: julia
execute:
echo: true
---

## Why use pyramids?

julia> agbdiffbase = agb2020 .- agb2017

╭───────────────────────────────────╮

│ 405000×157500 YAXArray{Float32,2} │

- julia> savecube(agbdiffbase, tempname()*".zarr")
Progress: 1%|▍ | ETA: 3:15:43

## What are pyramids?

```{julia}
Expand Down Expand Up @@ -65,11 +76,12 @@ plotpyramid(europyr)
```



## Compute the pyramids for your data
- In Memory data

```{julia}
#| eval: false
#| eval: false
using Rasters
using RasterDataSources
ras = Raster(WorldClim{Elevation},:elev, res="30s", lazy=true)
Expand Down Expand Up @@ -101,13 +113,13 @@ agb2020 = replacenan.(Pyramid(joinpath(@__DIR__,"..", "test/data/ESACCI-BIOMASS-
# Interactive visualisation

```{julia}
#| eval: false
#| eval: false
plot(agb2020, colormap=:speed, colorscale=sqrt)
```

# Computations on Pyramids
```{julia}
#| eval: false
#| eval: false
replacenan(data) = data <= 0 ? NaN32 : Float32(data)
agb2020 = replacenan.(Pyramid(joinpath(@__DIR__,"..", "test/data/ESACCI-BIOMASS-L4-AGB-MERGED-100m-2020-fv4.0.zarr/")))
agb2017 = replacenan.(Pyramid(joinpath(@__DIR__,"..", "test/data/ESACCI-BIOMASS-L4-AGB-MERGED-100m-2017-fv4.0.zarr/")))
Expand All @@ -118,16 +130,19 @@ plot(agbdiff, colormap=:bukavu, colorrange=(-200, 200))

# Outlook

- pyramids in multiple dimensions
- pyramids in multiple dimensions (WIP)

- Understanding more formats

- Expose data as WMTS/WMS for online usage

- Integrate DiskArrayEngine computations

- Open it up to more use cases

## Example of cloud access
```{julia}
#| eval:false
#| eval: false
using YAXArrays, PyramidScheme
using GLMakie
import PyramidScheme as PS
Expand Down
92 changes: 71 additions & 21 deletions src/PyramidScheme.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ struct Pyramid{T,N,D,A,B<:DD.AbstractDimArray{T,N,D,A},L, Me} <: DD.AbstractDimA
end

function Pyramid(data::DD.AbstractDimArray; resampling_method=mean ∘ skipmissing, kwargs...)
pyrdata, pyraxs = getpyramids(resampling_method, data; kwargs...)
levels = DD.DimArray.(pyrdata, pyraxs)
levels = getpyramids(resampling_method, data; kwargs...)
meta = Dict(deepcopy(DD.metadata(data)))
push!(meta, "resampling_method" => "mean_skipmissing")
Pyramid(data, levels, meta)
Expand Down Expand Up @@ -206,22 +205,63 @@ Fill the pyramids generated from the `data` with the aggregation function `func`
`recursive` indicates whether higher tiles are computed from lower tiles or directly from the original data.
This is an optimization which for functions like median might lead to misleading results.
"""
function fill_pyramids(data, outputs,func,recursive;runner=LocalRunner, kwargs...)
function fill_pyramids(data, outputs,func,recursive;runner=LocalRunner, verbose=false, outtype=:mem, kwargs...)
t = typeof(func(zeros(eltype(data), 2,2)))
n_level = compute_nlevels(size(data))
input_axes = pyramidedaxes(data)
nonpyramiddims = DD.otherdims(data, input_axes)
@show nonpyramiddims
if length(input_axes) != 2
throw(ArgumentError("Expected two spatial dimensions got $input_axes"))
end
verbose && println("Constructing output arrays")
spatialsize = size(data)[collect(DD.dimnum(data, input_axes))]
pyramid_sizes = [ceil.(Int, spatialsize ./ 2^i) for i in 1:n_level]
allsizes = [spatialsize...,]
sizeperm = [DD.dimnum(data, input_axes)..., DD.dimnum(data, nonpyramiddims)...]
#permute!(allsizes, sizeperm)
@show allsizes
#outputs = if outtype == :zarr
# [output_zarr(n, input_axes, t, joinpath(path, string(n))) for n in 1:n_level]
#elseif outtype == :mem
# output_arrays(pyramid_sizes, t)
#else
# throw(ArgumentError("Output type not valid got $outtype, but expected :mem or :zarr"))
#end

verbose && println("Start computation")
n_level = length(outputs)
@show typeof(data)
@show n_level
@show size.(outputs)
pixel_base_size = 2^n_level
pyramid_sizes = size.(outputs)
# What is tmp_sizes supposed to be?
# Does this have to be
tmp_sizes = [ceil(Int,pixel_base_size / 2^i) for i in 1:n_level]
windows = Any[Base.OneTo.(size(data))...]
#pseudocode
windows[DD.dimnum(data, XDim)] = RegularWindows(1,size(data, XDim),window=pixel_base_size)
windows[DD.dimnum(data, YDim)] = RegularWindows(1,size(data, YDim),window=pixel_base_size)
@show size.(windows)

ia = InputArray(data;windows = Tuple(windows))
# Replace
function outwindows(i, outputs, tmpsize)
outwins = Any[Base.OneTo.(size(data))...]
outwins[DD.dimnum(outputs[i], XDim)] = RegularWindows(1,size(outputs[i], XDim),window=tmpsize)
outwins[DD.dimnum(outputs[i], YDim)] = RegularWindows(1,size(outputs[i], YDim),window=tmpsize)
Tuple(outwins)
end

ia = InputArray(data, windows = arraywindows(size(data),pixel_base_size))

oa = ntuple(i->create_outwindows(pyramid_sizes[i],windows = arraywindows(pyramid_sizes[i],tmp_sizes[i])),n_level)

oa = ntuple(i->create_outwindows(pyramid_sizes[i],windows = outwindows(i,outputs, tmp_sizes[i])),n_level)
func = DiskArrayEngine.create_userfunction(gen_pyr,ntuple(_->eltype(first(outputs)),length(outputs));is_mutating=true,kwargs = (;recursive),args = (func,))

op = GMDWop((ia,), oa, func)

lr = DiskArrayEngine.optimize_loopranges(op,5e8,tol_low=0.2,tol_high=0.05,max_order=2)
r = runner(op,lr,outputs;kwargs...)
r = runner(op,lr,getproperty.(outputs, :data);kwargs...)
run(r)
end

Expand Down Expand Up @@ -254,18 +294,21 @@ Construct a list of `RegularWindows` for the size list in `s` for windows `w`.
??
"""
function arraywindows(s,w)
map(s) do l
@show s
@show w
windows = map(s) do l
RegularWindows(1,l,window=w)
end
windows
end


"""
compute_nlevels(data, tilesize=1024)
compute_nlevels(data, tilesize=256)

Compute the number of levels for the aggregation based on the size of `data`.
Compute the number of levels for the aggregation based on the tuple `datasize`.
"""
compute_nlevels(data, tilesize=256) = max(0,ceil(Int,log2(maximum(size(data))/tilesize)))
compute_nlevels(datasize, tilesize=256) = max(0,ceil(Int,log2(maximum(datasize)/tilesize)))

function agg_axis(d,n)
# TODO this might be problematic for explicitly set axes
Expand All @@ -284,7 +327,7 @@ function gen_output(t,s; path=tempname())
if outsize > 100e6
# This should be zgroup instead of zcreate, could use savedataset(skelton=true)
# Dummy dataset with FillArrays with the shape of the pyramidlevel
zcreate(t,s...;path,chunks = (1024,1024),fill_value=zero(t))
zcreate(t,s...;path,chunks=ntuple(i->min(256, s[i]), length(s)),fill_value=zero(t))
else
zeros(t,s...)
end
Expand All @@ -295,9 +338,9 @@ function DiskArrayEngine.engine(dimarr::DD.AbstractDimArray)
end
DiskArrayEngine.engine(pyr::Pyramid) = Pyramid(engine(parent(pyr)), engine.(pyr.levels), DD.metadata(pyr))
"""
output_arrays(pyramid_sizes)
output_arrays(pyramid_sizes, T)

Create the output arrays for the given `pyramid_sizes`
Create the output arrays for the given `pyramid_sizes` with the element type T.
"""
output_arrays(pyramid_sizes, T) = [gen_output(T,p) for p in pyramid_sizes]

Expand All @@ -307,6 +350,9 @@ Union of Dimensions which are assumed to be in space and are therefore used in t
"""
SpatialDim = Union{DD.Dimensions.XDim, DD.Dimensions.YDim}

pyramidedaxes(input) = filter(x-> x isa SpatialDim, DD.dims(input))


"""
buildpyramids(path; resampling_method=mean)
Build the pyramids for the zarr dataset at `path` and write the pyramid layers into the zarr folder.
Expand Down Expand Up @@ -375,19 +421,23 @@ Compute the data of the pyramids of a given data cube `ras`.
This returns the data of the pyramids and the dimension values of the aggregated axes.
"""
function getpyramids(reducefunc, ras;recursive=true)
input_axes = DD.dims(ras)
n_level = compute_nlevels(ras)
input_axes = pyramidedaxes(ras)
n_level = compute_nlevels(length.(input_axes))
if iszero(n_level)
@info "Array is smaller than the tilesize no pyramids are computed"
[ras], [dims(ras)]
end
pyramid_sizes = [ceil.(Int, size(ras) ./ 2^i) for i in 1:n_level]
pyramid_axes = [agg_axis.(input_axes,2^i) for i in 1:n_level]
pyramid_axes = [map(x-> in(x, input_axes) ? agg_axis(x, 2^l) : x , DD.dims(ras)) for l in 1:n_level]
pyramid_sizes = size.(pyramid_axes)
@show pyramid_sizes
t = typeof(reducefunc(zeros(eltype(ras), 2,2)))

outmin = output_arrays(pyramid_sizes, Float32)
fill_pyramids(ras,outmin,reducefunc,recursive; threaded=true)
outmin = output_arrays(pyramid_sizes, t)
@show size.(outmin)
levels = DD.DimArray.(outmin, pyramid_axes)
fill_pyramids(ras,levels,reducefunc,recursive; threaded=true)

outmin, pyramid_axes
levels
end

"""
Expand Down
23 changes: 20 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ end
end
@testitem "Helper functions" begin
using PyramidScheme: PyramidScheme as PS
@test PS.compute_nlevels(zeros(1000)) == 2
@test PS.compute_nlevels(zeros(1000,1025)) == 3
@test PS.compute_nlevels(zeros(10000, 8000)) == 6
@test PS.compute_nlevels((1000,)) == 2
@test PS.compute_nlevels((1000,1025)) == 3
@test PS.compute_nlevels((10000, 8000)) == 6
end

@testitem "ArchGDAL Loading of Geotiff Overviews" begin
Expand Down Expand Up @@ -123,3 +123,20 @@ end
end
=#

@testitem "Building pyramid with additional dimension in memory" begin
# The aim of this test is to check whether we can build a pyramid from a data cube with an extra dimension.
# We will only build the pyramids on the spatial dimensions and keep the other dimensions as is.
using YAXArrays
using Zarr
using PyramidScheme
using DimensionalData
orgpath = joinpath(@__DIR__, "data", "world.zarr")
path = tempname() * ".zarr"
cp(orgpath, path)
c = Cube(path)
@time "Building world pyramid" pyr = Pyramid(c)
@test pyr isa Pyramid
@test length(dims(pyr)) == 3
@test size(pyr.levels[end]) == (256,128,3)

end
Loading