Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/mdc/julia_brt' into conference_s…
Browse files Browse the repository at this point in the history
…taging
  • Loading branch information
glaroc committed Oct 7, 2023
2 parents f3e284c + 36ac168 commit c2e429c
Show file tree
Hide file tree
Showing 13 changed files with 1,631 additions and 4 deletions.
707 changes: 707 additions & 0 deletions pipelines/Block3/block3.json

Large diffs are not rendered by default.

515 changes: 515 additions & 0 deletions pipelines/Block3/brt_into_priority_map.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion runners/julia-dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM julia:1.9.3

# Pre-compiling Julia dependencies
RUN julia -e 'pwd(); using Pkg; Pkg.add.(["SpeciesDistributionToolkit", "JSON", "CSV", "DataFrames", "StatsBase", "EvoTrees", "MultivariateStats" ]); Pkg.instantiate();'
RUN julia -e 'pwd(); using Pkg; Pkg.add.(["SpeciesDistributionToolkit", "Dates", "Clustering", "JSON", "CSV", "DataFrames", "StatsBase", "EvoTrees", "MultivariateStats" ]); Pkg.instantiate();'

#COPY Project.toml /root/Project.toml
#COPY instantiate.jl /root/instantiate.jl
Expand Down
105 changes: 105 additions & 0 deletions scripts/Block3/climateUniqueness.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
using SpeciesDistributionToolkit
using Clustering
using StatsBase
using CSV
using DataFrames
using MultivariateStats
using Dates
using JSON

include("shared.jl")

"""
    convert_layers_to_features_matrix(layers, data_matrix, land_idx)

Fill `data_matrix` (one row per layer) with the z-score-standardized values
of each layer at the indices in `land_idx`, and return `data_matrix`.
"""
function convert_layers_to_features_matrix(layers, data_matrix, land_idx)
    for (row, layer) in enumerate(layers)
        vals = Float32.(vec(layer.grid[land_idx]))
        # Each layer is standardized independently before entering the matrix.
        zt = StatsBase.fit(ZScoreTransform, vals)
        data_matrix[row, :] .= StatsBase.transform(zt, vals)
    end
    return data_matrix
end

"""
    fill_layer!(empty_layer, vec, land_idx)

Reset every cell of `empty_layer.grid` to `nothing`, then write `vec[i]`
into the grid at position `land_idx[i]` for each index.
"""
function fill_layer!(empty_layer, vec, land_idx)
    fill!(empty_layer.grid, nothing)
    for (pos, cell) in enumerate(land_idx)
        empty_layer.grid[cell] = vec[pos]
    end
    return nothing
end

"""
    pca_data_matrix(data_matrix)

Fit a PCA to `data_matrix` (features × observations) and return the data
projected into principal-component space.
"""
function pca_data_matrix(data_matrix)
    model = MultivariateStats.fit(PCA, data_matrix)
    return MultivariateStats.transform(model, data_matrix)
end

"""
    make_pca_matrix(layers, data_matrix, land_idx)

Standardize `layers` into `data_matrix` and return its PCA projection.
"""
function make_pca_matrix(layers, data_matrix, land_idx)
    features = convert_layers_to_features_matrix(layers, data_matrix, land_idx)
    return pca_data_matrix(features)
end

"""
    fill_pca_layers(layers, pca_mat, land_idx)

Allocate one `Float32` layer per PCA component (shaped like the first input
layer) and fill each with the matching row of `pca_mat` at `land_idx`.
"""
function fill_pca_layers(layers, pca_mat, land_idx)
    template = layers[begin]
    pca_layers = [convert(Float32, similar(template)) for _ in axes(pca_mat, 1)]
    for (component, layer) in enumerate(pca_layers)
        fill_layer!(layer, pca_mat[component, :], land_idx)
    end
    return pca_layers
end

"""
    kmeans_and_pca(layers, data_matrix, land_idx, k)

Project `layers` into PCA space, run k-means with `k` clusters on the
projected points, and return `(pca_layers, Λ)`, where `Λ` is the vector of
cluster-center columns.
"""
function kmeans_and_pca(layers, data_matrix, land_idx, k)
    pca_mat = make_pca_matrix(layers, data_matrix, land_idx)
    layers_in_pca_space = fill_pca_layers(layers, pca_mat, land_idx)

    clustering = Clustering.kmeans(pca_mat, k)
    Λ = collect(eachcol(clustering.centers))

    return layers_in_pca_space, Λ
end

"""
    make_climate_uniqueness(k, land_idx, layers)

Build a climate-uniqueness layer: for each land cell, the squared Euclidean
distance (in PCA space) to the nearest of `k` k-means cluster centers.
Larger values indicate a rarer combination of climate conditions.
"""
function make_climate_uniqueness(k, land_idx, layers)
    data_matrix = zeros(Float32, length(layers), length(land_idx))

    pca_layers, centers = kmeans_and_pca(layers, data_matrix, land_idx, k)

    uniqueness = similar(layers[begin])
    uniqueness.grid .= nothing

    sqdist(point, center) = sum((point .- center) .^ 2)
    for cell in land_idx
        point = [pca_layer.grid[cell] for pca_layer in pca_layers]
        # findmin already yields the distance to the closest center, which
        # is exactly the uniqueness score for this cell.
        nearest, _ = findmin(center -> sqdist(point, center), centers)
        uniqueness.grid[cell] = nearest
    end
    return uniqueness
end

"""
    write_outputs(runtime_dir, uniqueness)

Write `uniqueness` as a cloud-optimized GeoTIFF inside `runtime_dir` and
record its path in `output.json` under the `climate_uniqueness` key.
"""
function write_outputs(runtime_dir, uniqueness)
    raster_path = joinpath(runtime_dir, "uniqueness.tif")
    SpeciesDistributionToolkit._write_geotiff(raster_path, uniqueness; compress="COG")

    manifest = Dict(:climate_uniqueness => raster_path)
    open(joinpath(runtime_dir, "output.json"), "w") do io
        write(io, JSON.json(manifest))
    end
end


# Entry point for the climate-uniqueness pipeline step: reads the input
# manifest, masks out open water, computes the uniqueness layer, and writes
# the raster plus an output manifest.
function main()
    runtime_dir = ARGS[1]
    inputs = read_inputs_dict(runtime_dir)

    mask_path = inputs["water_mask"]
    layer_paths = inputs["layers"]
    k = inputs["k"]

    # Landcover value 210 marks open water; every other cell counts as land.
    OPEN_WATER_LABEL = 210
    lc = SimpleSDMPredictor(mask_path)
    land_idx = findall(!isequal(OPEN_WATER_LABEL), lc.grid)

    # NOTE(review): paths are used as-is here, while sibling scripts join them
    # onto runtime_dir — confirm input.json holds absolute paths for this step.
    layers = SimpleSDMPredictor.(layer_paths)

    uniqueness = make_climate_uniqueness(k, land_idx, layers)

    write_outputs(runtime_dir, uniqueness)
end

main()
28 changes: 28 additions & 0 deletions scripts/Block3/climateUniqueness.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
script: climateUniqueness.jl
name: Climate Uniqueness
description: "This script takes in multiple climate layers and computes a single layer where high values indicate a more rare combination of layers. Uniqueness is computed as distance to the nearest k-means center, where k is a user input."
author:
- name: Michael D. Catchen
identifier: https://orcid.org/0000-0002-6506-6487
inputs:
k:
label: k
description: the value of k for the k-means algorithm
type: int
example: 5
layers:
label: layers
description: a list of climate layers to use to compute uniqueness
type: image/tiff;application=geotiff[]
example: ["foo"]
water_mask:
label: water_mask
description: a raster with landcover values
type: image/tiff;application=geotiff
example: "/output/foobar/lc.tif"
outputs:
climate_uniqueness:
label: climate uniqueness
description: map
type: image/tiff;application=geotiff

132 changes: 132 additions & 0 deletions scripts/Block3/climateVelocity.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
using SpeciesDistributionToolkit
using Clustering
using StatsBase
using CSV
using DataFrames
using MultivariateStats
using Dates
using JSON

include("shared.jl")

"""
    read_climate(runtime_dir, inputs, water_idx)

Load the baseline and end-of-period climate rasters listed in `inputs`
(paths joined onto `runtime_dir`), blank out every open-water cell given by
`water_idx`, and return the two layer sets as a two-element vector.
"""
function read_climate(runtime_dir, inputs, water_idx)
    baseline_paths = inputs["baseline_layers"]
    end_paths = inputs["end_layers"]

    layers = [map(SimpleSDMPredictor, joinpath.(runtime_dir, paths))
              for paths in [baseline_paths, end_paths]]

    # Water cells carry no climate signal; mask them in every layer.
    for layer_set in layers, layer in layer_set
        layer.grid[water_idx] .= nothing
    end

    return layers
end

"""
    get_baseline_standardization(baseline_layers, land_idx)

Return one fitted `ZScoreTransform` per baseline layer, computed from the
layer's values at `land_idx`.
"""
function get_baseline_standardization(baseline_layers, land_idx)
    return map(baseline_layers) do layer
        StatsBase.fit(ZScoreTransform, Float32.(vec(layer.grid[land_idx])))
    end
end

"""
    standardize_layers!(zs, layers, land_idx)

Apply the fitted transform `zs[i]` to `layers[i]` in place, replacing the
values at `land_idx` with their standardized equivalents.
"""
function standardize_layers!(zs, layers, land_idx)
    for i in eachindex(zs)
        scaled = StatsBase.transform(zs[i], Float32.(vec(layers[i].grid[land_idx])))
        layers[i].grid[land_idx] .= scaled
    end
end


"""
    convert_layers_to_features_matrix(layer_set, data_matrix, land_idx)

Copy each layer's values at `land_idx` (converted to `Float32`) into the
corresponding row of `data_matrix` and return it. Unlike the uniqueness
script's variant, no standardization is applied here.
"""
function convert_layers_to_features_matrix(layer_set, data_matrix, land_idx)
    for (row, layer) in enumerate(layer_set)
        data_matrix[row, :] .= Float32.(vec(layer.grid[land_idx]))
    end
    return data_matrix
end

"""
    fill_layer!(empty_layer, vec, land_idx)

Blank `empty_layer.grid` (every cell set to `nothing`), then scatter the
entries of `vec` into the grid positions listed in `land_idx`.
"""
function fill_layer!(empty_layer, vec, land_idx)
    empty_layer.grid .= nothing
    # land_idx[i] receives vec[i]; the two are expected to be equal length.
    for (pos, cell) in enumerate(land_idx)
        empty_layer.grid[cell] = vec[pos]
    end
    return nothing
end

"""
    pca_data_matrix(data_matrix)

Fit a `PCA` model to `data_matrix` (features × observations) and return the
model itself, so it can later be applied to several layer sets.
"""
pca_data_matrix(data_matrix) = MultivariateStats.fit(PCA, data_matrix)

"""
    apply_pca_to_layers(pca, layers, land_idx)

Project `layers` through the fitted `pca` model and return one layer per
principal component, with values written only at `land_idx` (all other
cells left as `nothing`).
"""
function apply_pca_to_layers(pca, layers, land_idx)
    buffer = zeros(Float32, length(layers), length(land_idx))
    feature_matrix = convert_layers_to_features_matrix(layers, buffer, land_idx)

    projected = MultivariateStats.transform(pca, feature_matrix)

    pca_layers = [similar(layers[begin]) for _ in 1:size(projected, 1)]
    for (row, layer) in enumerate(pca_layers)
        layer.grid .= nothing
        layer.grid[land_idx] = projected[row, :]
    end
    return pca_layers
end

"""
    compute_velocity(climate_layers, land_idx)

Sum, over all components, the absolute per-cell change between the first
(baseline) and last (future) layer sets in `climate_layers`. The result is
written only at `land_idx`; all other cells are `nothing`.
"""
function compute_velocity(climate_layers, land_idx)
    baseline, future = climate_layers[begin], climate_layers[end]

    velocity = similar(climate_layers[begin][begin])
    velocity.grid .= nothing
    velocity.grid[land_idx] .= 0.

    for component in eachindex(baseline)
        change = vec(abs.((baseline[component] - future[component]).grid[land_idx]))
        velocity.grid[land_idx] += change
    end
    return velocity
end

"""
    write_outputs(runtime_dir, velocity)

Write `velocity` as a cloud-optimized GeoTIFF inside `runtime_dir` and
record its path in `output.json` under the `climate_velocity` key.
"""
function write_outputs(runtime_dir, velocity)
    raster_path = joinpath(runtime_dir, "velocity.tif")
    SpeciesDistributionToolkit._write_geotiff(raster_path, velocity; compress="COG")

    manifest = Dict(:climate_velocity => raster_path)
    open(joinpath(runtime_dir, "output.json"), "w") do io
        write(io, JSON.json(manifest))
    end
end

# Entry point for the climate-velocity pipeline step: loads baseline and
# end-of-period climate rasters, standardizes them against the baseline,
# projects both periods through a shared PCA, and writes the summed
# absolute change as the velocity layer.
function main()
    runtime_dir = ARGS[1]
    inputs = read_inputs_dict(runtime_dir)

    lc_path = inputs["landcover"]
    # Landcover value 210 marks open water in the mask raster.
    OPEN_WATER_LABEL = 210
    lc = SimpleSDMPredictor(lc_path)
    land_idx = findall(x->!isnothing(x) && x != OPEN_WATER_LABEL, lc.grid)


    water_idx = findall(isequal(OPEN_WATER_LABEL), lc.grid)

    @info "about to read climate"
    climate_layers = read_climate(runtime_dir, inputs, water_idx)
    @info "about to standardize"
    # The z-score transforms are fit on the baseline period only, then
    # applied to every period so changes are measured in baseline units.
    zs = get_baseline_standardization(climate_layers[begin], land_idx)
    for layers in climate_layers
        standardize_layers!(zs, layers, land_idx)
    end

    # PCA is likewise fit on the baseline features and reused for all periods.
    data_matrix = zeros(Float32, length(climate_layers[begin]), length(land_idx))
    data_matrix = convert_layers_to_features_matrix(climate_layers[begin], data_matrix, land_idx)
    pca = pca_data_matrix(data_matrix)

    pca_layers = [apply_pca_to_layers(pca, layers, land_idx) for layers in climate_layers]


    @info "about to compute velocity"
    velocity = compute_velocity(pca_layers, land_idx)
    write_outputs(runtime_dir, velocity)
end

# Script entry point: expects the runtime directory as ARGS[1].
# (Removed a leftover debug log line, `@info "hello?????"`.)
main()
31 changes: 31 additions & 0 deletions scripts/Block3/climateVelocity.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
script: climateVelocity.jl
name: Climate Velocity
description: "This script takes in multiple climate layers and computes the
velocity (rate) of change of each layer standardized across the period
1970-2100, split into four intervals. (This is because by default, CHELSA is
provided that way)"
author:
- name: Michael D. Catchen
identifier: https://orcid.org/0000-0002-6506-6487
inputs:
baseline_layers:
label: baseline_layers
description: a list of the baseline climate layers to use to compute velocity
type: image/tiff;application=geotiff[]
example: ["foo"]
end_layers:
label: end_layers
description: a list of the climate at the end of the period to compute
type: image/tiff;application=geotiff[]
example: ["/output/foobar/end.tif"]
landcover:
label: landcover raster
description: land cover raster to mask out water
type: image/tiff;application=geotiff
example: "foo"
outputs:
climate_velocity:
label: climate velocity
description: map
type: image/tiff;application=geotiff

6 changes: 6 additions & 0 deletions scripts/Block3/shared.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""
    read_inputs_dict(runtime_dir)

Parse `runtime_dir/input.json` into a `Dict`, first ensuring the
`runtime_dir/data/` working directory exists.
"""
function read_inputs_dict(runtime_dir)
    data_dir = joinpath(runtime_dir, "data/")
    isdir(data_dir) || mkdir(data_dir)
    return JSON.parsefile(joinpath(runtime_dir, "input.json"))
end
64 changes: 64 additions & 0 deletions scripts/Block3/weightLayers.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
using JSON
using CSV
using DataFrames
using SpeciesDistributionToolkit

include("shared.jl")

"""
    write_outputs(runtime_dir, priority)

Write `priority` as a cloud-optimized GeoTIFF inside `runtime_dir` and
record its path in `output.json` under the `priority_map` key.
"""
function write_outputs(runtime_dir, priority)
    raster_path = joinpath(runtime_dir, "priority.tif")
    SpeciesDistributionToolkit._write_geotiff(raster_path, priority; compress="COG")

    open(joinpath(runtime_dir, "output.json"), "w") do io
        write(io, JSON.json(Dict(:priority_map => raster_path)))
    end
end

"""
    get_shared_indices(layers)

Return the grid indices at which *every* layer in `layers` has a value
(i.e. is not `nothing`).

Bug fix: the original splatted the per-layer index vectors into a tuple,
`(land_idxs...)`, instead of intersecting them — the result was a tuple of
vectors, not a usable shared index set, which breaks the downstream
`grid[shared_idxs]` indexing in `main`. `intersect` yields the common set.
"""
function get_shared_indices(layers)
    # One index set per layer, reduced to the indices common to all of them.
    return mapreduce(l -> findall(!isnothing, l.grid), intersect, layers)
end

# Entry point for the priority-map step: rescales the four input layers to
# [0, 1], inverts accessibility, and combines them as a weighted sum using
# the user-supplied weight vector β.
function main()
    runtime_dir = ARGS[1]
    inputs = read_inputs_dict(runtime_dir)

    # β is the per-layer weight vector; order must match layer_paths below.
    β = inputs["weights"]

    uncert_path = inputs["uncertainty"]
    uniqueness_path = inputs["climate_uniqueness"]
    velocity_path = inputs["climate_velocity"]

    access_path = inputs["accessibility"]

    layer_paths = [uncert_path, uniqueness_path, velocity_path, access_path]

    layers = [rescale(SimpleSDMPredictor(joinpath(runtime_dir, layer_path)), (0,1)) for layer_path in layer_paths]
    shared_idxs = get_shared_indices(layers)

    # Accessibility gets inverted (1 - x) because smaller values = better
    # access, while the priority sum treats larger values as higher priority.
    access = similar(layers[begin])
    access.grid .= nothing
    access.grid[shared_idxs] .= 1 .- layers[end].grid[shared_idxs]
    access = rescale(access, (0,1))
    layers[4] = access

    priority = similar(layers[begin])
    priority.grid .= nothing
    priority.grid[shared_idxs] .= 0.

    # Weighted sum across all four layers, restricted to shared indices.
    for (i,l) in enumerate(layers)
        priority.grid[shared_idxs] .+= β[i] .* l.grid[shared_idxs]
    end

    priority = rescale(priority, (0,1))

    write_outputs(runtime_dir, priority)
end

main()
Loading

0 comments on commit c2e429c

Please sign in to comment.