Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/mdc/julia_brt' into conference_s…
Browse files Browse the repository at this point in the history
…taging
  • Loading branch information
glaroc committed Oct 7, 2023
2 parents f3e284c + 36ac168 commit c2e429c
Show file tree
Hide file tree
Showing 13 changed files with 1,631 additions and 4 deletions.
707 changes: 707 additions & 0 deletions pipelines/Block3/block3.json

Large diffs are not rendered by default.

515 changes: 515 additions & 0 deletions pipelines/Block3/brt_into_priority_map.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion runners/julia-dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
FROM julia:1.9.3

# Pre-compiling Julia dependencies
RUN julia -e 'pwd(); using Pkg; Pkg.add.(["SpeciesDistributionToolkit", "JSON", "CSV", "DataFrames", "StatsBase", "EvoTrees", "MultivariateStats" ]); Pkg.instantiate();'
RUN julia -e 'pwd(); using Pkg; Pkg.add.(["SpeciesDistributionToolkit", "Dates", "Clustering", "JSON", "CSV", "DataFrames", "StatsBase", "EvoTrees", "MultivariateStats" ]); Pkg.instantiate();'

#COPY Project.toml /root/Project.toml
#COPY instantiate.jl /root/instantiate.jl
Expand Down
105 changes: 105 additions & 0 deletions scripts/Block3/climateUniqueness.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
using SpeciesDistributionToolkit
using Clustering
using StatsBase
using CSV
using DataFrames
using MultivariateStats
using Dates
using JSON

include("shared.jl")

"""
    convert_layers_to_features_matrix(layers, data_matrix, land_idx)

Fill `data_matrix` (one row per layer) with the z-score-standardized values
of each layer at the indices in `land_idx`, and return `data_matrix`.
"""
function convert_layers_to_features_matrix(layers, data_matrix, land_idx)
    for (row, layer) in enumerate(layers)
        vals = Float32.(vec(layer.grid[land_idx]))
        # Each layer is standardized independently before entering the matrix.
        zt = StatsBase.fit(ZScoreTransform, vals)
        data_matrix[row, :] .= StatsBase.transform(zt, vals)
    end
    return data_matrix
end

"""
    fill_layer!(empty_layer, vec, land_idx)

Reset every cell of `empty_layer.grid` to `nothing`, then write `vec[i]`
into the grid at position `land_idx[i]` for each index.
"""
function fill_layer!(empty_layer, vec, land_idx)
    fill!(empty_layer.grid, nothing)
    for (pos, cell) in enumerate(land_idx)
        empty_layer.grid[cell] = vec[pos]
    end
    return nothing
end

"""
    pca_data_matrix(data_matrix)

Fit a PCA to `data_matrix` (features × observations) and return the data
projected into principal-component space.
"""
function pca_data_matrix(data_matrix)
    model = MultivariateStats.fit(PCA, data_matrix)
    return MultivariateStats.transform(model, data_matrix)
end

"""
    make_pca_matrix(layers, data_matrix, land_idx)

Standardize `layers` into `data_matrix` and return its PCA projection.
"""
function make_pca_matrix(layers, data_matrix, land_idx)
    features = convert_layers_to_features_matrix(layers, data_matrix, land_idx)
    return pca_data_matrix(features)
end

"""
    fill_pca_layers(layers, pca_mat, land_idx)

Allocate one `Float32` layer per PCA component (shaped like the first input
layer) and fill each with the matching row of `pca_mat` at `land_idx`.
"""
function fill_pca_layers(layers, pca_mat, land_idx)
    template = layers[begin]
    pca_layers = [convert(Float32, similar(template)) for _ in axes(pca_mat, 1)]
    for (component, layer) in enumerate(pca_layers)
        fill_layer!(layer, pca_mat[component, :], land_idx)
    end
    return pca_layers
end

"""
    kmeans_and_pca(layers, data_matrix, land_idx, k)

Project `layers` into PCA space, run k-means with `k` clusters on the
projected points, and return `(pca_layers, Λ)`, where `Λ` is the vector of
cluster-center columns.
"""
function kmeans_and_pca(layers, data_matrix, land_idx, k)
    pca_mat = make_pca_matrix(layers, data_matrix, land_idx)
    layers_in_pca_space = fill_pca_layers(layers, pca_mat, land_idx)

    clustering = Clustering.kmeans(pca_mat, k)
    Λ = collect(eachcol(clustering.centers))

    return layers_in_pca_space, Λ
end

"""
    make_climate_uniqueness(k, land_idx, layers)

Build a climate-uniqueness layer: for each land cell, the squared Euclidean
distance (in PCA space) to the nearest of `k` k-means cluster centers.
Larger values indicate a rarer combination of climate conditions.
"""
function make_climate_uniqueness(k, land_idx, layers)
    data_matrix = zeros(Float32, length(layers), length(land_idx))

    pca_layers, centers = kmeans_and_pca(layers, data_matrix, land_idx, k)

    uniqueness = similar(layers[begin])
    uniqueness.grid .= nothing

    sqdist(point, center) = sum((point .- center) .^ 2)
    for cell in land_idx
        point = [pca_layer.grid[cell] for pca_layer in pca_layers]
        # findmin already yields the distance to the closest center, which
        # is exactly the uniqueness score for this cell.
        nearest, _ = findmin(center -> sqdist(point, center), centers)
        uniqueness.grid[cell] = nearest
    end
    return uniqueness
end

"""
    write_outputs(runtime_dir, uniqueness)

Write `uniqueness` as a cloud-optimized GeoTIFF inside `runtime_dir` and
record its path in `output.json` under the `climate_uniqueness` key.
"""
function write_outputs(runtime_dir, uniqueness)
    raster_path = joinpath(runtime_dir, "uniqueness.tif")
    SpeciesDistributionToolkit._write_geotiff(raster_path, uniqueness; compress="COG")

    manifest = Dict(:climate_uniqueness => raster_path)
    open(joinpath(runtime_dir, "output.json"), "w") do io
        write(io, JSON.json(manifest))
    end
end


# Entry point for the climate-uniqueness pipeline step: reads the input
# manifest, masks out open water, computes the uniqueness layer, and writes
# the raster plus an output manifest.
function main()
    runtime_dir = ARGS[1]
    inputs = read_inputs_dict(runtime_dir)

    mask_path = inputs["water_mask"]
    layer_paths = inputs["layers"]
    k = inputs["k"]

    # Landcover value 210 marks open water; every other cell counts as land.
    OPEN_WATER_LABEL = 210
    lc = SimpleSDMPredictor(mask_path)
    land_idx = findall(!isequal(OPEN_WATER_LABEL), lc.grid)

    # NOTE(review): paths are used as-is here, while sibling scripts join them
    # onto runtime_dir — confirm input.json holds absolute paths for this step.
    layers = SimpleSDMPredictor.(layer_paths)

    uniqueness = make_climate_uniqueness(k, land_idx, layers)

    write_outputs(runtime_dir, uniqueness)
end

main()
28 changes: 28 additions & 0 deletions scripts/Block3/climateUniqueness.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
script: climateUniqueness.jl
name: Climate Uniqueness
description: "This script takes in multiple climate layers and computes a single layer where high values indicate a more rare combination of layers. Uniqueness is computed as distance to the nearest k-means center, where k is a user input."
author:
- name: Michael D. Catchen
identifier: https://orcid.org/0000-0002-6506-6487
inputs:
k:
label: k
description: the value of k for the k-means algorithm
type: int
example: 5
layers:
label: layers
description: a list of climate layers to use to compute uniqueness
type: image/tiff;application=geotiff[]
example: ["foo"]
water_mask:
label: water_mask
description: a raster with landcover values
type: image/tiff;application=geotiff
example: "/output/foobar/lc.tif"
outputs:
climate_uniqueness:
label: climate uniqueness
description: map
type: image/tiff;application=geotiff

132 changes: 132 additions & 0 deletions scripts/Block3/climateVelocity.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
using SpeciesDistributionToolkit
using Clustering
using StatsBase
using CSV
using DataFrames
using MultivariateStats
using Dates
using JSON

include("shared.jl")

"""
    read_climate(runtime_dir, inputs, water_idx)

Load the baseline and end-of-period climate rasters listed in `inputs`
(paths joined onto `runtime_dir`), blank out every open-water cell given by
`water_idx`, and return the two layer sets as a two-element vector.
"""
function read_climate(runtime_dir, inputs, water_idx)
    baseline_paths = inputs["baseline_layers"]
    end_paths = inputs["end_layers"]

    layers = [map(SimpleSDMPredictor, joinpath.(runtime_dir, paths))
              for paths in [baseline_paths, end_paths]]

    # Water cells carry no climate signal; mask them in every layer.
    for layer_set in layers, layer in layer_set
        layer.grid[water_idx] .= nothing
    end

    return layers
end

"""
    get_baseline_standardization(baseline_layers, land_idx)

Return one fitted `ZScoreTransform` per baseline layer, computed from the
layer's values at `land_idx`.
"""
function get_baseline_standardization(baseline_layers, land_idx)
    return map(baseline_layers) do layer
        StatsBase.fit(ZScoreTransform, Float32.(vec(layer.grid[land_idx])))
    end
end

"""
    standardize_layers!(zs, layers, land_idx)

Apply the fitted transform `zs[i]` to `layers[i]` in place, replacing the
values at `land_idx` with their standardized equivalents.
"""
function standardize_layers!(zs, layers, land_idx)
    for i in eachindex(zs)
        scaled = StatsBase.transform(zs[i], Float32.(vec(layers[i].grid[land_idx])))
        layers[i].grid[land_idx] .= scaled
    end
end


"""
    convert_layers_to_features_matrix(layer_set, data_matrix, land_idx)

Copy each layer's values at `land_idx` (converted to `Float32`) into the
corresponding row of `data_matrix` and return it. Unlike the uniqueness
script's variant, no standardization is applied here.
"""
function convert_layers_to_features_matrix(layer_set, data_matrix, land_idx)
    for (row, layer) in enumerate(layer_set)
        data_matrix[row, :] .= Float32.(vec(layer.grid[land_idx]))
    end
    return data_matrix
end

"""
    fill_layer!(empty_layer, vec, land_idx)

Blank `empty_layer.grid` (every cell set to `nothing`), then scatter the
entries of `vec` into the grid positions listed in `land_idx`.
"""
function fill_layer!(empty_layer, vec, land_idx)
    empty_layer.grid .= nothing
    # land_idx[i] receives vec[i]; the two are expected to be equal length.
    for (pos, cell) in enumerate(land_idx)
        empty_layer.grid[cell] = vec[pos]
    end
    return nothing
end

"""
    pca_data_matrix(data_matrix)

Fit a `PCA` model to `data_matrix` (features × observations) and return the
model itself, so it can later be applied to several layer sets.
"""
pca_data_matrix(data_matrix) = MultivariateStats.fit(PCA, data_matrix)

"""
    apply_pca_to_layers(pca, layers, land_idx)

Project `layers` through the fitted `pca` model and return one layer per
principal component, with values written only at `land_idx` (all other
cells left as `nothing`).
"""
function apply_pca_to_layers(pca, layers, land_idx)
    buffer = zeros(Float32, length(layers), length(land_idx))
    feature_matrix = convert_layers_to_features_matrix(layers, buffer, land_idx)

    projected = MultivariateStats.transform(pca, feature_matrix)

    pca_layers = [similar(layers[begin]) for _ in 1:size(projected, 1)]
    for (row, layer) in enumerate(pca_layers)
        layer.grid .= nothing
        layer.grid[land_idx] = projected[row, :]
    end
    return pca_layers
end

"""
    compute_velocity(climate_layers, land_idx)

Sum, over all components, the absolute per-cell change between the first
(baseline) and last (future) layer sets in `climate_layers`. The result is
written only at `land_idx`; all other cells are `nothing`.
"""
function compute_velocity(climate_layers, land_idx)
    baseline, future = climate_layers[begin], climate_layers[end]

    velocity = similar(climate_layers[begin][begin])
    velocity.grid .= nothing
    velocity.grid[land_idx] .= 0.

    for component in eachindex(baseline)
        change = vec(abs.((baseline[component] - future[component]).grid[land_idx]))
        velocity.grid[land_idx] += change
    end
    return velocity
end

"""
    write_outputs(runtime_dir, velocity)

Write `velocity` as a cloud-optimized GeoTIFF inside `runtime_dir` and
record its path in `output.json` under the `climate_velocity` key.
"""
function write_outputs(runtime_dir, velocity)
    raster_path = joinpath(runtime_dir, "velocity.tif")
    SpeciesDistributionToolkit._write_geotiff(raster_path, velocity; compress="COG")

    manifest = Dict(:climate_velocity => raster_path)
    open(joinpath(runtime_dir, "output.json"), "w") do io
        write(io, JSON.json(manifest))
    end
end

# Entry point for the climate-velocity pipeline step: loads baseline and
# end-of-period climate rasters, standardizes them against the baseline,
# projects both periods through a shared PCA, and writes the summed
# absolute change as the velocity layer.
function main()
    runtime_dir = ARGS[1]
    inputs = read_inputs_dict(runtime_dir)

    lc_path = inputs["landcover"]
    # Landcover value 210 marks open water in the mask raster.
    OPEN_WATER_LABEL = 210
    lc = SimpleSDMPredictor(lc_path)
    land_idx = findall(x->!isnothing(x) && x != OPEN_WATER_LABEL, lc.grid)


    water_idx = findall(isequal(OPEN_WATER_LABEL), lc.grid)

    @info "about to read climate"
    climate_layers = read_climate(runtime_dir, inputs, water_idx)
    @info "about to standardize"
    # The z-score transforms are fit on the baseline period only, then
    # applied to every period so changes are measured in baseline units.
    zs = get_baseline_standardization(climate_layers[begin], land_idx)
    for layers in climate_layers
        standardize_layers!(zs, layers, land_idx)
    end

    # PCA is likewise fit on the baseline features and reused for all periods.
    data_matrix = zeros(Float32, length(climate_layers[begin]), length(land_idx))
    data_matrix = convert_layers_to_features_matrix(climate_layers[begin], data_matrix, land_idx)
    pca = pca_data_matrix(data_matrix)

    pca_layers = [apply_pca_to_layers(pca, layers, land_idx) for layers in climate_layers]


    @info "about to compute velocity"
    velocity = compute_velocity(pca_layers, land_idx)
    write_outputs(runtime_dir, velocity)
end

# Script entry point: expects the runtime directory as ARGS[1].
# (Removed a leftover debug log line, `@info "hello?????"`.)
main()
31 changes: 31 additions & 0 deletions scripts/Block3/climateVelocity.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
script: climateVelocity.jl
name: Climate Velocity
description: "This script takes in multiple climate layers and computes the
velocity (rate) of change of each layer standardized across the period
1970-2100, split into four intervals. (This is because by default, CHELSA is
provided that way)"
author:
- name: Michael D. Catchen
identifier: https://orcid.org/0000-0002-6506-6487
inputs:
baseline_layers:
label: baseline_layers
description: a list of the baseline climate layers to use to compute velocity
type: image/tiff;application=geotiff[]
example: ["foo"]
end_layers:
label: end_layers
description: a list of the climate at the end of the period to compute
type: image/tiff;application=geotiff[]
example: ["/output/foobar/end.tif"]
landcover:
label: landcover raster
description: land cover raster to mask out water
type: image/tiff;application=geotiff
example: "foo"
outputs:
climate_velocity:
label: climate velocity
description: map
type: image/tiff;application=geotiff

6 changes: 6 additions & 0 deletions scripts/Block3/shared.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""
    read_inputs_dict(runtime_dir)

Parse `runtime_dir/input.json` into a `Dict`, first ensuring the
`runtime_dir/data/` working directory exists.
"""
function read_inputs_dict(runtime_dir)
    data_dir = joinpath(runtime_dir, "data/")
    isdir(data_dir) || mkdir(data_dir)
    return JSON.parsefile(joinpath(runtime_dir, "input.json"))
end
64 changes: 64 additions & 0 deletions scripts/Block3/weightLayers.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
using JSON
using CSV
using DataFrames
using SpeciesDistributionToolkit

include("shared.jl")

"""
    write_outputs(runtime_dir, priority)

Write `priority` as a cloud-optimized GeoTIFF inside `runtime_dir` and
record its path in `output.json` under the `priority_map` key.
"""
function write_outputs(runtime_dir, priority)
    raster_path = joinpath(runtime_dir, "priority.tif")
    SpeciesDistributionToolkit._write_geotiff(raster_path, priority; compress="COG")

    open(joinpath(runtime_dir, "output.json"), "w") do io
        write(io, JSON.json(Dict(:priority_map => raster_path)))
    end
end

"""
    get_shared_indices(layers)

Return the grid indices at which *every* layer in `layers` has a value
(i.e. is not `nothing`).

Bug fix: the original splatted the per-layer index vectors into a tuple,
`(land_idxs...)`, instead of intersecting them — the result was a tuple of
vectors, not a usable shared index set, which breaks the downstream
`grid[shared_idxs]` indexing in `main`. `intersect` yields the common set.
"""
function get_shared_indices(layers)
    # One index set per layer, reduced to the indices common to all of them.
    return mapreduce(l -> findall(!isnothing, l.grid), intersect, layers)
end

# Entry point for the priority-map step: rescales the four input layers to
# [0, 1], inverts accessibility, and combines them as a weighted sum using
# the user-supplied weight vector β.
function main()
    runtime_dir = ARGS[1]
    inputs = read_inputs_dict(runtime_dir)

    # β is the per-layer weight vector; order must match layer_paths below.
    β = inputs["weights"]

    uncert_path = inputs["uncertainty"]
    uniqueness_path = inputs["climate_uniqueness"]
    velocity_path = inputs["climate_velocity"]

    access_path = inputs["accessibility"]

    layer_paths = [uncert_path, uniqueness_path, velocity_path, access_path]

    layers = [rescale(SimpleSDMPredictor(joinpath(runtime_dir, layer_path)), (0,1)) for layer_path in layer_paths]
    shared_idxs = get_shared_indices(layers)

    # Accessibility gets inverted (1 - x) because smaller values = better
    # access, while the priority sum treats larger values as higher priority.
    access = similar(layers[begin])
    access.grid .= nothing
    access.grid[shared_idxs] .= 1 .- layers[end].grid[shared_idxs]
    access = rescale(access, (0,1))
    layers[4] = access

    priority = similar(layers[begin])
    priority.grid .= nothing
    priority.grid[shared_idxs] .= 0.

    # Weighted sum across all four layers, restricted to shared indices.
    for (i,l) in enumerate(layers)
        priority.grid[shared_idxs] .+= β[i] .* l.grid[shared_idxs]
    end

    priority = rescale(priority, (0,1))

    write_outputs(runtime_dir, priority)
end

main()
Loading

0 comments on commit c2e429c

Please sign in to comment.