From 6996ec03835b524275f64c43d3a4e923ca09e9e6 Mon Sep 17 00:00:00 2001 From: michael Date: Sun, 1 Oct 2023 11:35:16 -0400 Subject: [PATCH 01/30] no --- specification.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification.xml b/specification.xml index 679b566b..7b94df8b 100644 --- a/specification.xml +++ b/specification.xml @@ -1,4 +1,4 @@ - + From 0c1c83feebc06d85be0b180e5e5f3c20a8fc247d Mon Sep 17 00:00:00 2001 From: michael Date: Sun, 1 Oct 2023 14:15:05 -0400 Subject: [PATCH 02/30] :tada: Julia BRT pipeline --- compose.yml | 6 +- pipelines/julia_sdm.json | 427 ++++++++++++++++++ runners/julia-dockerfile | 6 +- scripts/SDM/julia_sdms/fitBRT.jl | 163 +++++++ scripts/SDM/julia_sdms/fitBRT.yml | 35 ++ scripts/SDM/julia_sdms/generateBackground.jl | 35 ++ scripts/SDM/julia_sdms/generateBackground.yml | 28 ++ scripts/SDM/julia_sdms/loadCHELSA.jl | 90 ++++ scripts/SDM/julia_sdms/loadCHELSA.yml | 26 ++ scripts/SDM/julia_sdms/shared.jl | 15 + 10 files changed, 825 insertions(+), 6 deletions(-) create mode 100644 pipelines/julia_sdm.json create mode 100644 scripts/SDM/julia_sdms/fitBRT.jl create mode 100644 scripts/SDM/julia_sdms/fitBRT.yml create mode 100644 scripts/SDM/julia_sdms/generateBackground.jl create mode 100644 scripts/SDM/julia_sdms/generateBackground.yml create mode 100644 scripts/SDM/julia_sdms/loadCHELSA.jl create mode 100644 scripts/SDM/julia_sdms/loadCHELSA.yml create mode 100644 scripts/SDM/julia_sdms/shared.jl diff --git a/compose.yml b/compose.yml index 78f351f9..62ac1d13 100755 --- a/compose.yml +++ b/compose.yml @@ -50,9 +50,9 @@ services: runner-julia: container_name: biab-runner-julia image: geobon/bon-in-a-box:runner-julia - # build: - # context: ./runners - # dockerfile: julia-dockerfile + build: + context: ./runners + dockerfile: julia-dockerfile command: sh # Avoids launching julia session that will not be used. tty: true # Needed to keep the container alive, waiting for requests. volumes: diff --git a/pipelines/julia_sdm.json b/pipelines/julia_sdm.json new file mode 100644 index 00000000..ade0a3d6 --- /dev/null +++ b/pipelines/julia_sdm.json @@ -0,0 +1,427 @@ +{ +"nodes": [ + { + "id": "34", + "type": "io", + "position": { + "x": 1586.7477746676786, + "y": 76.24777466767864 + }, + "data": { + "descriptionFile": "filtering>cleanCoordinates.yml" + } + }, + { + "id": "35", + "type": "constant", + "position": { + "x": 1279.3155700434806, + "y": 244.87700732693582 + }, + "dragHandle": ".dragHandle", + "data": { + "type": "text[]", + "value": [ + "equal", + "zeros", + "duplicates", + "same_pixel", + "capitals", + "centroids", + "seas", + "urban", + "gbif", + "institutions" + ] + } + }, + { + "id": "64", + "type": "constant", + "position": { + "x": 1365.2699098847381, + "y": 400.73124362757727 + }, + "dragHandle": ".dragHandle", + "data": { + "type": "float", + "value": 0.8 + } + }, + { + "id": "96", + "type": "io", + "position": { + "x": 798, + "y": 86 + }, + "data": { + "descriptionFile": "data>pyLoadObservations>pyLoadObservations.yml" + } + }, + { + "id": "125", + "type": "io", + "position": { + "x": 2009.9356294214217, + "y": 237.59508691036865 + }, + "data": { + "descriptionFile": "SDM>julia_sdms>generateBackground.yml" + } + }, + { + "id": "126", + "type": "io", + "position": { + "x": 2528.381575227725, + "y": 232.33703046549493 + }, + "data": { + "descriptionFile": "SDM>julia_sdms>fitBRT.yml" + } + }, + { + "id": "127", + "type": "constant", + "position": { + "x": 1688.8666687011719, + "y": 193.6666717529297 + }, + "dragHandle": ".dragHandle", + "data": { + "type": "int", + "value": 30000 + } + }, + { + "id": "128", + "type": "output", + "position": { + "x": 2944.5951749942374, + "y": 176.795471455183 + }, + "data": { + "label": "Output" + } + }, + { + "id": "129", + "type": "output", + "position": { + "x": 2934.4253987541583, + "y": 243.00293443137292 + }, + "data": { + "label": "Output" + } + }, + { + "id": "130", + "type": "output", + "position": { + "x": 2934.463085490269, + "y": 298.6066659194679 + }, + "data": { + "label": "Output" + } + }, + { + "id": "134", + "type": "constant", + "position": { + "x": 30.866668701171875, + "y": 270.6666717529297 + }, + "dragHandle": ".dragHandle", + "data": { + "type": "float[]", + "value": [ + "-86.748047", + "33.724340", + "-52.207031", + "63.273182" + ] + } + }, + { + "id": "139", + "type": "io", + "position": { + "x": 1116.0405860379915, + "y": 604.5090499252474 + }, + "data": { + "descriptionFile": "SDM>julia_sdms>loadCHELSA.yml" + } + }, + { + "id": "140", + "type": "constant", + "position": { + "x": 155.54867352690655, + "y": 37.46137819782004 + }, + "dragHandle": ".dragHandle", + "data": { + "type": "text", + "value": "EPSG:4326" + } + }, + { + "id": "141", + "type": "output", + "position": { + "x": 2040.8666687011719, + "y": 51.66667175292969 + }, + "data": { + "label": "Output" + } + }, + { + "id": "142", + "type": "output", + "position": { + "x": 2395.866668701172, + "y": 129.6666717529297 + }, + "data": { + "label": "Output" + } + }, + { + "id": "144", + "type": "constant", + "position": { + "x": 446.74262439587665, + "y": 100.37230768385939 + }, + "dragHandle": ".dragHandle", + "data": { + "type": "text[]", + "value": [ + "Acer saccharum" + ] + } + } +], +"edges": [ + { + "source": "96", + "sourceHandle": "observations_file", + "target": "34", + "targetHandle": "presence", + "id": "reactflow__edge-96observations_file-34presence" + }, + { + "source": "35", + "sourceHandle": null, + "target": "34", + "targetHandle": "tests", + "id": "reactflow__edge-35-34tests" + }, + { + "source": "64", + "sourceHandle": null, + "target": "34", + "targetHandle": "env_threshold", + "id": "reactflow__edge-64-34env_threshold" + }, + { + "source": "34", + "sourceHandle": "clean_presence", + "target": "125", + "targetHandle": "presence", + "id": "reactflow__edge-34clean_presence-125presence" + }, + { + "source": "127", + "sourceHandle": null, + "target": "125", + "targetHandle": "buffer_distance", + "id": "reactflow__edge-127-125buffer_distance" + }, + { + "source": "125", + "sourceHandle": "background", + "target": "126", + "targetHandle": "background", + "id": "reactflow__edge-125background-126background" + }, + { + "source": "34", + "sourceHandle": "clean_presence", + "target": "126", + "targetHandle": "occurrence", + "id": "reactflow__edge-34clean_presence-126occurrence" + }, + { + "source": "126", + "sourceHandle": "sdm", + "target": "128", + "targetHandle": null, + "id": "reactflow__edge-126sdm-128" + }, + { + "source": "126", + "sourceHandle": "sdm_uncertainty", + "target": "129", + "targetHandle": null, + "id": "reactflow__edge-126sdm_uncertainty-129" + }, + { + "source": "126", + "sourceHandle": "fit_stats", + "target": "130", + "targetHandle": null, + "id": "reactflow__edge-126fit_stats-130" + }, + { + "source": "134", + "sourceHandle": null, + "target": "96", + "targetHandle": "bbox", + "id": "reactflow__edge-134-96bbox" + }, + { + "source": "134", + "sourceHandle": null, + "target": "139", + "targetHandle": "bbox", + "id": "reactflow__edge-134-139bbox" + }, + { + "source": "139", + "sourceHandle": "predictors", + "target": "34", + "targetHandle": "predictors", + "id": "reactflow__edge-139predictors-34predictors" + }, + { + "source": "139", + "sourceHandle": "predictors", + "target": "125", + "targetHandle": "predictors", + "id": "reactflow__edge-139predictors-125predictors" + }, + { + "source": "139", + "sourceHandle": "predictors", + "target": "126", + "targetHandle": "predictors", + "id": "reactflow__edge-139predictors-126predictors" + }, + { + "source": "140", + "sourceHandle": null, + "target": "96", + "targetHandle": "proj", + "id": "reactflow__edge-140-96proj" + }, + { + "source": "34", + "sourceHandle": "clean_presence", + "target": "141", + "targetHandle": null, + "id": "reactflow__edge-34clean_presence-141" + }, + { + "source": "125", + "sourceHandle": "background", + "target": "142", + "targetHandle": null, + "id": "reactflow__edge-125background-142" + }, + { + "source": "144", + "sourceHandle": null, + "target": "96", + "targetHandle": "taxa", + "id": "reactflow__edge-144-96taxa" + } +], +"inputs": { + "data>pyLoadObservations>pyLoadObservations.yml@96|data_source": { + "description": "Source of the data (One of gbif_pc - Planetary computer or gbif_api - GBIF Download API)", + "label": "Data source", + "type": "options", + "options": [ + "gbif_pc", + "gbif_api" + ], + "example": "gbif_api" + }, + "data>pyLoadObservations>pyLoadObservations.yml@96|min_year": { + "description": "Min year observations wanted", + "label": "minimum year", + "type": "int", + "example": 2010 + }, + "data>pyLoadObservations>pyLoadObservations.yml@96|max_year": { + "description": "Max year observations wanted", + "label": "maximum year", + "type": "int", + "example": 2020 + }, + "SDM>julia_sdms>loadCHELSA.yml@139|pca": { + "description": "Boolean, whether to PCA predictor layers or not", + "label": "pca layers", + "type": "boolean", + "example": true + }, + "SDM>julia_sdms>loadCHELSA.yml@139|layer_numbers": { + "description": "the CHELSA layers to use", + "type": "int[]", + "example": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19 + ] + } +}, +"outputs": { + "SDM>julia_sdms>fitBRT.yml@126|sdm": { + "description": "map of predicted occurrence probability", + "label": "sdm", + "type": "image/tiff;application=geotiff" + }, + "SDM>julia_sdms>fitBRT.yml@126|sdm_uncertainty": { + "description": "map of relative uncertainty", + "label": "uncertainty", + "type": "image/tiff;application=geotiff" + }, + "SDM>julia_sdms>fitBRT.yml@126|fit_stats": { + "description": "JSON of model fit stats and threshold", + "label": "fit_stats", + "type": "text/json" + }, + "filtering>cleanCoordinates.yml@34|clean_presence": { + "description": "Dataframe, table with clean presence points.", + "label": "clean presences", + "type": "text/tab-separated-values" + }, + "SDM>julia_sdms>generateBackground.yml@125|background": { + "description": "TSV file containing with the coordinates background points.", + "label": "background", + "type": "text/tab-separated-values" + } +} +} \ No newline at end of file diff --git a/runners/julia-dockerfile b/runners/julia-dockerfile index 5f4ea3d4..f98615fe 100644 --- a/runners/julia-dockerfile +++ b/runners/julia-dockerfile @@ -1,6 +1,6 @@ -FROM julia:1.8.1 +FROM julia:1.9.3 # Pre-compiling Julia dependencies -RUN julia -e 'pwd(); using Pkg; Pkg.add.(["BiodiversityObservationNetworks", "JSON", "SimpleSDMLayers", "NeutralLandscapes", "Plots"]); Pkg.instantiate();' +RUN julia -e 'pwd(); using Pkg; Pkg.add.(["SpeciesDistributionToolkit", "JSON", "CSV", "DataFrames", "StatsBase", "EvoTrees", "MultivariateStats" ]); Pkg.instantiate();' -RUN date +"%Y-%m-%d %R" > /version.txt \ No newline at end of file +RUN date +"%Y-%m-%d %R" > /version.txt diff --git a/scripts/SDM/julia_sdms/fitBRT.jl b/scripts/SDM/julia_sdms/fitBRT.jl new file mode 100644 index 00000000..7c5c7f99 --- /dev/null +++ b/scripts/SDM/julia_sdms/fitBRT.jl @@ -0,0 +1,163 @@ +using JSON +using CSV +using DataFrames +using EvoTrees +using StatsBase +using SpeciesDistributionToolkit + +include("./shared.jl") + +function get_features_and_labels(presences, absences, climate_layers) + presences = mask(presences, climate_layers[begin]) + absences = mask(absences, climate_layers[begin]) + coord_presence = keys(replace(presences, false => nothing)) + coord_absence = keys(replace(absences, false => nothing)) + coord = vcat(coord_presence, coord_absence) + + X = hcat([layer[coord] for layer in climate_layers]...) + y = vcat(fill(1.0, length(coord_presence)), fill(0.0, length(coord_absence))) + return X, y, coord +end + +function layers_to_matrix!(climate_layers, mat) + for (i, idx) in enumerate(eachindex(climate_layers[begin].grid)) + for l in eachindex(climate_layers) + mat[l, i] = climate_layers[l].grid[idx] + end + end +end + + +function compute_fit_stats_and_cutoff(distribution, coords, y) + cutoff = LinRange(extrema(distribution)..., 500) + coords = convert(Vector{typeof(coords[begin])}, coords) + idx = findall(!isnothing, coords) + I = [SimpleSDMLayers._point_to_cartesian(distribution, c) for c in coords][idx] + + obs = y .> 0 + + tp = zeros(Float64, length(cutoff)) + fp = zeros(Float64, length(cutoff)) + tn = zeros(Float64, length(cutoff)) + fn = zeros(Float64, length(cutoff)) + + for (i, c) in enumerate(cutoff) + prd = [distribution.grid[i] >= c for i in I] + tp[i] = sum(prd .& obs) + tn[i] = sum(.!(prd) .& (.!obs)) + fp[i] = sum(prd .& (.!obs)) + fn[i] = sum(.!(prd) .& obs) + end + + tpr = tp ./ (tp .+ fn) + fpr = fp ./ (fp .+ tn) + J = (tp ./ (tp .+ fn)) + (tn ./ (tn .+ fp)) .- 1.0 + + roc_dx = [reverse(fpr)[i] - reverse(fpr)[i - 1] for i in 2:length(fpr)] + roc_dy = [reverse(tpr)[i] + reverse(tpr)[i - 1] for i in 2:length(tpr)] + ROCAUC = sum(roc_dx .* (roc_dy ./ 2.0)) + + thr_index = last(findmax(J)) + τ = cutoff[thr_index] + + return Dict(:rocauc => ROCAUC, :threshold => τ, :J => J[last(findmax(J))]) +end + + +function test_train_split(X, y, proportion=0.7) + train_size = floor(Int, proportion * length(y)) + Itrain = StatsBase.sample(1:length(y), train_size; replace=false) + Itest = setdiff(1:length(y), Itrain) + Xtrain, Xtest = X[Itrain, :], X[Itest, :] + Ytrain, Ytest = y[Itrain], y[Itest] + return Xtrain, Ytrain, Xtest, Ytest +end + +function predict_single_sdm(model, layers) + mat = zeros(Float32, length(layers), prod(size(layers[begin]))) + layers_to_matrix!(layers, mat) + + I = eachindex(layers[begin].grid) + pred = EvoTrees.predict(model, mat') + + distribution = SimpleSDMPredictor( + zeros(Float32, size(layers[begin])); + SpeciesDistributionToolkit.boundingbox(layers[begin])... + ) + distribution.grid[I] = pred[:, 1] + + uncertainty = SimpleSDMPredictor(zeros(Float32, size(layers[begin])); SpeciesDistributionToolkit.boundingbox(layers[begin])...) + uncertainty.grid[I] = pred[:, 2] + + return rescale(distribution, (0, 1)), rescale(uncertainty, (0, 1)) +end + +function main() + runtime_dir = ARGS[1] + inputs = read_inputs_dict(runtime_dir) + predictor_paths = inputs["predictors"] + occurrence_path = inputs["occurrence"] + pseudoabs_path = inputs["background"] + + predictors = SimpleSDMPredictor.(predictor_paths) + + occurrence = CSV.read(occurrence_path, DataFrame) + occurrence_layer = create_occurrence_layer(similar(predictors[1]), occurrence) + + pseudoabsences = CSV.read(pseudoabs_path, DataFrame) + pseudoabs_layer = create_occurrence_layer(similar(predictors[1]), pseudoabsences) + #pseudoabs_layer = create_occurrence_layer(similar(predictors[1]), pseudoabs_df) + + X, y, p_and_a_coords = get_features_and_labels(occurrence_layer, pseudoabs_layer, predictors) + + Xtrain, Ytrain, Xtest, Ytest = test_train_split(X, y) + + brt = EvoTreeGaussian(; + loss = :gaussian, + metric = :gaussian, + nrounds = 100, + nbins = 100, + λ = 0.0, + γ = 0.0, + η = 0.1, + max_depth = 7, + min_weight = 1.0, + rowsample = 0.5, + colsample = 1.0, + ) + + + model = fit_evotree( + brt; + x_train=Xtrain, + y_train=Ytrain, + x_eval=Xtest, + y_eval=Ytest + ) + + prediction, uncertainty = predict_single_sdm(model, predictors) + + sdm_path = joinpath(runtime_dir, "sdm.tif") + SpeciesDistributionToolkit.save(sdm_path, prediction) + uncertainty_path = joinpath(runtime_dir, "uncertainty.tif") + SpeciesDistributionToolkit.save(uncertainty_path, uncertainty) + + + fit_dict = compute_fit_stats_and_cutoff(prediction, p_and_a_coords, y) + fit_stats_path = joinpath(runtime_dir, "fit_stats.json") + open(fit_stats_path, "w") do f + write(f, JSON.json(fit_dict)) + end + + output_json_path = joinpath(runtime_dir, "output.json") + open(output_json_path, "w") do f + write(f, JSON.json(Dict( + :sdm => sdm_path, + :uncertainty => uncertainty_path, + :fit_stats => fit_stats_path + ))) + end + +end + +main() \ No newline at end of file diff --git a/scripts/SDM/julia_sdms/fitBRT.yml b/scripts/SDM/julia_sdms/fitBRT.yml new file mode 100644 index 00000000..6a4aee33 --- /dev/null +++ b/scripts/SDM/julia_sdms/fitBRT.yml @@ -0,0 +1,35 @@ +script: fitBRT.jl +name: BRT +description: "This script creates an SDM and uncertainty map based on using Boosted Regression Trees (BRTs) using the package SpeciesDistributionToolkit.jl and EvoTrees.jl" +author: + - name: Michael D. Catchen + identifier: https://orcid.org/0000-0002-6506-6487 +inputs: + occurrence: + label: occurrence coordinate dataframe + description: Dataframe, presence data. + type: text/tab-separated-values + example: "/output/data/getObservations/9f7d1cc148464cd0517e01c67af0ab5b/obs_data.tsv" + background: + label: background + description: Dataframe, background data. + type: text/tab-separated-values + example: "/output/SDM/julia_sdms/generateBackground/f69fe7abd1711e193bb4f8aef51c74cc/background.csv" + predictors: + label: predictors + description: layer names (predictors) as a list + type: image/tiff;application=geotiff[] + example: ["/output/data/loadFromStac/ea82148a2926d97acf85cea61a110194/bio1_242b2b01561981-01-01.tif","/output/data/loadFromStac/ea82148a2926d97acf85cea61a110194/bio2_243a4e7f541981-01-01.tif"] +outputs: + sdm: + label: sdm + description: map of predicted occurrence probability + type: image/tiff;application=geotiff + sdm_uncertainty: + label: + description: map of relative uncertainty + type: image/tiff;application=geotiff + fit_stats: + label: fit_stats + description: JSON of model fit stats and threshold + type: text/json diff --git a/scripts/SDM/julia_sdms/generateBackground.jl b/scripts/SDM/julia_sdms/generateBackground.jl new file mode 100644 index 00000000..8d47cbc8 --- /dev/null +++ b/scripts/SDM/julia_sdms/generateBackground.jl @@ -0,0 +1,35 @@ +using JSON +using CSV +using DataFrames +using SpeciesDistributionToolkit + +include("./shared.jl") + +function main() + runtime_dir = ARGS[1] + inputs = read_inputs_dict(runtime_dir) + + predictor_paths = inputs["predictors"] + occurrence_path = inputs["presence"] + buffer_distance = inputs["buffer_distance"] / 1000 # div by 1000 to convert to km + + predictors = SimpleSDMPredictor.(predictor_paths) + + occurrence = CSV.read(occurrence_path, DataFrame) + occurrence_layer = create_occurrence_layer(similar(predictors[1]), occurrence) + + buffer = pseudoabsencemask(WithinRadius, occurrence_layer; distance = buffer_distance) + absences = SpeciesDistributionToolkit.sample(.!buffer, floor(Int, 0.5sum(occurrence_layer))) + + abs_coords = findall(absences) + pseudoabs_df = DataFrame(lon=[c[1] for c in abs_coords], lat=[c[2] for c in abs_coords]) + CSV.write("$runtime_dir/background.tsv", pseudoabs_df, delim="\t") + + output_json_path = joinpath(runtime_dir, "output.json") + open(output_json_path, "w") do f + write(f, JSON.json(Dict(:background=>"$runtime_dir/background.tsv"))) + end +end + +main() + diff --git a/scripts/SDM/julia_sdms/generateBackground.yml b/scripts/SDM/julia_sdms/generateBackground.yml new file mode 100644 index 00000000..d73d41ad --- /dev/null +++ b/scripts/SDM/julia_sdms/generateBackground.yml @@ -0,0 +1,28 @@ +script: generateBackground.jl +name: Generate Background +description: "This script creates a set of pseudoabsences/background points." +author: + - name: Michael D. Catchen + identifier: https://orcid.org/0000-0002-6506-6487 +inputs: + presence: + label: presence + description: Dataframe, presence data. + type: text/tab-separated-values + example: "/output/data/getObservations/9f7d1cc148464cd0517e01c67af0ab5b/obs_data.tsv" + predictors: + label: predictors + description: layer names (predictors) as a list + type: image/tiff;application=geotiff[] + example: ["/output/data/loadFromStac/ea82148a2926d97acf85cea61a110194/bio1_242b2b01561981-01-01.tif","/output/data/loadFromStac/ea82148a2926d97acf85cea61a110194/bio2_243a4e7f541981-01-01.tif"] + buffer_distance: + label: buffer_distance + description: the minimum distance between any presence and any pseudoabsence in meters + type: int + example: 50000 +outputs: + background: + label: background + description: TSV file containing with the coordinates background points. + type: text/tab-separated-values + diff --git a/scripts/SDM/julia_sdms/loadCHELSA.jl b/scripts/SDM/julia_sdms/loadCHELSA.jl new file mode 100644 index 00000000..ddc0c473 --- /dev/null +++ b/scripts/SDM/julia_sdms/loadCHELSA.jl @@ -0,0 +1,90 @@ +using SpeciesDistributionToolkit +using JSON +using CSV +using MultivariateStats +using StatsBase +using DataFrames + +include("./shared.jl") + +const PROVIDER = RasterData(CHELSA2, BioClim) + +function convert_layers_to_features_matrix(layers) + I = findall(!isnothing, layers[1].grid) + data_matrix = zeros(Float32, length(layers), length(I)) + for (i,l) in enumerate(layers) + x = Float32.(vec(l.grid[I])) + z = StatsBase.fit(ZScoreTransform, x) + data_matrix[i,:] .= StatsBase.transform(z, x) + end + data_matrix +end + +function fill_layer!(empty_layer, vec) + m = reshape(vec, size(empty_layer)) + for j in eachindex(empty_layer.grid) + empty_layer.grid[j] = m[j] + end +end + +function pca_data_matrix(data_matrix) + pca = MultivariateStats.fit(PCA, data_matrix) + MultivariateStats.transform(pca, data_matrix) +end + +function make_pca_layers(layers) + pca_mat = pca_data_matrix(convert_layers_to_features_matrix(layers)) + pca_layers = [convert(Float32, similar(layers[begin])) for l in 1:size(pca_mat, 1)] + for (i,pca_layer) in enumerate(pca_layers) + fill_layer!(pca_layer, pca_mat[i,:]) + end + pca_layers +end + +function write_outputs(runtime_dir, layers) + predictor_paths = [] + + for (i,l) in enumerate(layers) + outpath = joinpath(runtime_dir, "predictor$i.tif") + push!(predictor_paths, outpath) + SpeciesDistributionToolkit.save(outpath, l) + end + + output_json_path = joinpath(runtime_dir, "output.json") + open(output_json_path, "w") do f + write(f, JSON.json(Dict(:predictors=>predictor_paths))) + end +end + +function main() + runtime_dir = ARGS[1] + inputs = read_inputs_dict(runtime_dir) + + bbox = inputs["bbox"] + pca_input = inputs["pca"] + layer_nums = inputs["layer_numbers"] + layer_names = ["BIO$i" for i in layer_nums] + + bbox = (left=bbox[1], bottom=bbox[2], right=bbox[3], top=bbox[4]) + @info bbox + + layers = [] + for l in layer_names + success = false + while !success + try + a = convert(Float32, SimpleSDMPredictor(PROVIDER; layer=l, bbox...)) + success = true + push!(layers, a) + catch + @info "Errored on $l on attempt $i. Almost certainly a network error on CHELSA's side. Trying again..." + end + end + end + + layers = pca_input ? make_pca_layers(layers) : layers + + write_outputs(runtime_dir, layers) +end + +main() \ No newline at end of file diff --git a/scripts/SDM/julia_sdms/loadCHELSA.yml b/scripts/SDM/julia_sdms/loadCHELSA.yml new file mode 100644 index 00000000..e4ad5949 --- /dev/null +++ b/scripts/SDM/julia_sdms/loadCHELSA.yml @@ -0,0 +1,26 @@ +script: loadCHELSA.jl +name: Load CHELSA +description: "This script creates an SDM and uncertainty map based on using Boosted Regression Trees (BRTs) using the package SpeciesDistributionToolkit.jl and EvoTrees.jl" +author: + - name: Michael D. Catchen + identifier: https://orcid.org/0000-0002-6506-6487 +inputs: + bbox: + label: bbox + description: Vector of float, bbox coordinates of the extent in the order xmin, ymin, xmax, ymax in coordinates + type: float[] + example: [-86.75, 33.72,-52.21,63.27] + pca: + label: pca layers + description: Boolean, whether to PCA predictor layers or not + type: boolean + example: True + layer_numbers: + description: the CHELSA layers to use + type: int[] + example: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19] +outputs: + predictors: + label: predictors + description: raster, predictors + type: image/tiff;application=geotiff[] diff --git a/scripts/SDM/julia_sdms/shared.jl b/scripts/SDM/julia_sdms/shared.jl new file mode 100644 index 00000000..a4c88419 --- /dev/null +++ b/scripts/SDM/julia_sdms/shared.jl @@ -0,0 +1,15 @@ +function read_inputs_dict(runtime_dir) + filepath = joinpath(runtime_dir, "input.json") + output_dir = joinpath(runtime_dir, "data/") + isdir(output_dir) || mkdir(output_dir) + return JSON.parsefile(filepath) +end + +function create_occurrence_layer(layer, occurrence) + layer.grid .= 0 + for r in eachrow(occurrence) + long, lat = r["lon"], r["lat"] + layer[long,lat] = 1 + end + convert(Bool, layer) +end \ No newline at end of file From 121b9decb4d4fdceb5cddeefd17b6a2dab75bd76 Mon Sep 17 00:00:00 2001 From: michael Date: Sun, 1 Oct 2023 14:28:46 -0400 Subject: [PATCH 03/30] Pipeline metadata and clipping SDM below threshold --- pipelines/julia_sdm.json | 865 ++++++++++++++++--------------- scripts/SDM/julia_sdms/fitBRT.jl | 8 +- 2 files changed, 448 insertions(+), 425 deletions(-) diff --git a/pipelines/julia_sdm.json b/pipelines/julia_sdm.json index ade0a3d6..341f834d 100644 --- a/pipelines/julia_sdm.json +++ b/pipelines/julia_sdm.json @@ -1,427 +1,444 @@ { -"nodes": [ - { - "id": "34", - "type": "io", - "position": { - "x": 1586.7477746676786, - "y": 76.24777466767864 - }, - "data": { - "descriptionFile": "filtering>cleanCoordinates.yml" - } - }, - { - "id": "35", - "type": "constant", - "position": { - "x": 1279.3155700434806, - "y": 244.87700732693582 - }, - "dragHandle": ".dragHandle", - "data": { - "type": "text[]", - "value": [ - "equal", - "zeros", - "duplicates", - "same_pixel", - "capitals", - "centroids", - "seas", - "urban", - "gbif", - "institutions" - ] - } - }, - { - "id": "64", - "type": "constant", - "position": { - "x": 1365.2699098847381, - "y": 400.73124362757727 - }, - "dragHandle": ".dragHandle", - "data": { - "type": "float", - "value": 0.8 - } - }, - { - "id": "96", - "type": "io", - "position": { - "x": 798, - "y": 86 - }, - "data": { - "descriptionFile": "data>pyLoadObservations>pyLoadObservations.yml" - } - }, - { - "id": "125", - "type": "io", - "position": { - "x": 2009.9356294214217, - "y": 237.59508691036865 - }, - "data": { - "descriptionFile": "SDM>julia_sdms>generateBackground.yml" - } - }, - { - "id": "126", - "type": "io", - "position": { - "x": 2528.381575227725, - "y": 232.33703046549493 - }, - "data": { - "descriptionFile": "SDM>julia_sdms>fitBRT.yml" - } - }, - { - "id": "127", - "type": "constant", - "position": { - "x": 1688.8666687011719, - "y": 193.6666717529297 - }, - "dragHandle": ".dragHandle", - "data": { - "type": "int", - "value": 30000 - } - }, - { - "id": "128", - "type": "output", - "position": { - "x": 2944.5951749942374, - "y": 176.795471455183 - }, - "data": { - "label": "Output" - } - }, - { - "id": "129", - "type": "output", - "position": { - "x": 2934.4253987541583, - "y": 243.00293443137292 - }, - "data": { - "label": "Output" - } - }, - { - "id": "130", - "type": "output", - "position": { - "x": 2934.463085490269, - "y": 298.6066659194679 - }, - "data": { - "label": "Output" - } - }, - { - "id": "134", - "type": "constant", - "position": { - "x": 30.866668701171875, - "y": 270.6666717529297 - }, - "dragHandle": ".dragHandle", - "data": { - "type": "float[]", - "value": [ - "-86.748047", - "33.724340", - "-52.207031", - "63.273182" - ] - } - }, - { - "id": "139", - "type": "io", - "position": { - "x": 1116.0405860379915, - "y": 604.5090499252474 - }, - "data": { - "descriptionFile": "SDM>julia_sdms>loadCHELSA.yml" - } - }, - { - "id": "140", - "type": "constant", - "position": { - "x": 155.54867352690655, - "y": 37.46137819782004 - }, - "dragHandle": ".dragHandle", - "data": { - "type": "text", - "value": "EPSG:4326" - } - }, - { - "id": "141", - "type": "output", - "position": { - "x": 2040.8666687011719, - "y": 51.66667175292969 - }, - "data": { - "label": "Output" - } - }, - { - "id": "142", - "type": "output", - "position": { - "x": 2395.866668701172, - "y": 129.6666717529297 - }, - "data": { - "label": "Output" - } - }, - { - "id": "144", - "type": "constant", - "position": { - "x": 446.74262439587665, - "y": 100.37230768385939 - }, - "dragHandle": ".dragHandle", - "data": { - "type": "text[]", - "value": [ - "Acer saccharum" - ] - } - } -], -"edges": [ - { - "source": "96", - "sourceHandle": "observations_file", - "target": "34", - "targetHandle": "presence", - "id": "reactflow__edge-96observations_file-34presence" - }, - { - "source": "35", - "sourceHandle": null, - "target": "34", - "targetHandle": "tests", - "id": "reactflow__edge-35-34tests" - }, - { - "source": "64", - "sourceHandle": null, - "target": "34", - "targetHandle": "env_threshold", - "id": "reactflow__edge-64-34env_threshold" - }, - { - "source": "34", - "sourceHandle": "clean_presence", - "target": "125", - "targetHandle": "presence", - "id": "reactflow__edge-34clean_presence-125presence" - }, - { - "source": "127", - "sourceHandle": null, - "target": "125", - "targetHandle": "buffer_distance", - "id": "reactflow__edge-127-125buffer_distance" - }, - { - "source": "125", - "sourceHandle": "background", - "target": "126", - "targetHandle": "background", - "id": "reactflow__edge-125background-126background" - }, - { - "source": "34", - "sourceHandle": "clean_presence", - "target": "126", - "targetHandle": "occurrence", - "id": "reactflow__edge-34clean_presence-126occurrence" - }, - { - "source": "126", - "sourceHandle": "sdm", - "target": "128", - "targetHandle": null, - "id": "reactflow__edge-126sdm-128" - }, - { - "source": "126", - "sourceHandle": "sdm_uncertainty", - "target": "129", - "targetHandle": null, - "id": "reactflow__edge-126sdm_uncertainty-129" - }, - { - "source": "126", - "sourceHandle": "fit_stats", - "target": "130", - "targetHandle": null, - "id": "reactflow__edge-126fit_stats-130" - }, - { - "source": "134", - "sourceHandle": null, - "target": "96", - "targetHandle": "bbox", - "id": "reactflow__edge-134-96bbox" - }, - { - "source": "134", - "sourceHandle": null, - "target": "139", - "targetHandle": "bbox", - "id": "reactflow__edge-134-139bbox" - }, - { - "source": "139", - "sourceHandle": "predictors", - "target": "34", - "targetHandle": "predictors", - "id": "reactflow__edge-139predictors-34predictors" - }, - { - "source": "139", - "sourceHandle": "predictors", - "target": "125", - "targetHandle": "predictors", - "id": "reactflow__edge-139predictors-125predictors" - }, - { - "source": "139", - "sourceHandle": "predictors", - "target": "126", - "targetHandle": "predictors", - "id": "reactflow__edge-139predictors-126predictors" - }, - { - "source": "140", - "sourceHandle": null, - "target": "96", - "targetHandle": "proj", - "id": "reactflow__edge-140-96proj" - }, - { - "source": "34", - "sourceHandle": "clean_presence", - "target": "141", - "targetHandle": null, - "id": "reactflow__edge-34clean_presence-141" - }, - { - "source": "125", - "sourceHandle": "background", - "target": "142", - "targetHandle": null, - "id": "reactflow__edge-125background-142" - }, - { - "source": "144", - "sourceHandle": null, - "target": "96", - "targetHandle": "taxa", - "id": "reactflow__edge-144-96taxa" - } -], -"inputs": { - "data>pyLoadObservations>pyLoadObservations.yml@96|data_source": { - "description": "Source of the data (One of gbif_pc - Planetary computer or gbif_api - GBIF Download API)", - "label": "Data source", - "type": "options", - "options": [ - "gbif_pc", - "gbif_api" + "nodes": [ + { + "id": "34", + "type": "io", + "position": { + "x": 1586.7477746676786, + "y": 76.24777466767864 + }, + "data": { + "descriptionFile": "filtering>cleanCoordinates.yml" + } + }, + { + "id": "35", + "type": "constant", + "position": { + "x": 1279.3155700434806, + "y": 244.87700732693582 + }, + "dragHandle": ".dragHandle", + "data": { + "type": "text[]", + "value": [ + "equal", + "zeros", + "duplicates", + "same_pixel", + "capitals", + "centroids", + "seas", + "urban", + "gbif", + "institutions" + ] + } + }, + { + "id": "64", + "type": "constant", + "position": { + "x": 1365.2699098847381, + "y": 400.73124362757727 + }, + "dragHandle": ".dragHandle", + "data": { + "type": "float", + "value": 0.8 + } + }, + { + "id": "96", + "type": "io", + "position": { + "x": 798, + "y": 86 + }, + "data": { + "descriptionFile": "data>pyLoadObservations>pyLoadObservations.yml" + } + }, + { + "id": "125", + "type": "io", + "position": { + "x": 2009.9356294214217, + "y": 237.59508691036865 + }, + "data": { + "descriptionFile": "SDM>julia_sdms>generateBackground.yml" + } + }, + { + "id": "126", + "type": "io", + "position": { + "x": 2528.381575227725, + "y": 232.33703046549493 + }, + "data": { + "descriptionFile": "SDM>julia_sdms>fitBRT.yml" + } + }, + { + "id": "127", + "type": "constant", + "position": { + "x": 1688.8666687011719, + "y": 193.6666717529297 + }, + "dragHandle": ".dragHandle", + "data": { + "type": "int", + "value": 30000 + } + }, + { + "id": "128", + "type": "output", + "position": { + "x": 2944.5951749942374, + "y": 176.795471455183 + }, + "data": { + "label": "Output" + } + }, + { + "id": "129", + "type": "output", + "position": { + "x": 2934.4253987541583, + "y": 243.00293443137292 + }, + "data": { + "label": "Output" + } + }, + { + "id": "130", + "type": "output", + "position": { + "x": 2934.463085490269, + "y": 298.6066659194679 + }, + "data": { + "label": "Output" + } + }, + { + "id": "134", + "type": "constant", + "position": { + "x": 30.866668701171875, + "y": 270.6666717529297 + }, + "dragHandle": ".dragHandle", + "data": { + "type": "float[]", + "value": [ + "-86.748047", + "33.724340", + "-52.207031", + "63.273182" + ] + } + }, + { + "id": "139", + "type": "io", + "position": { + "x": 1116.0405860379915, + "y": 604.5090499252474 + }, + "data": { + "descriptionFile": "SDM>julia_sdms>loadCHELSA.yml" + } + }, + { + "id": "140", + "type": "constant", + "position": { + "x": 155.54867352690655, + "y": 37.46137819782004 + }, + "dragHandle": ".dragHandle", + "data": { + "type": "text", + "value": "EPSG:4326" + } + }, + { + "id": "141", + "type": "output", + "position": { + "x": 2040.8666687011719, + "y": 51.66667175292969 + }, + "data": { + "label": "Output" + } + }, + { + "id": "142", + "type": "output", + "position": { + "x": 2395.866668701172, + "y": 129.6666717529297 + }, + "data": { + "label": "Output" + } + }, + { + "id": "144", + "type": "constant", + "position": { + "x": 446.74262439587665, + "y": 100.37230768385939 + }, + "dragHandle": ".dragHandle", + "data": { + "type": "text[]", + "value": [ + "Acer saccharum" + ] + } + } ], - "example": "gbif_api" - }, - "data>pyLoadObservations>pyLoadObservations.yml@96|min_year": { - "description": "Min year observations wanted", - "label": "minimum year", - "type": "int", - "example": 2010 - }, - "data>pyLoadObservations>pyLoadObservations.yml@96|max_year": { - "description": "Max year observations wanted", - "label": "maximum year", - "type": "int", - "example": 2020 - }, - "SDM>julia_sdms>loadCHELSA.yml@139|pca": { - "description": "Boolean, whether to PCA predictor layers or not", - "label": "pca layers", - "type": "boolean", - "example": true - }, - "SDM>julia_sdms>loadCHELSA.yml@139|layer_numbers": { - "description": "the CHELSA layers to use", - "type": "int[]", - "example": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19 - ] - } -}, -"outputs": { - "SDM>julia_sdms>fitBRT.yml@126|sdm": { - "description": "map of predicted occurrence probability", - "label": "sdm", - "type": "image/tiff;application=geotiff" - }, - "SDM>julia_sdms>fitBRT.yml@126|sdm_uncertainty": { - "description": "map of relative uncertainty", - "label": "uncertainty", - "type": "image/tiff;application=geotiff" - }, - "SDM>julia_sdms>fitBRT.yml@126|fit_stats": { - "description": "JSON of model fit stats and threshold", - "label": "fit_stats", - "type": "text/json" - }, - "filtering>cleanCoordinates.yml@34|clean_presence": { - "description": "Dataframe, table with clean presence points.", - "label": "clean presences", - "type": "text/tab-separated-values" - }, - "SDM>julia_sdms>generateBackground.yml@125|background": { - "description": "TSV file containing with the coordinates background points.", - "label": "background", - "type": "text/tab-separated-values" + "edges": [ + { + "source": "96", + "sourceHandle": "observations_file", + "target": "34", + "targetHandle": "presence", + "id": "reactflow__edge-96observations_file-34presence" + }, + { + "source": "35", + "sourceHandle": null, + "target": "34", + "targetHandle": "tests", + "id": "reactflow__edge-35-34tests" + }, + { + "source": "64", + "sourceHandle": null, + "target": "34", + "targetHandle": "env_threshold", + "id": "reactflow__edge-64-34env_threshold" + }, + { + "source": "34", + "sourceHandle": "clean_presence", + "target": "125", + "targetHandle": "presence", + "id": "reactflow__edge-34clean_presence-125presence" + }, + { + "source": "127", + "sourceHandle": null, + "target": "125", + "targetHandle": "buffer_distance", + "id": "reactflow__edge-127-125buffer_distance" + }, + { + "source": "125", + "sourceHandle": "background", + "target": "126", + "targetHandle": "background", + "id": "reactflow__edge-125background-126background" + }, + { + "source": "34", + "sourceHandle": "clean_presence", + "target": "126", + "targetHandle": "occurrence", + "id": "reactflow__edge-34clean_presence-126occurrence" + }, + { + "source": "126", + "sourceHandle": "sdm", + "target": "128", + "targetHandle": null, + "id": "reactflow__edge-126sdm-128" + }, + { + "source": "126", + "sourceHandle": "sdm_uncertainty", + "target": "129", + "targetHandle": null, + "id": "reactflow__edge-126sdm_uncertainty-129" + }, + { + "source": "126", + "sourceHandle": "fit_stats", + "target": "130", + "targetHandle": null, + "id": "reactflow__edge-126fit_stats-130" + }, + { + "source": "134", + "sourceHandle": null, + "target": "96", + "targetHandle": "bbox", + "id": "reactflow__edge-134-96bbox" + }, + { + "source": "134", + "sourceHandle": null, + "target": "139", + "targetHandle": "bbox", + "id": "reactflow__edge-134-139bbox" + }, + { + "source": "139", + "sourceHandle": "predictors", + "target": "34", + "targetHandle": "predictors", + "id": "reactflow__edge-139predictors-34predictors" + }, + { + "source": "139", + "sourceHandle": "predictors", + "target": "125", + "targetHandle": "predictors", + "id": "reactflow__edge-139predictors-125predictors" + }, + { + "source": "139", + "sourceHandle": "predictors", + "target": "126", + "targetHandle": "predictors", + "id": "reactflow__edge-139predictors-126predictors" + }, + { + "source": "140", + "sourceHandle": null, + "target": "96", + "targetHandle": "proj", + "id": "reactflow__edge-140-96proj" + }, + { + "source": "34", + "sourceHandle": "clean_presence", + "target": "141", + "targetHandle": null, + "id": "reactflow__edge-34clean_presence-141" + }, + { + "source": "125", + "sourceHandle": "background", + "target": "142", + "targetHandle": null, + "id": "reactflow__edge-125background-142" + }, + { + "source": "144", + "sourceHandle": null, + "target": "96", + "targetHandle": "taxa", + "id": "reactflow__edge-144-96taxa" + } + ], + "inputs": { + "data>pyLoadObservations>pyLoadObservations.yml@96|data_source": { + "description": "Source of the data (One of gbif_pc - Planetary computer or gbif_api - GBIF Download API)", + "label": "Data source", + "type": "options", + "options": [ + "gbif_pc", + "gbif_api" + ], + "example": "gbif_api" + }, + "data>pyLoadObservations>pyLoadObservations.yml@96|min_year": { + "description": "Min year observations wanted", + "label": "minimum year", + "type": "int", + "example": 2010 + }, + "data>pyLoadObservations>pyLoadObservations.yml@96|max_year": { + "description": "Max year observations wanted", + "label": "maximum year", + "type": "int", + "example": 2020 + }, + "SDM>julia_sdms>loadCHELSA.yml@139|pca": { + "description": "Boolean, whether to PCA predictor layers or not", + "label": "pca layers", + "type": "boolean", + "example": true + }, + "SDM>julia_sdms>loadCHELSA.yml@139|layer_numbers": { + "description": "The numbers of CHELSA layers to use", + "type": "int[]", + "example": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19 + ], + "label": "CHELSA Layers" + } + }, + "outputs": { + "SDM>julia_sdms>fitBRT.yml@126|sdm": { + "description": "Map of predicted occurrence probability", + "label": "SDM", + "type": "image/tiff;application=geotiff" + }, + "SDM>julia_sdms>fitBRT.yml@126|sdm_uncertainty": { + "description": "Map of relative model uncertainty", + "label": "Uncertainty", + "type": "image/tiff;application=geotiff" + }, + "SDM>julia_sdms>fitBRT.yml@126|fit_stats": { + "description": "JSON of BRT fit stats and optimal cutoff threshold", + "label": "Fit Stats", + "type": "text/json" + }, + "filtering>cleanCoordinates.yml@34|clean_presence": { + "description": "Data frame with clean occurrence points.", + "label": "Cleaned Occurrences", + "type": "text/tab-separated-values" + }, + "SDM>julia_sdms>generateBackground.yml@125|background": { + "description": "TSV file containing with the background (pseudoabsence) coordinates.", + "label": "Background Points", + "type": "text/tab-separated-values" + } + }, + "metadata": { + "name": "Julia BRT SDM", + "description": "Pipeline to make a species distribution model (SDM) using Boosted Regression Trees (BRT) in Julia, using the SpeciesDistributionToolkit.jl and EvoTrees.jl packages.", + "author": [ + { + "name": "Michael D. Catchen", + "identifier": "https://orcid.org/0000-0002-6506-6487" + }, + { + "name": "Timothée Poisot", + "identifier": "https://orcid.org/0000-0002-0735-5184" + } + ], + "license": "MIT", + "external_link": null } -} -} \ No newline at end of file + } \ No newline at end of file diff --git a/scripts/SDM/julia_sdms/fitBRT.jl b/scripts/SDM/julia_sdms/fitBRT.jl index 7c5c7f99..325549fb 100644 --- a/scripts/SDM/julia_sdms/fitBRT.jl +++ b/scripts/SDM/julia_sdms/fitBRT.jl @@ -89,7 +89,7 @@ function predict_single_sdm(model, layers) uncertainty = SimpleSDMPredictor(zeros(Float32, size(layers[begin])); SpeciesDistributionToolkit.boundingbox(layers[begin])...) uncertainty.grid[I] = pred[:, 2] - return rescale(distribution, (0, 1)), rescale(uncertainty, (0, 1)) + rescale(distribution, (0, 1)), rescale(uncertainty, (0, 1)) end function main() @@ -144,6 +144,12 @@ function main() fit_dict = compute_fit_stats_and_cutoff(prediction, p_and_a_coords, y) + + τ = fit_dict["threshold"] + + # Set below threshold to 0 + prediction.grid[findall(x -> x < τ, prediction.grid)] .= 0 + fit_stats_path = joinpath(runtime_dir, "fit_stats.json") open(fit_stats_path, "w") do f write(f, JSON.json(fit_dict)) From 6e986cd6e4a5f8d5a2d82352529fb03489954b64 Mon Sep 17 00:00:00 2001 From: michael Date: Sun, 1 Oct 2023 14:30:21 -0400 Subject: [PATCH 04/30] :bug: in fit stats dir --- scripts/SDM/julia_sdms/fitBRT.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/SDM/julia_sdms/fitBRT.jl b/scripts/SDM/julia_sdms/fitBRT.jl index 325549fb..2912e32c 100644 --- a/scripts/SDM/julia_sdms/fitBRT.jl +++ b/scripts/SDM/julia_sdms/fitBRT.jl @@ -145,7 +145,7 @@ function main() fit_dict = compute_fit_stats_and_cutoff(prediction, p_and_a_coords, y) - τ = fit_dict["threshold"] + τ = fit_dict[:threshold] # Set below threshold to 0 prediction.grid[findall(x -> x < τ, prediction.grid)] .= 0 From 15b7c6bd98e22b8aad6581acdbf95e7fd0701c34 Mon Sep 17 00:00:00 2001 From: michael Date: Sun, 1 Oct 2023 14:33:21 -0400 Subject: [PATCH 05/30] :bug: in clipping --- scripts/SDM/julia_sdms/fitBRT.jl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/scripts/SDM/julia_sdms/fitBRT.jl b/scripts/SDM/julia_sdms/fitBRT.jl index 2912e32c..da61dfd9 100644 --- a/scripts/SDM/julia_sdms/fitBRT.jl +++ b/scripts/SDM/julia_sdms/fitBRT.jl @@ -137,19 +137,18 @@ function main() prediction, uncertainty = predict_single_sdm(model, predictors) - sdm_path = joinpath(runtime_dir, "sdm.tif") - SpeciesDistributionToolkit.save(sdm_path, prediction) - uncertainty_path = joinpath(runtime_dir, "uncertainty.tif") - SpeciesDistributionToolkit.save(uncertainty_path, uncertainty) - - fit_dict = compute_fit_stats_and_cutoff(prediction, p_and_a_coords, y) - τ = fit_dict[:threshold] # Set below threshold to 0 prediction.grid[findall(x -> x < τ, prediction.grid)] .= 0 + sdm_path = joinpath(runtime_dir, "sdm.tif") + SpeciesDistributionToolkit.save(sdm_path, prediction) + uncertainty_path = joinpath(runtime_dir, "uncertainty.tif") + SpeciesDistributionToolkit.save(uncertainty_path, uncertainty) + + fit_stats_path = joinpath(runtime_dir, "fit_stats.json") open(fit_stats_path, "w") do f write(f, JSON.json(fit_dict)) From fe5db3f28ecd809ff2329917875a9869fed49236 Mon Sep 17 00:00:00 2001 From: michael Date: Sun, 1 Oct 2023 18:26:16 -0400 Subject: [PATCH 06/30] yaml data --- scripts/SDM/julia_sdms/fitBRT.yml | 2 +- scripts/SDM/julia_sdms/loadCHELSA.jl | 4 +++- scripts/SDM/julia_sdms/loadCHELSA.yml | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/SDM/julia_sdms/fitBRT.yml b/scripts/SDM/julia_sdms/fitBRT.yml index 6a4aee33..52076a2c 100644 --- a/scripts/SDM/julia_sdms/fitBRT.yml +++ b/scripts/SDM/julia_sdms/fitBRT.yml @@ -26,7 +26,7 @@ outputs: description: map of predicted occurrence probability type: image/tiff;application=geotiff sdm_uncertainty: - label: + label: sdm uncertainty description: map of relative uncertainty type: image/tiff;application=geotiff fit_stats: diff --git a/scripts/SDM/julia_sdms/loadCHELSA.jl b/scripts/SDM/julia_sdms/loadCHELSA.jl index ddc0c473..5ce3de05 100644 --- a/scripts/SDM/julia_sdms/loadCHELSA.jl +++ b/scripts/SDM/julia_sdms/loadCHELSA.jl @@ -71,13 +71,15 @@ function main() layers = [] for l in layer_names success = false + ct = 1 while !success try a = convert(Float32, SimpleSDMPredictor(PROVIDER; layer=l, bbox...)) success = true push!(layers, a) catch - @info "Errored on $l on attempt $i. Almost certainly a network error on CHELSA's side. Trying again..." + @info "Errored on $l on attempt $ct. Almost certainly a network error on CHELSA's side. Trying again..." + ct += 1 end end end diff --git a/scripts/SDM/julia_sdms/loadCHELSA.yml b/scripts/SDM/julia_sdms/loadCHELSA.yml index e4ad5949..9fc58603 100644 --- a/scripts/SDM/julia_sdms/loadCHELSA.yml +++ b/scripts/SDM/julia_sdms/loadCHELSA.yml @@ -16,6 +16,7 @@ inputs: type: boolean example: True layer_numbers: + label: CHELSA layer numbers description: the CHELSA layers to use type: int[] example: [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19] From 0171f24ef504d3b2b062fec6a09a7c65cdc86e8b Mon Sep 17 00:00:00 2001 From: Jean-Michel Lord Date: Mon, 2 Oct 2023 11:18:41 -0400 Subject: [PATCH 07/30] revert change that should not be merged to main --- compose.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compose.yml b/compose.yml index 62ac1d13..78f351f9 100755 --- a/compose.yml +++ b/compose.yml @@ -50,9 +50,9 @@ services: runner-julia: container_name: biab-runner-julia image: geobon/bon-in-a-box:runner-julia - build: - context: ./runners - dockerfile: julia-dockerfile + # build: + # context: ./runners + # dockerfile: julia-dockerfile command: sh # Avoids launching julia session that will not be used. tty: true # Needed to keep the container alive, waiting for requests. volumes: From 15e5a42a6b0b9dd6a22e5a4a3a29cabff19961f6 Mon Sep 17 00:00:00 2001 From: Jean-Michel Date: Thu, 28 Sep 2023 16:52:38 -0400 Subject: [PATCH 08/30] Same IO Id condition for outputs than inputs. Closes #91 --- ui/src/components/PipelineEditor/PipelineEditor.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ui/src/components/PipelineEditor/PipelineEditor.js b/ui/src/components/PipelineEditor/PipelineEditor.js index 0601a7ad..27d49e27 100644 --- a/ui/src/components/PipelineEditor/PipelineEditor.js +++ b/ui/src/components/PipelineEditor/PipelineEditor.js @@ -532,8 +532,12 @@ export default function PipelineEditor(props) { outputList.forEach((output) => { // Destructuring copy to leave out fields that are not part of the output description spec. let { file, nodeId, outputId, ...copy } = output; - flow.outputs[toIOId(output.file, output.nodeId, output.outputId)] = - copy; + const id = + file === undefined + ? toIOId("pipeline", output.nodeId) + : toIOId(output.file, output.nodeId, output.outputId); + + flow.outputs[id] = copy; }); // Save the metadata (only if metadata pane was edited) From bbb26da8dbb11dbebe810c00701afe199c18a276 Mon Sep 17 00:00:00 2001 From: Jean-Michel Lord Date: Fri, 29 Sep 2023 12:11:09 -0400 Subject: [PATCH 09/30] removed "undefined" css class --- ui/src/components/form/AutoResizeTextArea.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ui/src/components/form/AutoResizeTextArea.js b/ui/src/components/form/AutoResizeTextArea.js index 8124cf00..dcd3a2e8 100644 --- a/ui/src/components/form/AutoResizeTextArea.js +++ b/ui/src/components/form/AutoResizeTextArea.js @@ -6,7 +6,7 @@ export default function AutoResizeTextArea({defaultValue, keepWidth, className, useEffect(() => { resize(textAreaRef.current) - }, [defaultValue]) + }, [defaultValue, resize]) /** * Automatic horizontal and vertical resizing of textarea @@ -22,6 +22,6 @@ export default function AutoResizeTextArea({defaultValue, keepWidth, className, } } - return