Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Edit ADRIA.Domain() and generalize to incorporate loading of GBR-wide domain package #904

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
2 changes: 1 addition & 1 deletion src/ADRIA.jl
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ export RMEResultSet
# export dims, ndims

# List out compatible domain datapackages
const COMPAT_DPKG = ["0.7.0-rc", "0.7.0"]
const COMPAT_DPKG = ["0.7.0-rc", "0.7.0", "0.1.0-gbr"]
# This adds ~30 seconds to package load times
if ccall(:jl_generating_output, Cint, ()) == 1
Base.precompile(Tuple{typeof(load_domain),String}) # time: 19.120537
Expand Down
52 changes: 45 additions & 7 deletions src/ExtInterface/ADRIA/Domain.jl
Copy link
Collaborator

@DanTanAtAims DanTanAtAims Jan 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The location of the geopackage is correctly documented in the datapackage.json file; however, load_domain does not currently use this. This causes an error during load_domain.

ERROR: Provided location data path is not valid or missing: C:/Users/dtan/data/GBR_2024_10_15\spatial\GBR_2024_10_15.gpkg.
Stacktrace:

Note: On this line, ADRIA currently uses the name of the directory as the name of the geopackage; this should be changed to use the path provided in datapackage.json.

This information is stored in the Moore Domain, however the most recent version v070_rc1 incorrectly lists the spatial gpkg as v060_rc1

Copy link
Collaborator

@DanTanAtAims DanTanAtAims Jan 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some ADRIA functions assume the column area exists in the geopackage dataframe. The canonical reefs gpkg currently labels this column as ReefMod_area_m2. This prevents the model from running.

ERROR: ArgumentError: column name :area not found in the data frame

Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,10 @@ function Domain(
timeframe
)

# Sort data to maintain consistent order
sort!(location_data, Symbol[Symbol(location_id_col)])
if cluster_id_col == "cluster_id"
# Sort data to maintain consistent order (only if domain is cluster-scale).
sort!(location_data, Symbol[Symbol(location_id_col)])
end
u_sids::Vector{String} = string.(collect(location_data[!, location_id_col]))
# If location id column is missing then derive it from the Unique IDs
if !in(location_id_col, names(location_data))
Expand All @@ -173,7 +175,25 @@ function Domain(
location_data = location_data[
coalesce.(in.(conn_ids, [connectivity.loc_ids]), false), (:)
]
location_data.k .= location_data.k / 100.0 # Make `k` non-dimensional (provided as a percent)
if ("k" ∉ names(location_data)) &
("ReefMod_habitable_proportion" ∈ names(location_data))
@warn "
k column not found in gbr-wide canonical-reefs gpkg.
# Defaulting to ReefMod_habitable_proportion (in proportion 0-1 scale).
"
rename!(location_data, :ReefMod_habitable_proportion => :k)
else
location_data.k .= location_data.k / 100.0 # Make `k` non-dimensional (provided as a percent in cluster-scale data).
end

if ("area" ∉ names(location_data)) &
("ReefMod_area_m2" ∈ names(location_data))
@warn "
# area column not found in gbr-wide canonical-reefs gpkg.
# Defaulting to ReefMod_area_m2 (Total possible coral area in m2).
"
rename!(location_data, :ReefMod_area_m2 => :area)
end

n_locs::Int64 = nrow(location_data)
n_groups::Int64, n_sizes::Int64 = size(linear_extensions())
Expand All @@ -191,7 +211,8 @@ function Domain(
waves = load_env_data(waves_params...)

cyc_params =
ispath(cyclone_mortality_fn) ? (cyclone_mortality_fn,) : (timeframe, location_data)
ispath(cyclone_mortality_fn) ? (cyclone_mortality_fn,) :
(timeframe, location_data, location_id_col)
cyclone_mortality = load_cyclone_mortality(cyc_params...)

# Add compatibility with non-migrated datasets but always default current coral spec
Expand All @@ -207,6 +228,14 @@ function Domain(
msg::String = "Provided time frame must match timesteps in DHW and wave data"
msg = msg * "\n Got: $(length(timeframe)) | $(size(dhw, 1)) | $(size(waves, 1))"

# ReefMod DHW timeseries data (2000:2100) do not match the length of waves/cyclone
# data (2025:2099). Can be fixed when AIMS-sourced DHW trajectories become available.
Comment on lines +231 to +232
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# ReefMod DHW timeseries data (2000:2100) do not match the length of waves/cyclone
# data (2025:2099). Can be fixed when AIMS-sourced DHW trajectories become available.
# ReefMod DHW timeseries data (2000:2100) do not match the length of waves/cyclone
# data (2025:2099). Can be fixed when AIMS-sourced DHW trajectories become available.

Whitespace

if size(dhw, 1) > size(waves, 1)
first_year = findfirst(collect(2000:2100) .== first(timeframe))
last_year = findfirst(collect(2000:2100) .== last(timeframe))
dhw = dhw[first_year:last_year, :, :]
end

@assert length(timeframe) == size(dhw, 1) == size(waves, 1) msg

return Domain(
Expand Down Expand Up @@ -245,6 +274,8 @@ function load_domain(::Type{ADRIADomain}, path::String, rcp::String)::ADRIADomai
end

dpkg_details::Dict{String,Any} = _load_dpkg(path)
location_id_col = _get_id_col(dpkg_details)
cluster_id_col::String = "cluster_id"

# Handle compatibility
# Extract the time frame represented in this data package
Expand All @@ -262,9 +293,6 @@ function load_domain(::Type{ADRIADomain}, path::String, rcp::String)::ADRIADomai
timeframe = parse.(Int64, md_timeframe)
end

location_id_col::String = "reef_siteid"
cluster_id_col::String = "cluster_id"

conn_path::String = joinpath(path, "connectivity/")
spatial_path::String = joinpath(path, "spatial")

Expand All @@ -290,6 +318,7 @@ function load_domain(::Type{ADRIADomain}, path::String, rcp::String)::ADRIADomai
cyclone_mortality_fn
)
end

# Convenience method: forwards to the `ADRIADomain`-typed `load_domain` method with the
# same `path` and `rcp`.
load_domain(path::String, rcp::String)::ADRIADomain = load_domain(ADRIADomain, path, rcp)
Expand All @@ -307,6 +336,15 @@ function get_wave_data(d::ADRIADomain, RCP::String)::String
return joinpath(d.env_layer_md.dpkg_path, "waves", "wave_RCP$(RCP).nc")
end

"""
    _get_id_col(dpkg_details)

Return the first entry of the `"data"` field of the resource named `"spatial_data"` in
`dpkg_details["resources"]`.

NOTE(review): `load_domain` uses the returned value as the location ID column name, which
presumes the ID column is listed first in that resource's `"data"` — confirm against the
datapackage layout.

# Arguments
- `dpkg_details` : Parsed datapackage details. Must provide a `"resources"` collection
  whose entries each have a `"name"` key; the matching entry must have a `"data"` key.

# Throws
- `ArgumentError` if no resource is named `"spatial_data"`.
"""
function _get_id_col(dpkg_details)
    resources = dpkg_details["resources"]

    # Search with a predicate so no temporary vector of names is allocated.
    spatial_idx = findfirst(r -> r["name"] == "spatial_data", resources)
    if isnothing(spatial_idx)
        # Fail with an actionable message instead of an opaque "invalid index: nothing".
        throw(ArgumentError("No resource named \"spatial_data\" found in datapackage."))
    end

    return first(resources[spatial_idx]["data"])
end

"""
switch_RCPs!(d::Domain, RCP::String)::Domain

Expand Down
6 changes: 4 additions & 2 deletions src/io/inputs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,12 @@ function load_cyclone_mortality(data_fn::String)::YAXArray
cyclone_cube::YAXArray = Cube(data_fn)
return sort_axis(cyclone_cube, :locations)
end
function load_cyclone_mortality(timeframe::Vector{Int64}, loc_data::DataFrame)::YAXArray
function load_cyclone_mortality(
timeframe::Vector{Int64}, loc_data::DataFrame, location_id_col::String
)::YAXArray
return ZeroDataCube(;
timesteps=1:length(timeframe),
locations=sort(loc_data.reef_siteid),
locations=loc_data[:, location_id_col],
species=ADRIA.coral_spec().taxa_names,
scenarios=[1]
)
Expand Down
Loading