Skip to content

Commit

Permalink
Merge pull request #114 from PALEOtoolkit/get_data_fix
Browse files Browse the repository at this point in the history
Define and test behaviour of get_data
  • Loading branch information
sjdaines authored Jan 5, 2025
2 parents fa2b625 + a2a9876 commit 75ccea0
Show file tree
Hide file tree
Showing 5 changed files with 290 additions and 96 deletions.
1 change: 1 addition & 0 deletions docs/src/PALEOmodelOutput.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ PB.has_variable(output::PALEOmodel.AbstractOutputWriter, varname::AbstractString
PALEOmodel.get_array(output::PALEOmodel.AbstractOutputWriter, varname::AbstractString, allselectargs::NamedTuple; kwargs...)
PB.get_field(output::PALEOmodel.AbstractOutputWriter, varname::AbstractString)
PB.get_data(output::PALEOmodel.AbstractOutputWriter, varname::AbstractString; records=nothing)
PB.get_data(fr::PALEOmodel.FieldRecord; records=nothing)
PB.get_mesh(output::PALEOmodel.AbstractOutputWriter, domainname::AbstractString)
```
```@meta
Expand Down
64 changes: 47 additions & 17 deletions src/FieldRecord.jl
Original file line number Diff line number Diff line change
Expand Up @@ -230,26 +230,53 @@ function Base.copy(fr::FieldRecord{FieldData, Space, V, N, Mesh, R}) where {Fiel
end

"""
PB.get_data(fr::FieldRecord; records=nothing)
PB.get_data(fr::FieldRecord; records=nothing, squeeze_all_single_dims=true)
Get data records in raw format.
Get data records in raw format. Only recommended for variables with scalar data ie one value per record.
`records` may be `nothing` to get all records,
an `Int` to select a single record, or a range to select multiple records.
If `squeeze_all_single_dims=true` (the default) and each record represents a scalar
(eg a PALEO Variable with Space PB.ScalarSpace, or a PB.CellSpace variable in a Domain with
a single cell), then data is returned as a Julia Vector. NB: if `records` is an Int,
the single record requested is returned as a length-1 Vector.
Non-scalar data (eg a non-ScalarSpace variable from a Domain with > 1 cell)
is returned in internal format as a Vector-of-Vectors.
"""
function PB.get_data(fr::FieldRecord; records=nothing)
function PB.get_data(fr::FieldRecord; records=nothing, squeeze_all_single_dims=true)

if isnothing(records)
data_output = fr.records
else
# bodge - fix scalar data
# if isa(records, Integer) && !isa(data_output, AbstractVector)
# data_output =[data_output]
#
if isa(records, Integer) && field_single_element(fr)
# represent a scalar as a length 1 Vector
# (a 0D Array might be more logical, but for backwards compatibility keep as a Vector)
data_output =[fr.records[records]]
# Optionally squeeze out single cell stored internally as a Vector-of-Vectors, length 1
# (eg a CellSpace Variable in a Domain with 1 cell)
squeeze_vecvec = squeeze_all_single_dims && !isempty(fr.records) && length(first(fr.records)) == 1
if field_single_element(fr) || squeeze_vecvec
if field_single_element(fr)
# internal format already is a Vector
records_vec = fr.records
else
# combine Vector of length 1 Vectors into a Vector
records_vec = [only(r) for r in fr.records]
end
if isnothing(records)
data_output = records_vec
else
# bodge - fix scalar data
# if isa(records, Integer) && !isa(data_output, AbstractVector)
# data_output =[data_output]
#
if isa(records, Integer)
# represent a scalar as a length 1 Vector
# (a 0D Array might be more logical, but for backwards compatibility keep as a Vector)
data_output =[records_vec[records]]
else
data_output = records_vec[records]
end
end
else
# Vector-of-Vectors - return raw data
if isnothing(records)
data_output = fr.records
else
data_output = fr.records[records]
end
Expand Down Expand Up @@ -467,8 +494,7 @@ function get_array(

# get record indices to use
ridx_to_use = select_indices[recorddimidx]
have_recorddim = !isnothing(dims_coords[recorddimidx])


# Non-record dimensions

# read non-record coordinates, from first record selected
Expand Down Expand Up @@ -510,7 +536,11 @@ function get_array(
select_indices[i] = first(select_indices[i])
end
end
# record dimension may have just been squeezed out
ridx_to_use = select_indices[recorddimidx]
end

have_recorddim = !isnothing(dims_coords[recorddimidx])

dims_coords_sq = Pair{PB.NamedDimension, Vector{FieldArray}}[dc for dc in dims_coords if !isnothing(dc)]
dims_sq = [d for (d, c) in dims_coords_sq]
Expand Down Expand Up @@ -566,7 +596,7 @@ function get_array(
_fill_array_from_records(avalues, Tuple(nonrecordindicies_sq), fr.records, expand_fn, ridx_to_use, Tuple(nonrecordindicies))
else
if isempty(nonrecordindicies_sq)
avalues[] .= @view expand_fn(fr.records[ridx_to_use])[nonrecordindicies...]
avalues[] = expand_fn(fr.records[ridx_to_use])[nonrecordindicies...]
else
avalues[nonrecordindicies_sq...] .= @view expand_fn(fr.records[ridx_to_use])[nonrecordindicies...]
end
Expand Down
49 changes: 26 additions & 23 deletions src/OutputWriters.jl
Original file line number Diff line number Diff line change
Expand Up @@ -148,11 +148,19 @@ Return the [`PALEOmodel.FieldRecord`](@ref) for `varname`.
function PB.get_field(output::PALEOmodel.AbstractOutputWriter, varname::AbstractString) end

"""
get_data(output::PALEOmodel.AbstractOutputWriter, varname; records=nothing) -> values
get_data(output::PALEOmodel.AbstractOutputWriter, varname; records=nothing, kwargs...) -> values
Get Variable `varname` raw data array, optionally restricting to `records`
Get Variable `varname` raw data array, optionally restricting to `records`.
Equivalent to `PB.get_data(PB.get_field(output, varname); records, kwargs...)`,
see [`PB.get_data(fr::PALEOmodel.FieldRecord)`](@ref).
"""
function PB.get_data(output::PALEOmodel.AbstractOutputWriter, varname::AbstractString; records=nothing, kwargs...)
    # Generic implementation for any output writer: look up the FieldRecord for `varname`
    # via `PB.get_field`, then delegate to the FieldRecord method of `PB.get_data`.
    # `records` and any additional keyword arguments are forwarded unchanged.
    fr = PB.get_field(output, varname)

    return PB.get_data(fr; records, kwargs...)
end

"""
get_mesh(output::PALEOmodel.AbstractOutputWriter, domainname::AbstractString) -> grid::Union{AbstractMesh, Nothing}
Expand Down Expand Up @@ -464,12 +472,12 @@ function PB.get_field(odom::OutputMemoryDomain, varname_or_varnamefull::Abstract
return fr
end

function PB.get_data(odom::OutputMemoryDomain, varname::AbstractString; records=nothing, kwargs...)
    # Deprecated entry point: emits a warning on every call, naming the Domain and Variable.
    @warn "get_data(odom::OutputMemoryDomain, ...) is deprecated, Domain name $(odom.name) varname $varname"

    # Look up the FieldRecord held by this Domain.
    fr = PB.get_field(odom, varname)

    # Delegate to the FieldRecord method, which is defined as `PB.get_data`;
    # `records` and any extra keyword arguments are passed straight through.
    return PB.get_data(fr; records, kwargs...)
end

function PB.show_variables(
odom::OutputMemoryDomain;
Expand All @@ -495,15 +503,19 @@ end


function PB.get_table(
    odom::OutputMemoryDomain, varnames::Vector{<:AbstractString} = AbstractString[];
    squeeze_all_single_dims=true,
)
    # Build a DataFrame with one column per entry of `odom.data`.
    # An empty `varnames` selects every entry; otherwise only entries whose key
    # (converted with `string`) appears in `varnames` are included.
    # Column values come from `PB.get_data` on each FieldRecord, with
    # `squeeze_all_single_dims` forwarded from the caller.
    df = DataFrames.DataFrame(
        [vn => PB.get_data(fr; squeeze_all_single_dims) for (vn, fr) in odom.data if (isempty(varnames) || string(vn) in varnames)]
    )

    return df
end



function PB.get_dimensions(odom::OutputMemoryDomain)
spatial_dims = isnothing(odom.grid) ? PB.NamedDimension[] : PB.get_dimensions(odom.grid)
return vcat(spatial_dims, odom.data_dims, odom.record_dim)
Expand Down Expand Up @@ -597,14 +609,16 @@ function PB.get_table(output::OutputMemory, domainname::AbstractString, varnames
return PB.get_table(output.domains[domainname], varnames)
end

function PB.get_table(output::OutputMemory, varnamefulls::Vector{<:AbstractString})
function PB.get_table(output::OutputMemory, varnamefulls::Vector{<:AbstractString};
squeeze_all_single_dims=true,
)
df = DataFrames.DataFrame()

for varnamefull in varnamefulls
vdom, varname = domain_variable_name(varnamefull)
if haskey(output.domains, vdom)
if PB.has_variable(output.domains[vdom], varname)
vardata = PB.get_data(output.domains[vdom], varname)
# Forward the caller's `squeeze_all_single_dims` (previously hard-coded to `true`), and read
# the FieldRecord directly so the deprecated per-domain `get_data` does not warn per column.
vardata = PB.get_data(PB.get_field(output.domains[vdom], varname); squeeze_all_single_dims)
df = DataFrames.insertcols!(df, varnamefull=>vardata)
else
@warn "no Variable found for $varnamefull"
Expand Down Expand Up @@ -772,17 +786,6 @@ function PB.add_field!(output::OutputMemory, fr::PALEOmodel.FieldRecord)
return PB.add_field!(output.domains[domainname], fr)
end

function PB.get_data(output::OutputMemory, varnamefull::AbstractString; records=nothing, kwargs...)
    # Split the full Variable name into a Domain name and a Variable name.
    domainname, varname = domain_variable_name(varnamefull, defaultdomainname=nothing)

    # Fail with an explicit message if the Domain is not present in this output.
    haskey(output.domains, domainname) ||
        error("Variable $varnamefull not found in output: domain $(domainname) not present")

    # Fetch the FieldRecord directly rather than via the per-domain `get_data` method,
    # which is marked deprecated elsewhere in this file and warns on every call.
    fr = PB.get_field(output.domains[domainname], varname)

    # `records` and any extra keyword arguments are forwarded to the FieldRecord method.
    return PB.get_data(fr; records, kwargs...)
end


###########################
Expand Down
Loading

0 comments on commit 75ccea0

Please sign in to comment.