GUVI imaging updates #198

Merged
merged 25 commits on Mar 7, 2024
Commits (25):
30447f0
BUG: fixed load without data
aburrell Jul 20, 2023
2958226
STY: updated labels
aburrell Jul 20, 2023
f72e2ab
ENH: updated clean method
aburrell Jul 20, 2023
d5053c2
BUG: fixed dict format
aburrell Jul 20, 2023
44c6a36
BUG: expanded masked dimensions
aburrell Jul 20, 2023
fcedfed
STY: removed whitespace
aburrell Jul 20, 2023
068d54a
ENH: added concat_data method
aburrell Jul 20, 2023
acc3395
BUG: remove time sort in `concat_data`
aburrell Jul 20, 2023
77668d2
DOC: update changelog
aburrell Jul 20, 2023
c2f7233
STY: updated spacing
aburrell Jul 25, 2023
819d8e1
ENH: added metadata
aburrell Jul 25, 2023
57228d1
Merge branch 'develop' into guvi_imaging_fix
jklenzing Sep 29, 2023
8a91717
Merge branch 'develop' into guvi_imaging_fix
jklenzing Nov 30, 2023
499fe07
MAINT: update rc tests
jklenzing Nov 30, 2023
4f0df46
Merge pull request #219 from pysat/maint/rc_tests
jklenzing Nov 30, 2023
c3638e7
Merge pull request #220 from pysat/develop
jklenzing Dec 5, 2023
959481e
BUG: fix JHUAPL time
aburrell Feb 27, 2024
7c80f0d
STY: removed type equality
aburrell Feb 27, 2024
566244e
TST: added an ignore list for links
aburrell Feb 27, 2024
d175a35
BUG: fixed inners assignment
aburrell Mar 4, 2024
eaaef3b
TST: update GUVI test dates
aburrell Mar 4, 2024
236cb98
Merge branch 'develop' into guvi_imaging_fix
jklenzing Mar 5, 2024
33ed75f
MAINT: remove new test flag from timed_guvi
jklenzing Mar 5, 2024
a6a61fb
Update pysatNASA/instruments/timed_guvi.py
jklenzing Mar 6, 2024
483c8e1
MAINT: only skip high res data
jklenzing Mar 7, 2024
17 changes: 15 additions & 2 deletions .github/workflows/pysat_rc.yml
@@ -40,9 +40,22 @@ jobs:
python -c "import pysat; pysat.params['data_dirs'] = 'pysatData'"

- name: Test with pytest
run: pytest -vs --cov=pysatNASA/
run: pytest

- name: Publish results to coveralls
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: coveralls --rcfile=setup.cfg --service=github
COVERALLS_PARALLEL: true
run: coveralls --rcfile=pyproject.toml --service=github

finish:
name: Finish Coverage Analysis
needs: build
runs-on: ubuntu-latest
steps:
- name: Coveralls Finished
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
pip install --upgrade coveralls
coveralls --service=github --finish
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -11,11 +11,15 @@ This project adheres to [Semantic Versioning](https://semver.org/).
* REACH Dosimeter
* New Features
* Allow files to be unzipped after download
* Added custom `concat_data` method to TIMED-GUVI data
* Added cleaning to TIMED-GUVI SDR imaging data
* Bug Fixes
* Fix general clean routine to skip transformation matrices
* New window needs to be integer for calculate_imf_steadiness
* Fixed version import
* Fixed a bug when data fails to load for CDF pandas objects
* Allow graceful failure with no files in jhuapl load functions
* New window needs to be integer for calculate_imf_steadiness
* Fixed a bug where cdas_download may drop the requested end date file
* Documentation
* Added example of how to export data for archival
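As a usage illustration for the TIMED-GUVI entries above, here is a minimal, hypothetical sketch (the platform, name, tag, and inst_id strings come from the instrument module changed below; the date is the updated imaging test date; keyword routing assumes standard pysat behavior):

import pysat
import pysatNASA

# Register pysatNASA's instrument modules with pysat (one-time setup)
pysat.utils.registry.register_by_module(pysatNASA.instruments)

# The new clean method and custom concat_data are used by pysat during load;
# combine_times is the jhuapl load keyword for merging the time coordinates
guvi = pysat.Instrument('timed', 'guvi', tag='sdr-imaging', inst_id='low_res',
                        clean_level='clean', combine_times=True)
guvi.load(2005, 347)  # 2005-12-13, the updated imaging test date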
3 changes: 3 additions & 0 deletions docs/conf.py
@@ -175,3 +175,6 @@

# Example configuration for intersphinx: refer to the Python standard library.
intersphinx_mapping = {'https://docs.python.org/': None}

# Links to ignore, as they require human interaction
linkcheck_ignore = [r'https://saber.gats-inc.com/temp_errors.php']
94 changes: 57 additions & 37 deletions pysatNASA/instruments/methods/jhuapl.py
@@ -6,6 +6,7 @@
import pandas as pds
import xarray as xr

import pysat
from pysat.utils.coords import expand_xarray_dims
from pysat.utils.io import load_netcdf

@@ -42,14 +43,15 @@ def build_dtimes(data, var, epoch=None, epoch_var='time'):
for i, sec in enumerate(data[skey].values)]
secs = [int(np.floor((sec - hours[i] * 3600 - mins[i] * 60)))
for i, sec in enumerate(data[skey].values)]
microsecs = [int(np.floor((sec - hours[i] * 3600 - mins[i] * 60
- secs[i]) * 1.0e6))
for i, sec in enumerate(data[skey].values)]
dtimes = [
dt.datetime.strptime(
"{:4d}-{:03d}-{:02d}-{:02d}-{:02d}-{:06.0f}".format(
"{:4d}-{:03d}-{:02d}-{:02d}-{:02d}-{:06d}".format(
int(data[ykey].values[i]), int(data[dkey].values[i]),
hours[i], mins[i], secs[i],
(sec - hours[i] * 3600 - mins[i] * 60 - secs[i]) * 1.0e6),
'%Y-%j-%H-%M-%S-%f')
for i, sec in enumerate(data[skey].values)]
hours[i], mins[i], secs[i], microsec), '%Y-%j-%H-%M-%S-%f')
for i, microsec in enumerate(microsecs)]
else:
dtimes = [
dt.datetime.strptime("{:4d}-{:03d}".format(
@@ -100,8 +102,13 @@ def load_edr_aurora(fnames, tag='', inst_id='', pandas_format=False,
inst.load(2003, 1)

"""
# Initialize the output
mdata = pysat.Meta()
data = xr.Dataset()

# Define the input variables
labels = {'units': ('UNITS', str), 'desc': ('TITLE', str)}
labels = {mdata.labels.units: ('UNITS', str),
mdata.labels.desc: ('TITLE', str)}

# CDAWeb stores these files in the NetCDF format instead of the CDF format
single_data = list()
@@ -140,12 +147,13 @@ def load_edr_aurora(fnames, tag='', inst_id='', pandas_format=False,
# Update the fill value, using information from the global header
mdata[var] = {mdata.labels.fill_val: mdata.header.NO_DATA_IN_BIN_VALUE}

# After loading all the data, determine which dimensions need to be
# expanded. Pad the data so that all dimensions are the same shape.
single_data = expand_xarray_dims(single_data, mdata, dims_equal=False)
if len(single_data) > 0:
# After loading all the data, determine which dimensions need to be
# expanded. Pad the data so that all dimensions are the same shape.
single_data = expand_xarray_dims(single_data, mdata, dims_equal=False)

# Combine all the data, indexing along time
data = xr.combine_by_coords(single_data)
# Combine all the data, indexing along time
data = xr.combine_by_coords(single_data)

return data, mdata
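The `if len(single_data) > 0` guard above implements graceful failure when no files load; a minimal standalone sketch of the same pattern (hypothetical function name, outside the pysat context):

import xarray as xr

def combine_or_empty(single_data):
    """Combine loaded Datasets, returning an empty Dataset if nothing loaded."""
    data = xr.Dataset()

    if len(single_data) > 0:
        # Only pad dimensions and combine when something was actually read
        data = xr.combine_by_coords(single_data)

    return data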

@@ -168,7 +176,7 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False,
strict_dim_check : bool
Used for xarray data (`pandas_format` is False). If True, warn the user
that the desired epoch, 'TIME_DAY', is not present as a dimension in the
NetCDF file. If False, no warning is raised. (default=True)```
NetCDF file. If False, no warning is raised. (default=True)
combine_times : bool
For SDR data, optionally combine the different datetime coordinates
into a single time coordinate (default=False)
@@ -193,8 +201,13 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False,
inst.load(2003, 1)

"""
# Initialize the output
mdata = pysat.Meta()
data = xr.Dataset()

# Define the input variables and working variables
labels = {'units': ('UNITS', str), 'desc': ('TITLE', str)}
labels = {mdata.labels.units: ('UNITS', str),
mdata.labels.desc: ('TITLE', str)}
load_time = 'TIME_DAY'
time_vars = ['YEAR_DAY', 'DOY_DAY', 'TIME_EPOCH_DAY', 'YEAR_NIGHT',
'DOY_NIGHT', 'TIME_NIGHT', 'TIME_EPOCH_NIGHT']
@@ -304,32 +317,39 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False,
# Update the fill value, using information from the global header
mdata[var] = {mdata.labels.fill_val: mdata.header.NO_DATA_IN_BIN_VALUE}

# Combine all time dimensions
if combine_times:
data_list = expand_xarray_dims([inners[dim] if dim == 'time' else
inners[dim].rename_dims({dim: 'time'})
for dim in time_dims], mdata,
dims_equal=False)
else:
data_list = [inners[dim] for dim in time_dims]
# Add metadata for 'time_auroral' and 'nCross' variables
mdata['time_auroral'] = {'desc': 'Auroral time index'}
mdata['nCross'] = {'desc': 'Number of cross-track observations'}

# Combine all the data, indexing along time
data = xr.merge(data_list)
# Combine all time dimensions
if inners is not None:
if combine_times:
data_list = expand_xarray_dims(
[inners[dim] if dim == 'time' else
inners[dim].rename_dims({dim: 'time'})
for dim in time_dims], mdata, dims_equal=False)
else:
data_list = [inners[dim] for dim in time_dims]

# Set additional coordinates
data = data.set_coords(coords).assign_coords({'time': data['time']})
if tag == 'sdr-imaging':
data = data.assign_coords(
{'nchan': ["121.6nm", "130.4nm", "135.6nm", "LBHshort", "LBHlong"],
"nchanAur": ["121.6nm", "130.4nm", "135.6nm", "LBHshort",
"LBHlong"],
"nCross": sdata.nCross.data,
"nCrossDayAur": sdata.nCrossDayAur.data})
elif tag == 'sdr-spectrograph':
data = data.assign_coords({"nchan": ["121.6nm", "130.4nm", "135.6nm",
"LBHshort", "LBHlong", "?"]})
# Combine all the data, indexing along time
data = xr.merge(data_list)

# Ensure the data is ordered correctly
data = data.sortby('time')
# Set additional coordinates
data = data.set_coords(coords).assign_coords({'time': data['time']})
if tag == 'sdr-imaging':
data = data.assign_coords(
{'nchan': ["121.6nm", "130.4nm", "135.6nm", "LBHshort",
"LBHlong"],
"nchanAur": ["121.6nm", "130.4nm", "135.6nm", "LBHshort",
"LBHlong"],
"nCross": sdata.nCross.data,
"nCrossDayAur": sdata.nCrossDayAur.data})
elif tag == 'sdr-spectrograph':
data = data.assign_coords({"nchan": ["121.6nm", "130.4nm",
"135.6nm", "LBHshort",
"LBHlong", "?"]})

# Ensure the data is ordered correctly
data = data.sortby('time')

return data, mdata
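To illustrate the `combine_times` branch above, here is a small standalone xarray sketch (hypothetical variable names and values) contrasting separate day/auroral time dimensions with a single renamed 'time' dimension:

import numpy as np
import pandas as pds
import xarray as xr

day = xr.Dataset({'radiance_day': ('time', np.arange(3.0))},
                 coords={'time': pds.date_range('2005-12-13', periods=3,
                                                freq='s')})
aur = xr.Dataset({'radiance_aur': ('time_auroral', np.arange(2.0))},
                 coords={'time_auroral': pds.date_range('2005-12-13',
                                                        periods=2, freq='s')})

# combine_times=False keeps the two time dimensions side by side
separate = xr.merge([day, aur])

# combine_times=True renames each time-like dimension to 'time' before merging;
# in the real code, expand_xarray_dims pads shorter arrays with fill values
combined = xr.merge([day, aur.rename({'time_auroral': 'time'})])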
140 changes: 133 additions & 7 deletions pysatNASA/instruments/timed_guvi.py
@@ -60,7 +60,9 @@

import datetime as dt
import functools
import xarray as xr

import pysat
from pysat.instruments.methods import general as mm_gen

from pysatNASA.instruments.methods import cdaweb as cdw
@@ -86,26 +88,150 @@
# ----------------------------------------------------------------------------
# Instrument test attributes

_test_dates = {iid: {tag: dt.datetime(2005, 6, 28) for tag in inst_ids[iid]}
for iid in inst_ids.keys()}
_test_dates = {
iid: {tag: dt.datetime(2007 if tag.find('spectrograph') > 0 else 2005, 12,
13) for tag in inst_ids[iid]}
for iid in inst_ids.keys()}
_test_load_opt = {iid: {tag: {'combine_times': True}
for tag in inst_ids[iid]}
for iid in ['high_res', 'low_res']}
# TODO(#218): Remove when compliant with multi-day load tests
_new_tests = {iid: {tag: False for tag in inst_ids[iid]}
for iid in ['high_res', 'low_res']}
_new_tests = {'high_res': {tag: False for tag in inst_ids['high_res']}}
_clean_warn = {inst_id: {tag: mm_nasa.clean_warnings
for tag in inst_ids[inst_id]}
for tag in inst_ids[inst_id] if tag != 'sdr-imaging'}
for inst_id in inst_ids.keys()}
for inst_id in ['high_res', 'low_res']:
_clean_warn[inst_id]['sdr-imaging'] = {'dirty': mm_nasa.clean_warnings[
'dirty']}

# ----------------------------------------------------------------------------
# Instrument methods

# Use standard init routine
init = functools.partial(mm_nasa.init, module=mm_timed, name=name)

# No cleaning, use standard warning function instead
clean = mm_nasa.clean_warn

def clean(self):
"""Clean TIMED GUVI imaging data.

Note
----
Supports 'clean', 'dusty', 'dirty', 'none'. Method is
not called by pysat if clean_level is None or 'none'.

"""
if self.tag == "sdr-imaging" and self.clean_level in ['clean', 'dusty']:
# Find the flag variables
dqi_vars = [var for var in self.variables if var.find('DQI') == 0]

# Find the variables affected by each flag
dat_vars = {dqi: [var for var in self.variables if var.find(dqi) > 0]
if dqi.find('AURORAL') >= 0 else
[var for var in self.variables if var.find('AURORAL') < 0
and var.find(dqi) > 0] for dqi in dqi_vars}

for dqi in dqi_vars:
if self.clean_level == 'clean':
# For clean, require DQI of zero (MeV noise only)
dqi_bad = self.data[dqi].values > 0
else:
# For dusty, allow the SAA region as well
dqi_bad = self.data[dqi].values > 1

# Apply the DQI mask to the data, replacing bad values with
# appropriate fill values
for dat_var in dat_vars[dqi]:
if self.data[dat_var].shape == dqi_bad.shape or self.data[
dat_var].shape[:-1] == dqi_bad.shape:
# Only apply to data with the correct dimensions
fill_val = self.meta[dat_var, self.meta.labels.fill_val]
self.data[dat_var].values[dqi_bad] = fill_val
else:
# Follow the same warning format as the general clean warning, but
# with additional information.
pysat.logger.warning(' '.join(['No cleaning routines available for',
self.platform, self.name, self.tag,
self.inst_id, 'at clean level',
self.clean_level]))
return
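A standalone sketch (hypothetical arrays, outside of pysat) of the masking pattern the clean method applies, including the broadcast over a trailing dimension:

import numpy as np

dqi = np.array([0, 1, 2, 0])   # hypothetical data quality flags, one per scan
radiance = np.ones((4, 3))     # hypothetical data; leading shape matches dqi
fill_val = np.nan

# 'clean' requires DQI == 0 (MeV noise only); 'dusty' also allows DQI == 1 (SAA)
dqi_bad = dqi > 0
radiance[dqi_bad] = fill_val   # bad scans are filled across the last axis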


def concat_data(self, new_data, combine_times=False, **kwargs):
"""Concatonate data to self.data for TIMED GUVI data.

Parameters
----------
new_data : xarray.Dataset or list of such objects
New data objects to be concatenated
combine_times : bool
For SDR data, optionally combine the different datetime coordinates
into a single time coordinate (default=False)
**kwargs : dict
Optional keyword arguments passed to xr.concat

Note
----
For xarray, `dim=Instrument.index.name` is passed along to xarray.concat
except if the user includes a value for dim as a keyword argument.

"""
# Establish the time dimensions by data type
time_dims = [self.index.name]

if self.tag == 'sdr-imaging':
time_dims.append('time_auroral')
elif self.tag == 'sdr-spectrograph':
time_dims.extend(['time_gaim_day', 'time_gaim_night'])

# Concatenate using the appropriate method for the number of time
# dimensions
if len(time_dims) == 1:
# There is only one time dimension, but other dimensions may
# need to be adjusted
new_data = pysat.utils.coords.expand_xarray_dims(
new_data, self.meta, exclude_dims=time_dims)

# Combine the data
self.data = xr.combine_by_coords(new_data, **kwargs)
else:
inners = None
for ndata in new_data:
# Separate into inner datasets
inner_keys = {dim: [key for key in ndata.keys()
if dim in ndata[key].dims] for dim in time_dims}
inner_dat = {dim: ndata.get(inner_keys[dim]) for dim in time_dims}

# Add 'single_var's into 'time' dataset to keep track
sv_keys = [val.name for val in ndata.values()
if 'single_var' in val.dims]
singlevar_set = ndata.get(sv_keys)
inner_dat[self.index.name] = xr.merge([inner_dat[self.index.name],
singlevar_set])

# Concatenate along desired dimension with previous data
if inners is None:
# No previous data, assign the data separated by dimension
inners = dict(inner_dat)
else:
# Concatenate with existing data
inners = {dim: xr.concat([inners[dim], inner_dat[dim]],
dim=dim) for dim in time_dims}

# Combine all time dimensions
if inners is not None:
if combine_times:
data_list = pysat.utils.coords.expand_xarray_dims(
[inners[dim] if dim == self.index.name else
inners[dim].rename_dims({dim: self.index.name})
for dim in time_dims if len(inners[dim].dims) > 0],
self.meta, dims_equal=False)
else:
data_list = [inners[dim] for dim in time_dims]

# Combine all the data, indexing along time
self.data = xr.merge(data_list)
return
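pysat calls `concat_data` when more than one day is loaded, so a hypothetical multi-day load (the dates are illustrative) would exercise the method above:

import datetime as dt
import pysat
from pysatNASA.instruments import timed_guvi

guvi = pysat.Instrument(inst_module=timed_guvi, tag='sdr-imaging',
                        inst_id='low_res')
guvi.load(date=dt.datetime(2005, 12, 13), end_date=dt.datetime(2005, 12, 15))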


# ----------------------------------------------------------------------------
# Instrument functions