Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow missing/empty cycle time files in timeseries #211

Merged
merged 7 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 10 additions & 15 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,18 +1,13 @@
setuptools>=59.4.0
pyyaml>=6.0
pycodestyle>=2.8.0
netCDF4>=1.5.3
matplotlib>=3.9.0
cartopy>=0.21.1
scipy>=1.9.3
xarray>=2022.3.0
pandas>=1.4.0
numpy>=2.0.0

# Not explicitly part of eva but dependencies of eva dependencies already in spack-stack
# versions need to be set to avoid other versions being picked
pyproj==3.1.0
importlib-metadata==4.8.2
setuptools
pyyaml
pycodestyle
netCDF4
matplotlib
danholdaway marked this conversation as resolved.
Show resolved Hide resolved
cartopy
scipy
xarray
pandas
numpy

# Additional packages
git+https://github.com/NOAA-EMC/emcpy.git@f7b863d9508b921a78d7ff0e53de0b95e9a176f7#egg=emcpy
Expand Down
26 changes: 26 additions & 0 deletions requirements_gmao.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
setuptools>=59.4.0
pyyaml>=6.0
pycodestyle>=2.8.0
netCDF4
matplotlib
cartopy>=0.21.1
scipy>=1.9.3
xarray>=2022.3.0
pandas>=1.4.0
numpy==1.22.3

# Not explicitly part of eva but dependencies of eva dependencies already in spack-stack
# versions need to be set to avoid other versions being picked
pyproj
importlib-metadata==4.8.2
contourpy==1.0.7

# Additional packages
git+https://github.com/NOAA-EMC/emcpy.git@f7b863d9508b921a78d7ff0e53de0b95e9a176f7#egg=emcpy
scikit-learn
seaborn
hvplot
nbconvert
bokeh
geopandas
geoviews
28 changes: 28 additions & 0 deletions requirements_sles15.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
setuptools>=59.4.0
pyyaml>=6.0
pycodestyle>=2.8.0
netCDF4
matplotlib
cartopy>=0.21.1
scipy>=1.9.3
xarray>=2022.3.0
pandas>=1.4.0
numpy==1.22.3
attrs==21.4.0

# Not explicitly part of eva but dependencies of eva dependencies already in spack-stack
# versions need to be set to avoid other versions being picked
pyproj
importlib_metadata==7.1.0
contourpy==1.0.7
msgpack>=1.0.0

# Additional packages
git+https://github.com/NOAA-EMC/emcpy.git@f7b863d9508b921a78d7ff0e53de0b95e9a176f7#egg=emcpy
scikit-learn
seaborn
hvplot
nbconvert
bokeh
geopandas
geoviews
29 changes: 23 additions & 6 deletions src/eva/eva_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,22 @@
import argparse
import os
from collections import defaultdict
import xarray as xr
import numpy as np

from eva.utilities.config import get
from eva.utilities.logger import Logger
from eva.utilities.timing import Timing
from eva.data.data_driver import data_driver
from eva.time_series.time_series import add_empty_to_timeseries
from eva.time_series.time_series import collapse_collection_to_time_series
from eva.time_series.time_series_utils import create_empty_data, get_filename, check_file
from eva.transforms.transform_driver import transform_driver
from eva.plotting.batch.base.plot_tools.figure_driver import figure_driver
from eva.data.data_collections import DataCollections
from eva.utilities.duration import iso_duration_to_timedelta
from eva.utilities.utils import load_yaml_file


# --------------------------------------------------------------------------------------------------


Expand Down Expand Up @@ -160,14 +163,29 @@ def read_transform_time_series(logger, timing, eva_dict, data_collections):
if name == time_series_config['collection']:
transform_dict['transforms'].append(transform)

# Assert that datasets_config is the same length as dates
logger.assert_abort(len(datasets_config) == len(dates), 'When running in time ' +
'series mode the number of datasets must be the same as the ' +
'number of dates.')
# Check if first file is empty. If it is, abort.
empty_dataset_config = datasets_config[0]
filename = get_filename(empty_dataset_config, logger)
check_file(filename, logger)

# Loop over datasets reading each one in turn, internally appending the data_collections
for ind, dataset_config in enumerate(datasets_config):

# Pull out information to check for missing date
date = dates[ind]

# Check if file exists, if not add empty and continue
filename = get_filename(dataset_config, logger)
if not os.path.isfile(filename):
add_empty_to_timeseries(logger, date, ind, timing, time_series_config,
empty_dataset_config, data_collections)
continue
# Check if file exists but is size zero, add empty and continue
elif os.stat(filename).st_size == 0:
add_empty_to_timeseries(logger, date, ind, timing, time_series_config,
empty_dataset_config, data_collections)
continue

# Create a temporary collection for this time step
data_collections_tmp = DataCollections()

Expand All @@ -185,7 +203,6 @@ def read_transform_time_series(logger, timing, eva_dict, data_collections):
timing.stop('TransformDriverExecute')

# Collapse data into time series
date = dates[ind]
collapse_collection_to_time_series(logger, ind, date, time_series_config,
data_collections, data_collections_tmp)

Expand Down
11 changes: 10 additions & 1 deletion src/eva/tests/config/testIodaObsSpaceAmsuaN19_TimeSeries.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,15 @@ datasets:
- name: ObsValue
variables: &variables [brightnessTemperature]
- name: hofx
#Empty
- name: experiment
type: IodaObsSpace
filenames:
- ${data_input_path}/ioda_obs_space.amsua_n19.hofx.2020-12-14T000000Z.nc4
channels: *channels
groups:
- name: ObsValue
- name: hofx
- name: experiment
type: IodaObsSpace
filenames:
Expand All @@ -31,7 +40,7 @@ transforms:
time_series:

- begin_date: '2020-12-14T21:00:00'
final_date: '2020-12-15T03:00:00'
final_date: '2020-12-15T09:00:00'
interval: 'PT6H'

collection: experiment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,16 @@ datasets:
bias_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.satbias
lapse_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-11T21:00:00Z.tlapse

# Empty
- name: experiment
type: JediVariationalBiasCorrection
bias_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-12T03:00:00Z.satbias
lapse_file: ${data_input_path}/gsi.x0048v2.bc.amsua_n19.2021-12-12T03:00:00Z.tlapse

time_series:

- begin_date: '2020-12-15T00:00:00'
final_date: '2020-12-15T06:00:00'
- begin_date: '2021-12-11T15:00:00'
final_date: '2021-12-12T03:00:00'
interval: 'PT6H'

collection: experiment
Expand Down
14 changes: 13 additions & 1 deletion src/eva/time_series/time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@

import numpy as np
import xarray as xr

from eva.data.data_collections import DataCollections
from eva.time_series.time_series_utils import create_empty_data

# --------------------------------------------------------------------------------------------------

Expand All @@ -23,6 +24,17 @@
}


# --------------------------------------------------------------------------------------------------

def add_empty_to_timeseries(logger, date, ind, timing, time_series_config,
                            dataset_config, data_collections):
    """
    Add an empty collection to the time series for a missing date.

    Args:
        logger: Logger forwarded to create_empty_data and to the collapse step.
        date: Date of the time step that is being filled in.
        ind: Index of the time step within the time series.
        timing: Timing object forwarded to create_empty_data.
        time_series_config (dict): Configuration of the time series being built.
        dataset_config (dict): Dataset configuration from which the empty data is derived.
        data_collections: Collections that the time series is accumulated into.

    Returns:
        None
    """

    # Stand-in data built by create_empty_data from the supplied dataset configuration
    placeholder = create_empty_data(time_series_config, dataset_config, timing, logger)

    # Fold the stand-in into the time series exactly as a regular time step would be
    collapse_collection_to_time_series(logger, ind, date, time_series_config,
                                       data_collections, placeholder)


# --------------------------------------------------------------------------------------------------


Expand Down
43 changes: 43 additions & 0 deletions src/eva/time_series/time_series_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import os
import numpy as np
import xarray as xr
from eva.data.data_driver import data_driver
from eva.data.data_collections import DataCollections


def _ioda_obs_space_filename(dataset_config):
    """ Return the first entry of the dataset's ``filenames`` list """
    return dataset_config["filenames"][0]


def _jedi_bias_correction_filename(dataset_config):
    """ Return the dataset's ``bias_file`` entry """
    return dataset_config["bias_file"]


# Maps a dataset ``type`` string to the callable that pulls a filename out of its configuration
filename_retrieval = {
    "IodaObsSpace": _ioda_obs_space_filename,
    "JediVariationalBiasCorrection": _jedi_bias_correction_filename,
}


def get_filename(dataset_config, logger):
    """
    Look up the filename of a dataset according to its type.

    Args:
        dataset_config (dict): Dataset configuration; its ``type`` entry selects the retrieval
                               callable registered in ``filename_retrieval``.
        logger: Logger whose ``assert_abort`` receives the type check and its message.

    Returns:
        The value produced by the retrieval callable registered for the dataset type.
    """

    kind = dataset_config["type"]

    # Only types registered in filename_retrieval are supported
    is_known = kind in filename_retrieval
    logger.assert_abort(is_known, f'Unknown dataset_type {kind}')

    retrieve = filename_retrieval[kind]
    return retrieve(dataset_config)


def check_file(filename, logger):
    """
    Verify that the first file of a time series is present and non-empty.

    Args:
        filename (str): Path of the file to check.
        logger: Logger whose ``abort`` method is called with a message when the check fails.

    Returns:
        None
    """

    # A missing file is reported first; the size is only examined for a file that exists
    file_is_present = os.path.isfile(filename)
    if not file_is_present:
        logger.abort('First file provided to timeseries must exist.')
        return

    file_size = os.stat(filename).st_size
    if file_size == 0:
        logger.abort('First file provided to timeseries must be nonzero.')


def create_empty_data(timeseries_config, dataset_config, timing, logger):
    """
    Build a NaN-filled data collection to stand in for a missing cycle time.

    The dataset described by dataset_config is read through data_driver into a scratch
    DataCollections object. The collection named in the time series configuration is then
    copied with every value replaced by NaN and stored in a fresh DataCollections object.

    Args:
        timeseries_config (dict): Time series configuration; ``collection`` names the
                                  collection to copy.
        dataset_config (dict): Configuration of the dataset that is read as the template.
        timing: Timing object forwarded to data_driver.
        logger: Logger forwarded to data_driver.

    Returns:
        DataCollections: New object holding only the NaN-filled collection.
    """

    # Read the template dataset into a scratch object so the caller's collections are untouched
    scratch_collections = DataCollections()
    collection_name = timeseries_config["collection"]
    data_driver(dataset_config, scratch_collections, timing, logger)
    template = scratch_collections.get_data_collection(collection_name)

    # Copy of the template with every value replaced by NaN
    nan_filled = xr.full_like(template, np.nan)

    empty_collections = DataCollections()
    empty_collections.create_or_add_to_collection(collection_name, nan_filled)
    return empty_collections
1 change: 0 additions & 1 deletion src/eva/utilities/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

# --------------------------------------------------------------------------------------------------


import re
import string
import yaml
Expand Down
Loading