Feature/adjust time function (#272)
* update get time functions

* update to fix pylint flake8

* update project dependencies

* fix tests

* update libraries for snyk

* poetry update

* update pytoml

* update test retry time

* add harmony deployment into github actions

* update harmony deployment script

* update pytoml version to develop

* update tests delays

* fix harmony deploy url

* removed debug print statement
sliu008 authored Jun 25, 2024
1 parent 26be201 commit 55ae1e0
Showing 9 changed files with 432 additions and 284 deletions.
14 changes: 13 additions & 1 deletion .github/workflows/build-pipeline.yml
@@ -84,7 +84,7 @@ jobs:
           poetry run flake8 podaac
       - name: Test and coverage
         run: |
-          poetry run pytest --junitxml=build/reports/pytest.xml --cov=podaac/ --cov-report=xml:build/reports/coverage.xml -m "not aws and not integration" tests/
+          poetry run pytest -n auto --junitxml=build/reports/pytest.xml --cov=podaac/ --cov-report=xml:build/reports/coverage.xml -m "not aws and not integration" tests/
       - name: SonarCloud Scan
         id: sonarcloud
         uses: sonarsource/sonarcloud-github-action@master
@@ -295,3 +295,15 @@ jobs:
           git config user.email "${GITHUB_ACTOR}@users.noreply.github.com"
           git tag -a "${{ env.software_version }}" -m "Version ${{ env.software_version }}"
           git push origin "${{ env.software_version }}"
+      - name: Deploy Harmony
+        env:
+          ENV: ${{ env.venue }}
+          CMR_USER: ${{ secrets.CMR_USER }}
+          CMR_PASS: ${{ secrets.CMR_PASS }}
+        if: |
+          github.ref == 'refs/heads/main' ||
+          startsWith(github.ref, 'refs/heads/release')
+        working-directory: deployment
+        run:
+          poetry run python harmony_deploy.py --tag ${{ env.software_version }}
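Two substantive changes in this workflow: the test step now passes "-n auto", which lets pytest-xdist spread the suite across available CPU cores, and a new Deploy Harmony step, run only on the main and release branches, invokes the deployment script below with the freshly tagged version.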
67 changes: 67 additions & 0 deletions deployment/harmony_deploy.py
@@ -0,0 +1,67 @@
import os
import requests
import json
import logging
import argparse
from requests.auth import HTTPBasicAuth

# Environment variables
ENV = os.getenv('ENV')
CMR_USER = os.getenv('CMR_USER')
CMR_PASS = os.getenv('CMR_PASS')

def bearer_token() -> str:
tokens = []
headers = {'Accept': 'application/json'}
url = f"https://{'uat.' if ENV == 'uat' else ''}urs.earthdata.nasa.gov/api/users"

# First just try to get a token that already exists
try:
resp = requests.get(url + "/tokens", headers=headers, auth=HTTPBasicAuth(CMR_USER, CMR_PASS))
response_content = json.loads(resp.content)

for x in response_content:
tokens.append(x['access_token'])

except Exception: # noqa E722
logging.warning("Error getting the token - check user name and password", exc_info=True)

# No tokens exist, try to create one
if not tokens:
try:
resp = requests.post(url + "/token", headers=headers, auth=HTTPBasicAuth(CMR_USER, CMR_PASS))
response_content = json.loads(resp.content)
tokens.append(response_content['access_token'])
except Exception: # noqa E722
logging.warning("Error getting the token - check user name and password", exc_info=True)

# If still no token, then we can't do anything
if not tokens:
raise RuntimeError("Unable to get bearer token from EDL")

return next(iter(tokens))

if __name__ == "__main__":

parser = argparse.ArgumentParser(description="Update the service image tag.")
parser.add_argument("--tag", help="The new tag version to update.", required=True)
args = parser.parse_args()

url = f"https://harmony.{'uat.' if ENV == 'uat' else ''}earthdata.nasa.gov/service-image-tag/podaac-l2-subsetter"
token = bearer_token()

headers = {
"Authorization": f"Bearer {token}",
"Content-type": "application/json"
}
data = {
"tag": args.tag
}

response = requests.put(url, headers=headers, json=data)

print(response.status_code)
try:
print(response.json())
except json.JSONDecodeError:
print("Response content is not in JSON format")
4 changes: 2 additions & 2 deletions podaac/subsetter/dimension_cleanup.py
@@ -117,8 +117,8 @@ def recreate_pixcore_dimensions(datasets: list):
     dim_dict = {}
     count = 0
     for dataset in datasets:
-        dim_list_shape = list(dataset.dims.values())
-        current_dims = list(dataset.dims.keys())
+        dim_list_shape = list(dataset.sizes.values())
+        current_dims = list(dataset.sizes.keys())
         rename_list = []
         for current_dim, dim_value in zip(current_dims, dim_list_shape):
            if current_dim not in dim_dict:
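The dims-to-sizes rename here (and in the files below) tracks xarray's deprecation cycle: Dataset.dims is slated to return just the set of dimension names, while Dataset.sizes remains the name-to-length mapping. A minimal illustration with made-up data:

# Minimal illustration of the dims -> sizes change (hypothetical dataset).
import numpy as np
import xarray as xr

ds = xr.Dataset({"sst": (("lat", "lon"), np.zeros((2, 3)))})

print(dict(ds.sizes))  # {'lat': 2, 'lon': 3} -- name-to-length mapping
print(list(ds.dims))   # ['lat', 'lon'] -- names only
# On recent xarray, mapping-style access such as ds.dims['lat'] or
# ds.dims.values() emits a FutureWarning; ds.sizes is the replacement.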
5 changes: 2 additions & 3 deletions podaac/subsetter/group_handling.py
@@ -119,12 +119,11 @@ def recombine_grouped_datasets(datasets: List[xr.Dataset], output_file: str, sta
         for group in groups:
             base_dataset.createGroup(group)

-        for dim_name in list(dataset.dims.keys()):
+        for dim_name in list(dataset.sizes.keys()):
             new_dim_name = dim_name.split(GROUP_DELIM)[-1]
             dim_group = _get_nested_group(base_dataset, dim_name)
             if new_dim_name not in dim_group.dimensions:
-                dim_group.createDimension(new_dim_name, dataset.dims[dim_name])
-
+                dim_group.createDimension(new_dim_name, dataset.sizes[dim_name])
         # Rename variables
         _rename_variables(dataset, base_dataset, start_date, time_vars)

16 changes: 13 additions & 3 deletions podaac/subsetter/subset.py
@@ -23,6 +23,7 @@
 import json
 import operator
 import os
+import re
 from itertools import zip_longest
 from typing import List, Optional, Tuple, Union
 import dateutil
@@ -270,11 +271,11 @@ def calculate_chunks(dataset: xr.Dataset) -> dict:
     """
     if len(dataset.dims) <= 3:
         chunk = {dim: 4000 for dim in dataset.dims
-                 if dataset.dims[dim] > 4000
+                 if dataset.sizes[dim] > 4000
                  and len(dataset.dims) > 1}
     else:
         chunk = {dim: 500 for dim in dataset.dims
-                 if dataset.dims[dim] > 500}
+                 if dataset.sizes[dim] > 500}

     return chunk
@@ -528,7 +529,7 @@ def compute_time_variable_name(dataset: xr.Dataset, lat_var: xr.Variable, total_
         return time_vars[0]

     # Filter variables with 'time' in the name to avoid extra work
-    time_vars = list(filter(lambda var_name: 'time' in var_name, dataset.dims.keys()))
+    time_vars = list(filter(lambda var_name: 'time' in var_name, dataset.sizes.keys()))

     for var_name in time_vars:
         if var_name not in total_time_vars and "time" in var_name and dataset[var_name].squeeze().dims == lat_var.squeeze().dims:
@@ -542,6 +543,15 @@
         if var_name not in total_time_vars and ('time' == var_name_time.lower() or 'timeMidScan' == var_name_time) and dataset[var_name].squeeze().dims[0] in lat_var.squeeze().dims:
             return var_name

+    time_units_pattern = re.compile(r"(days|d|hours|hr|h|minutes|min|m|seconds|sec|s) since \d{4}-\d{2}-\d{2}( \d{2}:\d{2}:\d{2})?")
+    # Check variables for common time variable indicators
+    for var_name, var in dataset.variables.items():
+        # pylint: disable=too-many-boolean-expressions
+        if ((('standard_name' in var.attrs and var.attrs['standard_name'] == 'time') or
+                ('axis' in var.attrs and var.attrs['axis'] == 'T') or
+                ('units' in var.attrs and time_units_pattern.match(var.attrs['units'])))) and var_name not in total_time_vars:
+            return var_name
+
     # then check if any variables have 'time' in the string if the above loop doesn't return anything
     for var_name in list(dataset.data_vars.keys()):
         var_name_time = var_name.strip(GROUP_DELIM).split(GROUP_DELIM)[-1]