TableDAP normalizer #143

Merged (2 commits, Mar 7, 2024)

87 changes: 87 additions & 0 deletions metanorm/normalizers/geospaas/tabledap.py
@@ -0,0 +1,87 @@
"""Normalizer for ERDDAP's tabledap data"""
import dateutil.parser
from collections import OrderedDict

from shapely.geometry import LineString

import metanorm.utils as utils
from .base import GeoSPaaSMetadataNormalizer
from ...errors import MetadataNormalizationError


class TableDAPMetadataNormalizer(GeoSPaaSMetadataNormalizer):
"""Generate the properties of a GeoSPaaS Dataset using tabledap
attributes
"""

@staticmethod
def get_product_attribute(product_metadata, attribute):
"""Extract the value of an attribute from tabledap product
metadata
"""
for row in product_metadata['table']['rows']:
if row[2] == attribute:
return row[4]
raise MetadataNormalizationError(f'Could not find product attribute {attribute}')

@utils.raises(KeyError)
def check(self, raw_metadata):
return 'tabledap' in raw_metadata.get('url', '')

@utils.raises(KeyError)
def get_entry_title(self, raw_metadata):
return self.get_product_attribute(raw_metadata['product_metadata'], 'title')

@utils.raises(KeyError)
def get_entry_id(self, raw_metadata):
return raw_metadata['entry_id']

@utils.raises(KeyError)
def get_summary(self, raw_metadata):
return self.get_product_attribute(raw_metadata['product_metadata'], 'summary')

@utils.raises(KeyError)
def get_time_coverage_start(self, raw_metadata):
return dateutil.parser.parse(raw_metadata['temporal_coverage'][0])

@utils.raises(KeyError)
def get_time_coverage_end(self, raw_metadata):
return dateutil.parser.parse(raw_metadata['temporal_coverage'][1])

@utils.raises(KeyError)
def get_platform(self, raw_metadata):
source = self.get_product_attribute(raw_metadata['product_metadata'], 'source')
platform = utils.get_gcmd_platform(source)
# backwards compatibility with older GCMD versions
if platform['Short_Name'] == utils.UNKNOWN and source == 'Argo float':
return OrderedDict([
('Basis', 'Water-based Platforms'),
('Category', 'Buoys'),
('Sub_Category', 'Unmoored'),
('Short_Name', 'Argo-Float'),
('Long_Name', '')])
return platform

def get_instrument(self, raw_metadata):
return utils.get_gcmd_instrument('Unknown')

@utils.raises(KeyError)
def get_location_geometry(self, raw_metadata):
return raw_metadata['trajectory']

@utils.raises(KeyError)
def get_provider(self, raw_metadata):
"""Returns a GCMD-like provider data structure"""
institution = self.get_product_attribute(raw_metadata['product_metadata'], 'institution')
provider = utils.get_gcmd_provider([institution])
if provider:
return provider
else:
return OrderedDict([
('Bucket_Level0', 'CONSORTIA/INSTITUTIONS'),
('Bucket_Level1', ''),
('Bucket_Level2', ''),
('Bucket_Level3', ''),
('Short_Name', institution[:100]),
('Long_Name', institution[:250]),
('Data_Center_URL', '')])
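
As a usage illustration only (not part of this diff): a minimal sketch of how the new normalizer could be called, assuming it is exported through metanorm.normalizers as the tests below do, and using raw metadata shaped like the test fixture.

import metanorm.normalizers as normalizers

# Hypothetical raw metadata, shaped like the fixture in test_tabledap.py below.
raw_metadata = {
    'entry_id': '123456',
    'url': 'http://foo/tabledap/bar.json',
    'temporal_coverage': ('2023-01-01T00:00:00Z', '2023-01-01T12:47:13Z'),
    'trajectory': 'LINESTRING (1 2, 3 4)',
    'product_metadata': {'table': {'rows': [
        ['attribute', 'NC_GLOBAL', 'title', 'String', 'Argo Float Measurements'],
        ['attribute', 'NC_GLOBAL', 'source', 'String', 'Argo float'],
        ['attribute', 'NC_GLOBAL', 'institution', 'String', 'Argo'],
    ]}},
}

normalizer = normalizers.TableDAPMetadataNormalizer()
if normalizer.check(raw_metadata):  # True because the URL contains 'tabledap'
    print(normalizer.get_entry_title(raw_metadata))          # Argo Float Measurements
    print(normalizer.get_time_coverage_start(raw_metadata))  # 2023-01-01 00:00:00+00:00
    print(normalizer.get_location_geometry(raw_metadata))    # LINESTRING (1 2, 3 4)
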
3 changes: 2 additions & 1 deletion metanorm/utils.py
@@ -76,12 +76,13 @@ def export_subclasses(package__all__, package_name, package_dir, base_class):
'Sentinel-2B': ('S2B',),
'Sentinel-3A': ('S3A',),
'Sentinel-3B': ('S3B',),
'argo-float': ('Argo float',),
# providers
'ESA/EO': ('ESA',),
'OB.DAAC': ('OB_DAAC',),
'C-SAR': ('SAR-C', 'SAR-C SAR'),
'EUMETSAT/OSISAF': ('EUMETSAT OSI SAF',),
'NSIDC': ('NSIDC_ECS',),
}

def translate_pythesint_keyword(translation_dict, alias):
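
The new 'argo-float' entry above maps the raw source string 'Argo float' onto the pythesint keyword used for GCMD lookups. As a hedged illustration of the kind of lookup such a translation table supports (this is not the actual body of translate_pythesint_keyword):

def translate_alias(translation_dict, alias):
    """Hypothetical helper: return the canonical keyword whose alias tuple
    contains the given string, or the string unchanged if nothing matches.
    """
    for keyword, aliases in translation_dict.items():
        if alias in aliases:
            return keyword
    return alias

# With the entry added in this diff, translate_alias(translation_dict, 'Argo float')
# would yield 'argo-float'.
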
183 changes: 183 additions & 0 deletions tests/normalizers/test_tabledap.py
@@ -0,0 +1,183 @@
"""Tests for the tabledap normalizer"""

import unittest
import unittest.mock as mock
from collections import OrderedDict
from datetime import datetime, timezone

import metanorm.normalizers as normalizers
import metanorm.utils as utils
from metanorm.errors import MetadataNormalizationError


class TableDAPMetadataNormalizerTests(unittest.TestCase):
"""Tests for TableDAPMetadataNormalizer"""

def setUp(self):
self.normalizer = normalizers.TableDAPMetadataNormalizer()
self.empty_raw_metadata = {'product_metadata': {'table': {'rows': []}}}
self.raw_metadata = {
'entry_id': '123456',
'url': 'http://foo/tabledap/bar.json',
'temporal_coverage': ('2023-01-01T00:00:00Z', '2023-01-01T12:47:13Z'),
'trajectory': 'LINESTRING (1 2, 3 4)',
'product_metadata': {
'table': {
'columnNames': [
"Row Type", "Variable Name", "Attribute Name", "Data Type", "Value"],
'rows': [
["attribute", "NC_GLOBAL", "cdm_altitude_proxy", "String", "pres"],
["attribute", "NC_GLOBAL", "cdm_data_type", "String", "TrajectoryProfile"],
["attribute", "NC_GLOBAL", "time_coverage_end", "String",
"2026-12-27T14:48:20Z"],
["attribute", "NC_GLOBAL", "time_coverage_start", "String",
"1997-07-28T20:26:20Z"],
["attribute", "NC_GLOBAL", "title", "String", "Argo Float Measurements"],
["attribute", "NC_GLOBAL", "summary", "String",
"Argo float vertical profiles from Coriolis Global Data Assembly Centres"],
["attribute", "NC_GLOBAL", "source", "String", "Argo float"],
["attribute", "NC_GLOBAL", "institution", "String", "Argo"],
]
}
}
}

def test_get_product_attribute(self):
"""Test getting the value of an attribute from a tabledap
product's metadata
"""
self.assertEqual(
normalizers.TableDAPMetadataNormalizer.get_product_attribute(
self.raw_metadata['product_metadata'], 'cdm_data_type'),
'TrajectoryProfile')
with self.assertRaises(MetadataNormalizationError):
normalizers.TableDAPMetadataNormalizer.get_product_attribute(
self.raw_metadata['product_metadata'], 'foo')

def test_check(self):
"""Test the checking condition"""
self.assertTrue(self.normalizer.check(self.raw_metadata))

self.assertFalse(self.normalizer.check({}))
self.assertFalse(self.normalizer.check({'url': ''}))
self.assertFalse(self.normalizer.check({'url': '/foo/bar/baz.nc'}))

def test_get_entry_title(self):
"""Test getting the title"""
self.assertEqual(self.normalizer.get_entry_title(self.raw_metadata),
'Argo Float Measurements')

def test_missing_title(self):
"""A MetadataNormalizationError should be raised if the raw title
is missing
"""
with self.assertRaises(MetadataNormalizationError):
self.normalizer.get_entry_title(self.empty_raw_metadata)

def test_get_entry_id(self):
"""Test getting the ID"""
self.assertEqual(self.normalizer.get_entry_id(self.raw_metadata), '123456')

def test_entry_id_error(self):
"""A MetadataNormalizationError should be raised if ID is not found
"""
with self.assertRaises(MetadataNormalizationError):
self.normalizer.get_entry_id(self.empty_raw_metadata)

def test_summary(self):
"""Test getting the summary"""
self.assertEqual(
self.normalizer.get_summary(self.raw_metadata),
'Argo float vertical profiles from Coriolis Global Data Assembly Centres')

def test_get_time_coverage_start(self):
"""Test getting the start of the time coverage"""
self.assertEqual(
self.normalizer.get_time_coverage_start(self.raw_metadata),
datetime(year=2023, month=1, day=1, tzinfo=timezone.utc))

def test_missing_time_coverage_start(self):
"""A MetadataNormalizationError must be raised when the
time_coverage_start raw attribute is missing
"""
with self.assertRaises(MetadataNormalizationError):
self.normalizer.get_time_coverage_start(self.empty_raw_metadata)

def test_get_time_coverage_end(self):
"""Test getting the end of the time coverage"""
self.assertEqual(
self.normalizer.get_time_coverage_end(self.raw_metadata),
datetime(year=2023, month=1, day=1, hour=12, minute=47, second=13, tzinfo=timezone.utc))

def test_missing_time_coverage_end(self):
"""A MetadataNormalizationError must be raised when the
time_coverage_end raw attribute is missing
"""
with self.assertRaises(MetadataNormalizationError):
self.normalizer.get_time_coverage_end(self.empty_raw_metadata)

def test_gcmd_platform(self):
"""Test getting the platform"""
with mock.patch('metanorm.utils.get_gcmd_platform') as mock_get_gcmd_method:
self.assertEqual(
self.normalizer.get_platform(self.raw_metadata),
mock_get_gcmd_method.return_value)
mock_get_gcmd_method.assert_called_with('Argo float')

def test_gcmd_platform_unknown(self):
"""Test getting the platform with GCMD versions that don't
support ARGO floats
"""
with mock.patch('metanorm.utils.get_gcmd_platform') as mock_get_gcmd_method:
mock_get_gcmd_method.return_value = {'Short_Name': utils.UNKNOWN}
self.assertEqual(
self.normalizer.get_platform(self.raw_metadata),
OrderedDict([
('Basis', 'Water-based Platforms'),
('Category', 'Buoys'),
('Sub_Category', 'Unmoored'),
('Short_Name', 'Argo-Float'),
('Long_Name', '')]))

def test_gcmd_instrument(self):
"""Test getting the instrument"""
self.assertEqual(
self.normalizer.get_instrument(self.raw_metadata),
OrderedDict([
('Category', utils.UNKNOWN),
('Class', utils.UNKNOWN),
('Type', utils.UNKNOWN),
('Subtype', utils.UNKNOWN),
('Short_Name', 'Unknown'),
('Long_Name', 'Unknown')]))

def test_gcmd_provider(self):
"""Test getting the provider"""
with mock.patch('metanorm.utils.get_gcmd_provider') as mock_get_gcmd_method:
self.assertEqual(
self.normalizer.get_provider(self.raw_metadata),
mock_get_gcmd_method.return_value)

def test_gcmd_provider_unknown(self):
"""Test getting the provider when the institution is not
found among the GCMD providers
"""
with mock.patch('metanorm.utils.get_gcmd_provider', return_value=None):
self.assertEqual(
self.normalizer.get_provider(self.raw_metadata),
OrderedDict([
('Bucket_Level0', 'CONSORTIA/INSTITUTIONS'),
('Bucket_Level1', ''),
('Bucket_Level2', ''),
('Bucket_Level3', ''),
('Short_Name', 'Argo'),
('Long_Name', 'Argo'),
('Data_Center_URL', '')]))

def test_get_location_geometry(self):
"""get_location_geometry() should return the location
of the dataset
"""
self.assertEqual(
self.normalizer.get_location_geometry(self.raw_metadata),
'LINESTRING (1 2, 3 4)')
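
Since the tests rely only on the standard library's unittest (including unittest.mock), they can be run with the usual loader; a sketch, assuming the repository root is on sys.path and the tests directory is importable as a package:

import unittest

# Load and run the new tabledap normalizer tests.
suite = unittest.defaultTestLoader.loadTestsFromName('tests.normalizers.test_tabledap')
unittest.TextTestRunner(verbosity=2).run(suite)
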