Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/wmo-im/wis2box into cap-editor
Browse files Browse the repository at this point in the history
  • Loading branch information
RoryPTB committed Aug 22, 2024
2 parents 0683cb1 + da5ed3d commit 4bf09b5
Show file tree
Hide file tree
Showing 11 changed files with 144 additions and 21 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/tests-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ jobs:
python3 wis2box-ctl.py execute wis2box data ingest -mdi $DISCOVERY_METADATA_ID -p $TEST_DATA
- name: add Congo synop data (synop2bufr) 🇨🇩
env:
TOPIC_HIERARCHY: origin/a/wis2/cd-brazza_met_centre/data/core/weather/surface-based-observations/synop
TOPIC_HIERARCHY: origin/a/wis2/cd-brazza_met_centre/data/recommended/weather/surface-based-observations/synop
TERRITORY: COD
DISCOVERY_METADATA: /data/wis2box/metadata/discovery/cd-surface-weather-observations.yml
DISCOVERY_METADATA_ID: urn:wmo:md:cd-brazza_met_centre:surface-weather-observations
Expand All @@ -124,6 +124,7 @@ jobs:
python3 wis2box-ctl.py execute wis2box metadata station publish-collection --path $STATION_METADATA --topic-hierarchy $TOPIC_HIERARCHY
curl -s http://localhost/oapi/collections/discovery-metadata/items/$DISCOVERY_METADATA_ID --output /tmp/$DISCOVERY_METADATA_ID
check-jsonschema --schemafile /tmp/wcmp2-bundled.json /tmp/$DISCOVERY_METADATA_ID
python3 wis2box-ctl.py execute wis2box auth add-token --metadata-id $DISCOVERY_METADATA_ID -p token123 -y
python3 wis2box-ctl.py execute wis2box data ingest -mdi $DISCOVERY_METADATA_ID -p $TEST_DATA
- name: add example ship data (bufr2bufr) WMO
env:
Expand Down
4 changes: 2 additions & 2 deletions docs/source/reference/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ Load initial stations:
wis2box metadata station publish-collection --path /data/wis2box/metadata/station/italy.csv --topic-hierarchy it-roma_met_centre.data.core.weather.surface-based-observations.synop
wis2box metadata station publish-collection --path /data/wis2box/metadata/station/algeria.csv --topic-hierarchy dz-alger_met_centre.data.core.weather.surface-based-observations.synop
wis2box metadata station publish-collection --path /data/wis2box/metadata/station/romania.csv --topic-hierarchy ro-rnimh.data.core.weather.surface-based-observations.synop
wis2box metadata station publish-collection --path /data/wis2box/metadata/station/congo.csv --topic-hierarchy cd-brazza_met_centre.data.core.weather.surface-based-observations.synop
wis2box metadata station publish-collection --path /data/wis2box/metadata/station/congo.csv --topic-hierarchy cd-brazza_met_centre.data.recommended.weather.surface-based-observations.synop
wis2box metadata station publish-collection --path /data/wis2box/metadata/station/wmo-test-ship.csv --topic-hierarchy int-wmo-test.data.core.weather.surface-based-observations.ship
wis2box metadata station publish-collection --path /data/wis2box/metadata/station/wmo-test-buoy.csv --topic-hierarchy int-wmo-test.data.core.weather.surface-based-observations.buoy
wis2box metadata station publish-collection --path /data/wis2box/metadata/station/wmo-test-wind-profiler.csv --topic-hierarchy int-wmo-test.data.core.weather.surface-based-observations.wind_profiler
Expand All @@ -79,7 +79,7 @@ Ingest data using the data ingest command to push data to the ``wis2box-incoming
wis2box data ingest --topic-hierarchy it-roma_met_centre.data.core.weather.surface-based-observations.synop --path $WIS2BOX_DATADIR/observations/italy
wis2box data ingest --topic-hierarchy dz-alger_met_centre.data.core.weather.surface-based-observations.synop --path $WIS2BOX_DATADIR/observations/algeria
wis2box data ingest --topic-hierarchy ro-rnimh.data.core.weather.surface-based-observations.synop --path $WIS2BOX_DATADIR/observations/romania
wis2box data ingest --topic-hierarchy cd-brazza_met_centre.data.core.weather.surface-based-observations.synop --path $WIS2BOX_DATADIR/observations/congo
wis2box data ingest --topic-hierarchy cd-brazza_met_centre.data.recommended.weather.surface-based-observations.synop --path $WIS2BOX_DATADIR/observations/congo
wis2box data ingest --topic-hierarchy int-wmo-test.data.core.weather.surface-based-observations.ship --path $WIS2BOX_DATADIR/observations/wmo/ship
wis2box data ingest --topic-hierarchy int-wmo-test.data.core.weather.surface-based-observations.buoy --path $WIS2BOX_DATADIR/observations/wmo/buoy
wis2box data ingest --topic-hierarchy int-wmo-test.data.core.weather.surface-based-observations.wind_profiler --path $WIS2BOX_DATADIR/observations/wmo/wind_profiler
Expand Down
4 changes: 2 additions & 2 deletions docs/source/user/data-ingest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ Select 'browse' on the ``wis2box-incoming`` bucket and select 'Choose or create

For example, using a filepath matching the topic hierarchy:

* Topic Hierarchy: ``origin/a/wis2/cd-brazza_met_centre/data/core/weather/surface-based-observations/synop``
* upload data in the path containing: ``cd-brazza_met_centre/data/core/weather/surface-based-observations/synop``
* Topic Hierarchy: ``origin/a/wis2/cd-brazza_met_centre/data/recommended/weather/surface-based-observations/synop``
* upload data in the path containing: ``cd-brazza_met_centre/data/recommended/weather/surface-based-observations/synop``

The error message ``Path validation error: Could not match http://minio:9000/wis2box-incoming/... to dataset, ...`` indicates that a file was stored in a directory that could not be matched to a dataset.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
wis2box:
retention: P180D
topic_hierarchy: cd-brazza_met_centre/data/core/weather/surface-based-observations/synop
topic_hierarchy: cd-brazza_met_centre/data/recommended/weather/surface-based-observations/synop
country: cog
centre_id: cd-brazza_met_centre
data_mappings:
Expand Down Expand Up @@ -49,7 +49,7 @@ identification:
end: null
resolution: P1H
url: https://example.org/malawi-surface-weather-observations
wmo_data_policy: core
wmo_data_policy: recommended

contact:
host:
Expand Down
71 changes: 59 additions & 12 deletions tests/integration/test_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def test_wis2downloader():
'origin/a/wis2/dz-alger_met_centre/data/core/weather/surface-based-observations/synop': 28, # noqa
'origin/a/wis2/cn-cma/data/core/weather/prediction/forecast/medium-range/probabilistic/global': 10, # noqa
'origin/a/wis2/ro-rnimh/data/core/weather/surface-based-observations/synop': 49, # noqa
'origin/a/wis2/cd-brazza_met_centre/data/core/weather/surface-based-observations/synop': 14, # noqa
'origin/a/wis2/cd-brazza_met_centre/data/recommended/weather/surface-based-observations/synop': 0, # noqa
'origin/a/wis2/int-wmo-test/data/core/weather/surface-based-observations/buoy': 2, # noqa
'origin/a/wis2/int-wmo-test/data/core/weather/surface-based-observations/wind_profiler': 1, # noqa
'origin/a/wis2/int-wmo-test/data/core/weather/surface-based-observations/ship': 5, # noqa
Expand Down Expand Up @@ -197,6 +197,17 @@ def test_metadata_discovery_publish():
r = r.json()
assert r['conformsTo'][0] == 'http://wis.wmo.int/spec/wcmp/2/conf/core'

id_ = 'urn:wmo:md:cd-brazza_met_centre:surface-weather-observations'
r = SESSION.get(f'{API_URL}/collections/discovery-metadata/items/{id_}').json() # noqa

assert 'has_auth' in r['wis2box']
assert r['wis2box']['has_auth']

for link in r['links']:
if link['rel'] == 'collection' and link['title'] == id_:
assert link['security']['default']['type'] == 'http'
assert link['security']['default']['scheme'] == 'bearer'


def test_data_ingest():
"""Test data ingest/process publish"""
Expand Down Expand Up @@ -303,8 +314,11 @@ def test_message_api():
url = f'{API_URL}/collections/messages/items?sortby=-datetime'
r = SESSION.get(url).json()

# we want to find a particular message with data ID
target_data_id = "cd-brazza_met_centre:surface-weather-observations/WIGOS_0-20000-0-64406_20230803T090000" # noqa
# should match sum of counts above
assert r['numberMatched'] == sum(counts.values())

# we want to find a particular message with data ID for core data
target_data_id = 'mw-mw_met_centre:surface-weather-observations/WIGOS_0-454-2-AWSLOBI_20211111T125500' # noqa

msg = None
for feature in r['features']:
Expand All @@ -320,18 +334,14 @@ def test_message_api():
assert msg['geometry'] is not None

props = msg['properties']
assert props['datetime'] == '2023-08-03T09:00:00Z'
assert props['wigos_station_identifier'] == '0-20000-0-64406'
assert props['datetime'] == '2021-11-11T12:55:00Z'
assert props['wigos_station_identifier'] == '0-454-2-AWSLOBI'
assert props['integrity']['method'] == 'sha512'
assert not props['data_id'].startswith('wis2')
assert not props['data_id'].startswith('origin/a/wis2')
assert props['data_id'].startswith('cd')
assert props['content']['size'] == 253
assert not props['data_id'].startswith(('wis2', 'origin/a/wis2'))
assert props['data_id'].startswith('mw')
assert props['content']['size'] == 247
assert props['content']['encoding'] == 'base64'
assert props['content']['value'] is not None
assert 'gts' in props
assert props['gts']['ttaaii'] == 'SICG20'
assert props['gts']['cccc'] == 'FCBB'

link_rel = msg['links'][0]

Expand All @@ -344,3 +354,40 @@ def test_message_api():
assert str(r.headers['Content-Length']) == str(link_rel['length'])

assert b'BUFR' in r.content

# we want to find a particular message with data ID for recommended data
url = f'{API_URL}/collections/messages/items?sortby=-datetime&q=cd-brazza_met_centre' # noqa
r = SESSION.get(url).json()

target_data_id = "cd-brazza_met_centre:surface-weather-observations/WIGOS_0-20000-0-64406_20230803T090000" # noqa

msg = None
for feature in r['features']:
if feature['properties']['data_id'] == target_data_id:
msg = feature
break

assert msg is not None

is_valid, _ = validate_message(msg)
assert is_valid

assert msg['geometry'] is not None

props = msg['properties']
assert props['datetime'] == '2023-08-03T09:00:00Z'
assert props['wigos_station_identifier'] == '0-20000-0-64406'
assert props['integrity']['method'] == 'sha512'
assert not props['data_id'].startswith('wis2')
assert not props['data_id'].startswith('origin/a/wis2')
assert props['data_id'].startswith('cd')
assert 'content' not in props
assert 'gts' in props
assert props['gts']['ttaaii'] == 'SICG20'
assert props['gts']['cccc'] == 'FCBB'

link_rel = msg['links'][0]

assert link_rel['type'] == 'application/x-bufr'
assert link_rel['security']['default']['type'] == 'http'
assert link_rel['security']['default']['scheme'] == 'bearer'
39 changes: 38 additions & 1 deletion wis2box-management/wis2box/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,12 @@
import requests
from secrets import token_hex

from owslib.ogcapi.records import Records

from wis2box import cli_helpers
from wis2box.api import upsert_collection_item
from wis2box.data_mappings import get_data_mappings
from wis2box.env import AUTH_URL
from wis2box.env import AUTH_URL, DOCKER_API_URL


LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -170,6 +173,27 @@ def add_token(ctx, metadata_id, path, yes, token):
if create_token(path, token):
click.echo('Token successfully created')

if metadata_id is not None:
click.echo('Adding access control object to discovery metadata')

oar = Records(DOCKER_API_URL)

record = oar.collection_item('discovery-metadata', metadata_id)
record['wis2box']['has_auth'] = True

for link in record['links']:
if link['rel'] == 'collection' and link['title'] == metadata_id:
LOGGER.debug('Adding security object to link')
link['security'] = {
'default': {
'type': 'http',
'scheme': 'bearer',
'description': 'Please contact the data provider for access' # noqa
}
}

upsert_collection_item('discovery-metadata', record)


@click.command()
@click.pass_context
Expand All @@ -192,6 +216,19 @@ def remove_token(ctx, metadata_id, path, token):
if delete_token(path, token):
click.echo('Token successfully deleted')

if metadata_id is not None:
    # The access control object is being taken off the record, hence "from".
    click.echo('Removing access control object from discovery metadata')

    oar = Records(DOCKER_API_URL)

    # Fetch the currently published discovery metadata record, drop the
    # auth flag and every link-level security object, then write it back.
    record = oar.collection_item('discovery-metadata', metadata_id)
    record['wis2box'].pop('has_auth', None)
    for link in record['links']:
        # pop() with a default is a no-op when the key is absent, so no
        # membership check is needed first.
        link.pop('security', None)

    upsert_collection_item('discovery-metadata', record)


auth.add_command(add_token)
auth.add_command(remove_token)
Expand Down
1 change: 1 addition & 0 deletions wis2box-management/wis2box/data/bufr4.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def transform(self, input_data: Union[Path, bytes],
payload = {
'inputs': {
'channel': self.topic_hierarchy.replace('origin/a/wis2/', ''),
'metadata_id': self.metadata_id,
'notify': False,
'data': data
}
Expand Down
1 change: 1 addition & 0 deletions wis2box-management/wis2box/data/csv2bufr.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def transform(self, input_data: Union[Path, bytes],
payload = {
'inputs': {
'channel': self.topic_hierarchy.replace('origin/a/wis2/', ''),
'metadata_id': self.metadata_id,
'template': self.template,
'notify': False,
'data': data
Expand Down
1 change: 1 addition & 0 deletions wis2box-management/wis2box/data/synop2bufr.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def transform(self, input_data: Union[Path, bytes],
payload = {
'inputs': {
'channel': self.topic_hierarchy.replace('origin/a/wis2/', ''),
'metadata_id': self.metadata_id,
'year': year,
'month': month,
'notify': False,
Expand Down
9 changes: 9 additions & 0 deletions wis2box-management/wis2box/metadata/discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,15 @@ def publish_discovery_metadata(metadata: Union[dict, str]):
LOGGER.error(msg)
raise RuntimeError(msg)

# Carry over the access-control flag from any previously published version
# of this record, so that republishing metadata does not drop 'has_auth'.
oar = Records(DOCKER_API_URL)
try:
    LOGGER.debug('Checking if record / auth enabled')
    # collection_item() hands back the record as a mapping (it is indexed
    # directly in wis2box.auth and wis2box.pubsub.message); calling .json()
    # on it raised, and the broad except below silently hid that, so the
    # flag was never preserved.
    r = oar.collection_item('discovery-metadata', record['id'])
    if r['wis2box'].get('has_auth', False):
        record['wis2box']['has_auth'] = True
except Exception:
    # Best-effort: the record may simply not be published yet.
    LOGGER.debug('No auth defined')

LOGGER.debug('Publishing to API')
upsert_collection_item('discovery-metadata', record)

Expand Down
28 changes: 27 additions & 1 deletion wis2box-management/wis2box/pubsub/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,11 @@
from pathlib import Path
import uuid

from owslib.ogcapi.records import Records

from wis2box import __version__
from wis2box.util import json_serial
from wis2box.env import STORAGE_PUBLIC, URL, STORAGE_SOURCE
from wis2box.env import DOCKER_API_URL, STORAGE_PUBLIC, URL, STORAGE_SOURCE
from wis2box.storage import get_data

LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -210,6 +212,30 @@ def __init__(self, identifier: str, metadata_id: str, filepath: str,
}
self.message['links'].append(link)

LOGGER.debug(f'Checking for access control (metadata id: {metadata_id})') # noqa
try:
oar = Records(DOCKER_API_URL)
record = oar.collection_item('discovery-metadata', metadata_id)

if record['wis2box'].get('has_auth'):
LOGGER.debug('Updating message with access control')

for link in self.message['links']:
if link['href'] == public_file_url:
LOGGER.debug('Adding security object to link')
link['security'] = {
'default': {
'type': 'http',
'scheme': 'bearer',
'description': 'Please contact the data provider for access' # noqa
}
}

LOGGER.debug('Removing inline content')
self.message['properties'].pop('content', None)
except Exception as err:
LOGGER.debug(f'Cannot locate metadata record: {err}')


def gcm() -> dict:
"""
Expand Down

0 comments on commit 4bf09b5

Please sign in to comment.