From d6cef3d3e403cd1d3d981f3b0670b90a15b9fc1b Mon Sep 17 00:00:00 2001 From: Tom Kralidis Date: Fri, 9 Feb 2024 18:51:40 -0500 Subject: [PATCH] update codelist generator/publisher implementation (#6) --- .github/workflows/README.md | 11 + .github/workflows/generate-and-commit-ttl.yml | 41 +++ .github/workflows/generate-ttl.yml | 23 ++ .github/workflows/generateTTLandCommit.yml | 46 ---- .github/workflows/publish-to-wmo-codes.yml | 41 +++ codelists.csv | 5 + codelists/wcmp2-tables.csv | 4 - prodRegister | 1 - scripts/README.md | 86 ++++++ scripts/codelists2ttl.py | 193 ++++++++++++++ scripts/makeWCMP2Entities.py | 70 ----- scripts/requirements.txt | 1 + scripts/uploadChanges.py | 138 ---------- scripts/upload_changes.py | 252 ++++++++++++++++++ testRegister | 1 - 15 files changed, 653 insertions(+), 260 deletions(-) create mode 100644 .github/workflows/README.md create mode 100644 .github/workflows/generate-and-commit-ttl.yml create mode 100644 .github/workflows/generate-ttl.yml delete mode 100644 .github/workflows/generateTTLandCommit.yml create mode 100644 .github/workflows/publish-to-wmo-codes.yml create mode 100644 codelists.csv delete mode 100644 codelists/wcmp2-tables.csv delete mode 100644 prodRegister create mode 100644 scripts/README.md create mode 100644 scripts/codelists2ttl.py delete mode 100644 scripts/makeWCMP2Entities.py create mode 100644 scripts/requirements.txt delete mode 100644 scripts/uploadChanges.py create mode 100644 scripts/upload_changes.py delete mode 100644 testRegister diff --git a/.github/workflows/README.md b/.github/workflows/README.md new file mode 100644 index 0000000..39152b0 --- /dev/null +++ b/.github/workflows/README.md @@ -0,0 +1,11 @@ +# GitHub Actions + +The following GitHub Actions are in place for this repository: + +- on Pull Request: test the generation of WCMP2 Codelists to TTL files +- on Commit/Push to `main` branch: generate WCMP2 Codelists to TTL files, and push to `publication` branch +- on Commit/Push to 
`publication` branch: publish the TTL files to the WMO Codes Registry testing environment + +Edit `codelists/*.csv` files -> GitHub Pull Request (test generation) -> Merge Pull Request (generate and commit to `publication` branch) -> Publish to WMO Codes Registry testing environment + +Publication to the WMO Codes Registry operational environment is executed as a manual step. diff --git a/.github/workflows/generate-and-commit-ttl.yml b/.github/workflows/generate-and-commit-ttl.yml new file mode 100644 index 0000000..99c81ab --- /dev/null +++ b/.github/workflows/generate-and-commit-ttl.yml @@ -0,0 +1,41 @@ +name: Generate WCMP2 Codelists as TTL files and commit + +on: + push: + branches: + - main + paths: + - '**.yml' + - 'codelists/**.csv' + +jobs: + main: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.10'] + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Generate TTL files + run: | + python3 scripts/codelists2ttl.py + - name: checkout publication branch + uses: actions/checkout@v3 + with: + ref: publication + - name: update publication branch and publish + run: | + mkdir /tmp/wis + mv wis/ tmp/ + git checkout publication + git config --global user.email "tomkralidis@gmail.com" + git config --global user.name "Tom Kralidis" + rm -rf wis/* + cp -rpf /tmp/wis/* . + git add . 
+ git commit -am "update WMCP2 Codelists TTL files" + git push diff --git a/.github/workflows/generate-ttl.yml b/.github/workflows/generate-ttl.yml new file mode 100644 index 0000000..427e4a3 --- /dev/null +++ b/.github/workflows/generate-ttl.yml @@ -0,0 +1,23 @@ +name: Generate WCMP2 Codelists as TTL files + +on: + pull_request: + paths: + - '**.yml' + - 'codelists/**.csv' + +jobs: + main: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.10'] + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Generate TTL files + run: | + python3 scripts/codelists2ttl.py diff --git a/.github/workflows/generateTTLandCommit.yml b/.github/workflows/generateTTLandCommit.yml deleted file mode 100644 index 9308ea6..0000000 --- a/.github/workflows/generateTTLandCommit.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: generate-ttl-and-commit - -on: pull_request - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ['3.10'] - - steps: - - name: Checkout - uses: actions/checkout@v2 - with: - fetch-depth: 0 - ref: ${{ github.event.pull_request.head.sha }} - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install rdflib - pip install requests - pip install testtools - - name: Generate and commit ttl to PR branch - run: | - if [ git show-ref --quiet refs/heads/publication ]; - then echo "publication branch exists" - git push origin --delete publication - fi - git checkout -b publication - python3 -m scripts.makeWCMP2Entities - git status - if [ -z "$(git status --porcelain)" ]; - then echo 'nothing to commit' - else - git add --all - git config --global user.email "${{ github.actor }}@users.noreply.github.com" - git config --global user.name 
"${{ github.actor }}" - git commit -am 'update wcmp2-codelists ttls' - git push --force --set-upstream origin publication - fi diff --git a/.github/workflows/publish-to-wmo-codes.yml b/.github/workflows/publish-to-wmo-codes.yml new file mode 100644 index 0000000..5a15939 --- /dev/null +++ b/.github/workflows/publish-to-wmo-codes.yml @@ -0,0 +1,41 @@ +name: Publish TTL files to WMO Codes Registry testing environment + +env: + WMO_CODES_TEST_USER_ID: ${{ secrets.WMO_CODES_TEST_USER_ID }} + WMO_CODES_TEST_API_KEY: ${{ secrets.WMO_CODES_TEST_API_KEY }} + +on: + push: + branches: + - publication + paths: + - 'wis/**.ttl' + +jobs: + main: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.10'] + steps: + - uses: actions/checkout@v2 + with: + ref: publication + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Copy TTL files + run: | + mkdir /tmp/wis + cp -rp wis/ /tmp/ + - uses: actions/checkout@v2 + with: + ref: main + - name: Install dependencies + run: | + python3 -m pip install --upgrade pip + pip3 install -r scripts/requirements.txt + - name: update publication branch and publish + run: | + python3 scripts/upload_changes.py https://api.github.com/users/{WMO_CODES_TEST_USER_ID} {WMO_CODES_TEST_API_KEY} test /tmp/wis diff --git a/codelists.csv b/codelists.csv new file mode 100644 index 0000000..e862526 --- /dev/null +++ b/codelists.csv @@ -0,0 +1,5 @@ +Name,Description,Source +contact-role,Contact role,https://github.com/radiantearth/stac-spec/blob/master/collection-spec/collection-spec.md#provider-object +global-service-type,Global service type, +link-type,Link type, +resource-type,Resource type, diff --git a/codelists/wcmp2-tables.csv b/codelists/wcmp2-tables.csv deleted file mode 100644 index 14851f7..0000000 --- a/codelists/wcmp2-tables.csv +++ /dev/null @@ -1,4 +0,0 @@ -contact-role,contact-role,http://codes.wmo.int/wis2/contact-role 
-global-service-type,global-service-type,http://codes.wmo.int/wis2/global-service-type -link-type,link-type,http://codes.wmo.int/wis2/link-type -resource-type,resource-type,http://codes.wmo.int/wis2/resource-type diff --git a/prodRegister b/prodRegister deleted file mode 100644 index c40a416..0000000 --- a/prodRegister +++ /dev/null @@ -1 +0,0 @@ -https://codes.wmo.int diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..619f302 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,86 @@ +# WMO Codes Registry management + +## Overview + +The scripts in this directory are used to manage the WCMP2 codelists publication to +the WMO Codes Registry. + +## The WMO Codes Registry + +The [WMO Codes Registry](https://codes.wmo.int) is an authoritative service that +provides a number of registers defining controlled vocabularies used in various +WMO standards and systems. + +The service provides an API in support of automated workflow to manage codelist +registers. The API is available as follows: + +- https://ci.codes.wmo.int: testing +- https://codes.wmo.int: production + +API usage requires an account and credentials. Contact WMO Secretariat to be +provided access to the WMO Codes Registry API (a GitHub user id is required). + +Once you receive access, an API Key is required to manage resources on the registry. +To create an API Key, once logged into the registry, select _Admin / Create a temporary password (API Key)_, +and click _Create password_ to generate an API key. 
+ +## Mapping from WCMP2 codelists to the WMO Codes Registry + +The overall setup of WCMP2 codelists publication to the WMO Codes Registry works as follows: + +`wis` / CSV filename (without file extension) / CSV row `Name` + +where: + +- `wis` is the root `reg:Register` +- each WCMP2 codelist CSV file is a `reg:Register` itself, attached to the `wis` register as a sub-register +- each row in a WCMP2 codelist CSV file is a `skos:Concept` tied to its sub-register + +## Publication workflows + +Managing WCMP2 codelists publication to the WMO Codes Registry involves the following steps: + +- creating the `wis` register +- generating TTL files from CSV +- publishing TTL files to the WMO Codes Registry + +### Creating the `wis` register + +TODO + +### Generating TTLs + +To generate TTL files, from the root of the repository, run the following command: + +```bash +python3 scripts/codelists2ttl.py +``` + +This will create all TTL files in a directory called `wis`. + +### Publishing TTLs + +To publish TTL files, from the root of the repository, run the following command: + +```bash +python3 scripts/upload_changes.py https://api.github.com/users/{user_id} {password} {environment} {output-directory} +``` + +where: + +- `user_id` is your GitHub userid +- `password` is the API Key (see [The WMO Codes Registry](#the-wmo-codes-registry) for instructions on how to generate an API Key) +- `environment` selects whether to upload changes to the testing (`test`) or production (`prod`) environment +- `output-directory` is the directory containing the TTL files to be published + +Examples: + +```bash +# publish to test environment on https://ci.codes.wmo.int +python3 scripts/upload_changes.py https://api.github.com/users/tomkralidis API_KEY test wis + +# publish to production environment on https://codes.wmo.int +python3 scripts/upload_changes.py https://api.github.com/users/tomkralidis API_KEY prod wis +``` + +This will create/update all resources on the WMO Codes Registry. 
diff --git a/scripts/codelists2ttl.py b/scripts/codelists2ttl.py new file mode 100644 index 0000000..c8d2156 --- /dev/null +++ b/scripts/codelists2ttl.py @@ -0,0 +1,193 @@ +############################################################################### +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +############################################################################### + +import csv +from pathlib import Path +import shutil +from string import Template + + +def gen_skos_register(subregisters: list) -> str: + """ + Generate SKOS Register TTL + + :param name: identifier of collection + :param description: label of collection + :param source: concept that is defined externally + + :returns: `str` of SKOS Register TTL + """ + + REGISTER = ''' +@prefix skos: . +@prefix dct: . +@prefix ldp: . +@prefix reg: . +@prefix rdfs: . +@prefix owl: . + + a reg:Register , ldp:Container ; + rdfs:label "WIS" ; + reg:notation "wis" ; + dct:description "WMO Core Metadata Profile"@en ; + reg:subregister ''' + + REGISTER += ' , '.join(subregisters) + ' ; \n' + + REGISTER += 'rdfs:member' + ' , '.join(subregisters) + + REGISTER += ' .' 
+ + return REGISTER.strip() + + +def gen_skos_subregister(name: str, description: str, + source: str = None) -> str: + """ + Generate SKOS Sub-register TTL + + :param name: identifier of collection + :param description: label of collection + :param source: concept that is defined externally + + :returns: `str` of SKOS Sub-register TTL + """ + + SUBREGISTER = ''' +@prefix skos: . +@prefix dct: . +@prefix ldp: . +@prefix reg: . +@prefix rdfs: . +@prefix owl: . + +<$name> a reg:Register , skos:Collection , ldp:Container ; + ldp:hasMemberRelation skos:member ; + rdfs:label "$name" ; + dct:description "$description"''' + + template_vars = { + 'name': name, + 'description': description + } + + if source != '': + SUBREGISTER += ' ;\n owl:sameAs "$source" .' + template_vars['source'] = source + else: + SUBREGISTER += ' .' + + return Template(SUBREGISTER).substitute(template_vars).strip() + + +def gen_skos_concept(name: str, description: str, source: str = None) -> str: + """ + Generate SKOS Concept TTL + + :param name: identifier of collection + :param description: label of collection + :param source: concept that is defined externally + + :returns: `str` of SKOS Concept TTL + """ + + CONCEPT = ''' +@prefix skos: . +@prefix rdfs: . +@prefix dct: . +@prefix owl: . + +<$name> a skos:Concept ; + rdfs:label "$name" ; + skos:notation "$name" ; + dct:description "$description"@en''' + + template_vars = { + 'name': name, + 'description': description + } + + if source != '': + CONCEPT += ' ;\n owl:sameAs "$source" .' + template_vars['source'] = source + else: + CONCEPT += ' .' 
+ + return Template(CONCEPT).substitute(template_vars).strip() + + +REGISTER = 'http://codes.wmo.int/wis' + +ROOTPATH = Path.cwd() +CSV_FILES_PATH = ROOTPATH / 'codelists' +TTL_FILES_PATH = ROOTPATH / 'wis' +COLLECTIONS = [] + +print('Generating WCMP2 TTL files') + +ttl_files_path = ROOTPATH / 'wis' + +if ttl_files_path.exists(): + shutil.rmtree(ttl_files_path) + +ttl_files_path.mkdir() + +root_table = ROOTPATH / 'codelists.csv' + +with root_table.open() as fh: + subregisters = [] + reader = csv.DictReader(fh) + for row in reader: + subregisters.append(f"") + register_ttl_dir = ttl_files_path / row['Name'] + register_ttl_file = ttl_files_path / f"{row['Name']}.ttl" + print(f'Generating {register_ttl_file}') + + register_ttl_dir.mkdir() + + with register_ttl_file.open('w') as fh2: + ttl = gen_skos_subregister(row['Name'], + row['Description'], + row['Source']) + + fh2.write(ttl) + + concept_csv_file = ROOTPATH / 'codelists' / f"{row['Name']}.csv" + concept_ttl_file = register_ttl_dir / f"{row['Name']}.ttl" + + with concept_csv_file.open() as fh2: + reader2 = csv.DictReader(fh2) + for row2 in reader2: + concept_ttl_file = register_ttl_dir / f"{row2['Name']}.ttl" + print(f'Generating {concept_ttl_file}') + with concept_ttl_file.open('w') as fh3: + ttl = gen_skos_concept(row2['Name'], + row2['Description'], + row2['Source']) + + fh3.write(ttl) + + register_ttl_file = ttl_files_path / 'wis.ttl' + print(f'Generating {register_ttl_file}') + with register_ttl_file.open('w') as fh: + fh.write(gen_skos_register(subregisters)) + +print('Done') diff --git a/scripts/makeWCMP2Entities.py b/scripts/makeWCMP2Entities.py deleted file mode 100644 index 2984a90..0000000 --- a/scripts/makeWCMP2Entities.py +++ /dev/null @@ -1,70 +0,0 @@ -import csv -import os -import re -import warnings -import shutil - -collectionTemplate = ('@prefix skos: . \n' - '@prefix dct: . 
\n' - '@prefix ldp: .\n' - '@prefix reg: .\n' - '@prefix rdfs: .\n' - '@prefix owl: .\n\n' - '<{identity}> a reg:Register , skos:Collection , ldp:Container ;\n' - '\tldp:hasMemberRelation skos:member ;\n' - '\trdfs:label "{label}" ;\n' - '\tdct:description "{description}"') - -conceptTemplate = ('@prefix skos: . \n' - '@prefix rdfs: .\n' - '@prefix dct: .\n' - '@prefix owl: .\n\n' - '<{identity}> a skos:Concept ;\n' - '\trdfs:label "{label}" ;\n' - '\tskos:notation "{notation}" ;\n' - '\tdct:description "{description}"') - -def clean(astr): - if '"' in astr: - astr = astr.replace('"', "'") - astr = astr.strip() - return astr - -def main(): - print('Make WCMP2 TTL contents') - root_path = os.path.split(os.path.dirname(__file__))[0] - if os.path.exists(os.path.join(root_path, 'wis')): - shutil.rmtree(os.path.join(root_path, 'wis')) - os.mkdir(os.path.join(root_path, 'wis')) - - with open(os.path.join(root_path, 'codelists', 'wcmp2-tables.csv'), encoding='utf-8') as wcmp2tables: - reader = csv.reader(wcmp2tables, delimiter=',', quotechar='"') - for wcmp2table in reader: - identifier = wcmp2table[0] - if not os.path.exists(os.path.join(root_path, 'codelists', '{}.csv'.format(wcmp2table[0]))): - raise ValueError('WCMP2 Table {} missing from path'.format(wcmp2table[0])) - with open(os.path.join(root_path, 'wis', '{}.ttl'.format(identifier)), 'w', encoding='utf-8') as ttlf: - if wcmp2table[2] == "" or wcmp2table[2] is None: - ttlf.write(collectionTemplate.format(identity=clean(wcmp2table[0]), label=clean(wcmp2table[0]), - description=clean(wcmp2table[1])) + '.\n') - else: - ttlf.write(collectionTemplate.format(identity=clean(wcmp2table[0]), label=clean(wcmp2table[0]), - description=clean(wcmp2table[1])) + ';\n\towl:sameAs "' + wcmp2table[2] + '.\n') - if not os.path.exists(os.path.join(root_path, 'wis', identifier)): - os.mkdir(os.path.join(root_path, 'wis', identifier)) - with open(os.path.join(root_path, 'codelists', '{}.csv'.format(wcmp2table[0])), encoding='utf-8') as 
wcmp2entries: - wcmp2_reader = csv.DictReader(wcmp2entries) - for entry in wcmp2_reader: - with open(os.path.join(root_path, 'wis', identifier, '{}.ttl'.format(entry['Name'])), 'w', encoding='utf-8') as entryfile: - if entry['Source'] == "" or entry['Source'] is None: - entryfile.write(conceptTemplate.format(identity=entry['Name'], notation=entry['Name'], - label=clean(entry['Name']), - description=clean(entry['Description'])) + '@en\t.\n') - else: - entryfile.write(conceptTemplate.format(identity=entry['Name'], notation=entry['Name'], - label=clean(entry['Name']), - description=clean(entry['Description'])) + ';\n\towl:sameAs "{source}"@en\t.\n') - - -if __name__ == '__main__': - main() diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 0000000..f229360 --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1 @@ +requests diff --git a/scripts/uploadChanges.py b/scripts/uploadChanges.py deleted file mode 100644 index 1ca6be5..0000000 --- a/scripts/uploadChanges.py +++ /dev/null @@ -1,138 +0,0 @@ -import argparse -import os -import requests - -""" -This script uploads TTL files to the defined register - ./prodRegister or ./testRegister - -This requires an authentication token/password, userID and the name of directory with TTL files. 
-""" - - -def authenticate(base_url, user_id, password): - """Constructs authenticated session (with JSESSIONID cookie).""" - url = f"{base_url}/system/security/apilogin" - print(f'Authenticating at "{url}"') - session = requests.Session() - auth = session.post(url, data={"userid": user_id, "password": password}) - if not auth.status_code == 200: - raise ValueError("Authentication failed") - return session - - -def post(session, url, payload, dry_run, verbose): - """Posts new content to the intended parent register.""" - headers = {"Content-type": "text/turtle; charset=UTF-8"} - response = session.get(url, headers=headers) - # params = {'status':'experimental'} - params = {"status": "stable"} - if not dry_run: - if verbose: - print(f' Posting to: "{url}"') - # print(f'payload: {payload.encode("utf-8")}') - print(f" headers: {headers}") - print(f" params: {params}") - res = session.post(url, headers=headers, data=payload.encode("utf-8"), params=params, stream=False) - if res.status_code != 201: - print(f' POST failed with {res.status_code} {res.reason}:\n {res.content.decode("utf-8")}') - elif verbose: - print(f" POST succeeded with {res.status_code} {res.reason}\n") - else: - print(f' Would post to: "{url}"') - print(f" headers: {headers}") - print(f" params: {params}") - - -def put(session, url, payload, dry_run, verbose): - """Updates definition of a register or entity.""" - headers = {"Content-type": "text/turtle; charset=UTF-8"} - # params = {'status':'experimental'} - params = {"status": "stable"} - # for register update adjust the URL - if "reg:Register" in payload: - url += "?non-member-properties" - if not dry_run: - if verbose: - print(f' Putting to: "{url}"') - # print(f'payload: {payload.encode("utf-8")}') - print(f" headers: {headers}") - print(f" params: {params}") - res = session.put(url, headers=headers, data=payload.encode("utf-8"), params=params) - if res.status_code != 204: - print(f' PUT failed with {res.status_code} {res.reason}:\n 
{res.content.decode("utf-8")}') - elif verbose: - print(f" PUT succeeded with {res.status_code} {res.reason}\n") - else: - print(f' Would put to "{url}"') - print(f" headers: {headers}") - print(f" params: {params}") - - -def upload(session, url, payload, dry_run, verbose): - """PUTs or POSTs given data depending if it already exists or not.""" - headers = {"Content-type": "text/turtle; charset=UTF-8"} - if verbose: - print(f" Checking {url}:", end=" ") - response = session.get(url, headers=headers) - if response.status_code == 200: - if verbose: - print("Existing entry, using PUT") - put(session, url, payload, dry_run, verbose) - elif response.status_code == 404: - if verbose: - print("New entry, using POST") - url = "/".join(url.split("/")[:-1]) - post(session, url, payload, dry_run, verbose) - else: - raise ValueError( - f'Cannot upload to {url}: {response.status_code} {response.reason}:\n {response.content.decode("utf-8")}' - ) - - -def upload_file(session, rootURL, file_path, dry_run, verbose): - """Uploads given TTL file to the registry.""" - with open(file_path, "r", encoding="utf-8") as file: - ttl_data = file.read() - relID = file_path.replace(".ttl", "") - url = f"{rootURL}/{relID}" - print(f"Uploading {file_path}") - upload(session, url, ttl_data, dry_run, verbose) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("user_id", help='User ID, in form "https://api.github.com/users/"') - parser.add_argument( - "pass_code", help='Password or token generated at "https://ci.codes.wmo.int/ui/temporary-password"' - ) - parser.add_argument("mode", help='Mode: "test" or "prod"') - parser.add_argument("directory", help="Name of the folder with TTL files to upload.") - parser.add_argument( - "-n", - "--dry-run", - action="store_true", - help="Only print what would be uploaded without actually sending anything.", - ) - parser.add_argument("-v", "--verbose", action="store_true", help="Print more details.") - args = 
parser.parse_args() - - if not os.path.isdir(args.directory): - raise ValueError(f'Directory "{args.directory}" does not exists.') - if args.mode not in ["test", "prod"]: - raise ValueError('Mode must be either "test" or "prod"') - if args.mode == "prod": - with open("prodRegister", "r", encoding="utf-8") as fh: - base_url = fh.read().split("\n")[0] - elif args.mode == "test": - with open("testRegister", "r", encoding="utf-8") as fh: - base_url = fh.read().split("\n")[0] - - print(f"Running upload with respect to {base_url}") - - session = authenticate(base_url, args.user_id, args.pass_code) - for root, dirs, files in os.walk(args.directory): - for file_ in files: - if file_.endswith(".ttl"): - filename = os.path.join(root, file_) - upload_file(session, base_url, filename, args.dry_run, args.verbose) diff --git a/scripts/upload_changes.py b/scripts/upload_changes.py new file mode 100644 index 0000000..09420b8 --- /dev/null +++ b/scripts/upload_changes.py @@ -0,0 +1,252 @@ +############################################################################### +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +############################################################################### + +import argparse +from pathlib import Path +import requests + +""" +This script uploads TTL files to the defined test of production register + +This requires authentication in the form of user_id, API Key +and the name of directory with TTL files to be published. +""" + +HEADERS = { + 'Content-type': 'text/turtle; charset=UTF-8' +} + +PROD_REGISTRY = 'https://codes.wmo.int' +TEST_REGISTRY = 'https://ci.codes.wmo.int' + + +def authenticate(base_url: str, user_id: str, + password: str) -> requests.Session: + """ + Constructs authenticated session (with JSESSIONID cookie) + + :param base_url: base URL of registry API + :param user_id: User ID + :param passwor: password + + :returns: Session for further interaction upon successful login + """ + + url = f'{base_url}/system/security/apilogin' + print(f'Authenticating at {url}') + + session = requests.Session() + + data = { + 'userid': user_id, + 'password': password + } + + auth = session.post(url, data=data) + + if auth.status_code != 200: + raise ValueError('Authentication failed') + + return session + + +def post(session: requests.Session, url: str, payload: str, + dry_run: bool, verbose: bool) -> None: + """ + Posts new content to the intended parent register + + :param session: API session + :param url: URL of HTTP POST + :param payload: HTTP POST payload + :param dry_run: whether to run as a dry run (simulates request only) + :param verbose: whether to provide verbose output + + :returns: `None` + """ + + params = { + 'status': 'stable' + } + + if not dry_run: + if verbose: + print(f' Posting to: {url}') + print(f' headers: {HEADERS}') + print(f' params: {params}') + + res = session.post(url, headers=HEADERS, data=payload.encode('utf-8'), + params=params, stream=False) + + if res.status_code != 201: + print(f' POST failed with {res.status_code} {res.reason}: {res.content.decode("utf-8")}') # noqa + elif verbose: + print(f' POST 
succeeded with {res.status_code} {res.reason}') + else: + print(f' HTTP POST (dry run) to: {url}') + print(f' headers: {HEADERS}') + print(f' params: {params}') + + return + + +def put(session: requests.Session, url: str, payload: str, + dry_run: bool, verbose: bool) -> None: + """ + Updates definition of a register or entity. + + :param session: API session + :param url: URL of HTTP POST + :param payload: HTTP POST payload + :param dry_run: whether to run as a dry run (simulates request only) + :param verbose: whether to provide verbose output + + :returns: `None` + """ + + # params = {'status':'experimental'} + params = {'status': 'stable'} + # for register update adjust the URL + if 'reg:Register' in payload: + url += '?non-member-properties' + if not dry_run: + if verbose: + print(f' HTTP PUT to: {url}') + print(f' headers: {HEADERS}') + print(f' params: {params}') + + res = session.put(url, headers=HEADERS, data=payload.encode('utf-8'), + params=params) + + if res.status_code != 204: + print(f' PUT failed with {res.status_code} {res.reason}: {res.content.decode("utf-8")}') # noqa + elif verbose: + print(f' PUT succeeded with {res.status_code} {res.reason}') + else: + print(f' HTTP PUT (dry run) to {url}') + print(f' headers: {HEADERS}') + print(f' params: {params}') + + return + + +def upload(session: requests.Session, url: str, payload: str, + dry_run: bool, verbose: bool) -> None: + """ + PUTs or POSTs given data depending if it already exists or not + + :param session: API session + :param url: URL of HTTP POST + :param payload: HTTP POST payload + :param dry_run: whether to run as a dry run (simulates request only) + :param verbose: whether to provide verbose output + + :returns: `None` + """ + + if verbose: + print(f' Checking {url}') + + response = session.get(url, headers=HEADERS) + + if response.status_code == 200: + if verbose: + print('Existing entry, using PUT') + put(session, url, payload, dry_run, verbose) + elif response.status_code == 404: + if 
verbose: + print('New entry, using POST') + url = '/'.join(url.split('/')[:-1]) + post(session, url, payload, dry_run, verbose) + else: + raise ValueError( + f'Cannot upload to {url}: {response.status_code} {response.reason}: {response.content.decode("utf-8")}' # noqa + ) + + +def upload_file(session: requests.Session, url: str, filepath: Path, + dry_run: bool, verbose: bool) -> None: + """ + Uploads given TTL file to the registry + + :param session: API session + :param url: URL of HTTP POST + :param filepath : `path.Path` of filepath + :param dry_run: whether to run as a dry run (simulates request only) + :param verbose: whether to provide verbose output + + :returns: `None` + """ + + with filepath.open(encoding='utf-8') as fh: + ttl_data = fh.read() + rel_id = filepath.parent / filepath.stem + if filepath.stem == 'wis': + rel_id = filepath.stem + url = f'{url}/{rel_id}' + + print(f'Uploading {filepath} to {url}') + upload(session, url, ttl_data, dry_run, verbose) + + return + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('user_id', + help='User ID, in form https://api.github.com/users/') # noqa + parser.add_argument( + 'password', + help=f'Password or token generated at {TEST_REGISTRY}/ui/temporary-password' # noqa + ) + parser.add_argument('mode', help='Mode: test or prod') + parser.add_argument('directory', + help='Name of the directory with TTL files to upload.') + parser.add_argument( + '-n', + '--dry-run', + action='store_true', + help='Print what would be uploaded without actually sending anything.' 
+ ) + parser.add_argument('-v', '--verbose', + action='store_true', help='Print more details.') + + args = parser.parse_args() + + REGISTRY = TEST_REGISTRY + + if not Path(args.directory).is_dir(): + raise ValueError(f'Directory {args.directory} does not exists.') + if args.mode not in ['test', 'prod']: + raise ValueError('Mode must be either test or prod') + if args.mode == 'prod': + REGISTRY = PROD_REGISTRY + + print(f'Running upload against {REGISTRY}') + + session = authenticate(REGISTRY, args.user_id, args.password) + + # cleanup if needed + # session.delete('https://ci.codes.wmo.int/wis') + + for filename in Path(args.directory).rglob('*.ttl'): + upload_file(session, REGISTRY, filename, args.dry_run, args.verbose) + + print('Done') diff --git a/testRegister b/testRegister deleted file mode 100644 index a29eca8..0000000 --- a/testRegister +++ /dev/null @@ -1 +0,0 @@ -https://ci.codes.wmo.int