Skip to content

Commit

Permalink
Avoid upload of unchanged TTL files #22
Browse files Browse the repository at this point in the history
The server adds extra elements, so the comparison is a bit tricky: basically, we verify that all "local stuff" is present in the server's version.
  • Loading branch information
josusky committed Nov 3, 2024
1 parent 5c8f065 commit 929d3df
Showing 1 changed file with 58 additions and 22 deletions.
80 changes: 58 additions & 22 deletions scripts/upload_changes.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,25 @@
import argparse
from pathlib import Path
import requests
import rdflib
import rdflib.compare
from enum import Enum


HEADERS = {
'Content-type': 'text/turtle; charset=UTF-8'
}

PROD_REGISTRY = 'https://codes.wmo.int'
TEST_REGISTRY = 'https://ci.codes.wmo.int'
PUBLIC_ID_PREFIX = 'http://codes.wmo.int'


# Outcome of comparing a local TTL file with the registry's copy.
class CheckResult(Enum):
    """Result of checking a local TTL file against the server version."""

    # Every local triple is already present on the server; nothing to upload.
    EQUAL = 1
    # The resource exists on the server but local triples are missing there.
    CHANGED = 2
    # The server does not know the resource yet (HTTP 404).
    NEW = 3


def authenticate(base_url: str, user_id: str,
Expand Down Expand Up @@ -142,45 +154,58 @@ def put(session: requests.Session, url: str, payload: str,
return


def check_file(session: requests.Session, url: str, public_id: str,
               local_ttl: str, verbose: bool) -> CheckResult:
    """
    Compares local file with the server version (if any) as graphs.

    The server adds extra elements to what was uploaded, so the two graphs
    are never expected to be identical. The local file is considered
    unchanged when every local triple is also present in the server's
    version, i.e. when the local graph is a subset of the server graph.

    :param session: API session
    :param url: URL of the server resource to compare with.
    :param public_id: Id of the resource, used as the base URI when parsing
                      the local TTL.
    :param local_ttl: Current local TTL representation.
    :param verbose: Whether to provide verbose output

    :returns: `CheckResult.EQUAL` if the local TTL is a subset of the
              server's version, `CheckResult.CHANGED` if the resource exists
              but some local triples are missing on the server,
              `CheckResult.NEW` if the server responds with 404.
    :raises ValueError: if the server responds with a status other than
                        200 or 404.
    """

    # to check existence adjust the URL
    url_to_check = url + '/'
    # Ask for Turtle explicitly and request an uncached response.
    headers_to_check = {
        'Accept': 'text/turtle',
        'Cache-Control': 'private, no-store, no-cache, max-age=0'
    }
    if verbose:
        print(f' Checking {url_to_check} - ', end=' ')

    response = session.get(url_to_check, headers=headers_to_check)

    if response.status_code == 404:
        if verbose:
            print('New.')
        return CheckResult.NEW

    if response.status_code != 200:
        raise ValueError(
            f'Cannot check {url_to_check}: '
            f'{response.status_code} {response.reason}: '
            f'{response.content.decode("utf-8")}'
        )

    if verbose:
        print('Existing entry, going to compare:', end=' ')

    server_rdf = rdflib.Graph()
    server_rdf.parse(data=response.text, format='n3')

    local_rdf = rdflib.Graph()
    local_rdf.parse(data=local_ttl, format='n3', publicID=public_id)

    # Only the triples that exist locally but not on the server matter;
    # whatever the server has in addition is ignored.
    _, in_local, _ = rdflib.compare.graph_diff(local_rdf, server_rdf)

    if len(in_local) == 0:
        if verbose:
            print('Equal.')
        return CheckResult.EQUAL

    if verbose:
        # Terminate the progress line started above with end=' '.
        print('Changed.')
    return CheckResult.CHANGED


def upload_file(session: requests.Session, url: str, filepath: Path,
dry_run: bool, verbose: bool, status: str) -> None:
def process_file(session: requests.Session, url: str, filepath: Path,
dry_run: bool, verbose: bool, status: str) -> None:
"""
Uploads given TTL file to the registry
Expand All @@ -200,9 +225,20 @@ def upload_file(session: requests.Session, url: str, filepath: Path,
if filepath.stem == 'wis':
rel_id = filepath.stem
url = f'{url}/{rel_id}'
public_id = f'{PUBLIC_ID_PREFIX}/{rel_id}'

print(f'Processing {filepath}')

print(f'Uploading {filepath} to {url}')
upload(session, url, ttl_data, dry_run, verbose, status)
result = check_file(session, url, public_id, ttl_data, verbose)
if result == CheckResult.CHANGED:
put(session, url, ttl_data, dry_run, verbose, status)
elif result == CheckResult.NEW:
if verbose:
print('New entry, using POST')
url = '/'.join(url.split('/')[:-1])
post(session, url, ttl_data, dry_run, verbose, status)
else:
print(" Unchanged entry, nothing to do.")

return

Expand Down Expand Up @@ -249,7 +285,7 @@ def upload_file(session: requests.Session, url: str, filepath: Path,
# session.delete('https://ci.codes.wmo.int/wis')

for filename in Path(args.directory).rglob('*.ttl'):
upload_file(session, REGISTRY, filename, args.dry_run,
args.verbose, args.status)
process_file(session, REGISTRY, filename, args.dry_run,
args.verbose, args.status)

print('Done')

0 comments on commit 929d3df

Please sign in to comment.