diff --git a/.github/workflows/test-docker.yml b/.github/workflows/test-docker.yml index df6c62f..2f66015 100644 --- a/.github/workflows/test-docker.yml +++ b/.github/workflows/test-docker.yml @@ -53,6 +53,8 @@ jobs: echo "Testing removing subscription" # test deleting subscriptions docker exec subscriber bash -c "source /home/wis2downloader/.venv/bin/activate && wis2downloader remove-subscription --topic cache/a/wis2/+/services/#" + # clean up, remove test download + docker exec subscriber bash -c "rm \"./app/data/downloads/$(date +'%Y')/$(date +'%m')/$(date +'%d')/cache/a/wis2/my-centre/services/downloader/openapi.bin\"" - name: Run API tests working-directory: docker/tests run: | @@ -71,12 +73,14 @@ jobs: # publish a test message docker exec publisher pywis-pubsub publish --topic cache/a/wis2/my-centre/services/downloader \ --config /pywis-pubsub/config/config.yml \ - -i test -u "http://subscriber:5000/metrics" + -i test -u "http://subscriber:5000/openapi" sleep 1s # cat file contents (check the published file has been downloaded) - cat "./data/$(date +'%Y')/$(date +'%m')/$(date +'%d')/cache/a/wis2/my-centre/services/downloader/metrics.bin" + cat "./data/$(date +'%Y')/$(date +'%m')/$(date +'%d')/cache/a/wis2/my-centre/services/downloader/openapi.bin" # test deleting subscriptions curl -X DELETE http://localhost:5000/subscriptions/cache/a/wis2/%2B/services/%23 + # clean up, remove test download + docker exec subscriber bash -c "rm \"./app/data/downloads/$(date +'%Y')/$(date +'%m')/$(date +'%d')/cache/a/wis2/my-centre/services/downloader/openapi.bin\"" - name: Shutdown working-directory: docker/tests run: | diff --git a/wis2downloader/downloader/__init__.py b/wis2downloader/downloader/__init__.py index 9b4a881..e5ba823 100644 --- a/wis2downloader/downloader/__init__.py +++ b/wis2downloader/downloader/__init__.py @@ -135,10 +135,9 @@ def process_job(self, job) -> None: # Get information about the job for verification later expected_hash, hash_function = self.get_hash_info(job) - expected_size = job.get('payload', {}).get('content', {}).get('size') # Get the download url, update status, and file type from the job links - _url, update, media_type = self.get_download_url(job) + _url, update, media_type, expected_size = self.get_download_url(job) if _url is None: LOGGER.warning(f"No download link found in job {job}") @@ -236,7 +235,7 @@ def get_topic_and_centre(self, job) -> tuple: def get_hash_info(self, job): expected_hash = job.get('payload', {}).get( - 'properties', {}).get('integrity', {}).get('hash') + 'properties', {}).get('integrity', {}).get('value') hash_method = job.get('payload', {}).get( 'properties', {}).get('integrity', {}).get('method') @@ -244,8 +243,10 @@ def get_hash_info(self, job): # Check if hash method is known using our enumumeration of hash methods if hash_method in VerificationMethods._member_names_: + # get method method = VerificationMethods[hash_method].value - hash_function = hashlib.new(method) + # load and return from the hashlib library + hash_function = getattr(hashlib, method, None) return expected_hash, hash_function @@ -254,18 +255,21 @@ def get_download_url(self, job) -> tuple: _url = None update = False media_type = None + expected_size = None for link in links: if link.get('rel') == 'update': _url = link.get('href') media_type = link.get('type') + expected_size = link.get('length') update = True break elif link.get('rel') == 'canonical': _url = link.get('href') media_type = link.get('type') + expected_size = link.get('length') break - return _url, update, media_type + return _url, update, media_type, expected_size def extract_filename(self, _url) -> tuple: path = urlsplit(_url).path @@ -279,8 +283,12 @@ def validate_data(self, data, expected_hash, hash_function): return True - hash_value = hash_function(data).digest() - hash_value = base64.b64encode(hash_value).decode() + try: + hash_value = hash_function(data).digest() + hash_value = base64.b64encode(hash_value).decode() + except Exception as e: + LOGGER.error(e) + return False if (hash_value != expected_hash) or (len(data) != expected_size): return False