diff --git a/repo2docker/contentproviders/dataverse.py b/repo2docker/contentproviders/dataverse.py index 90215748..57f1cf84 100644 --- a/repo2docker/contentproviders/dataverse.py +++ b/repo2docker/contentproviders/dataverse.py @@ -1,7 +1,7 @@ +import hashlib import json import os import shutil -import hashlib from urllib.parse import parse_qs, urlparse, urlunparse from ..utils import copytree, deep_get, is_doi @@ -119,7 +119,7 @@ def get_datafiles(self, dataverse_host: str, url: str) -> list[dict]: persistent_id = qs["persistentId"][0] # https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/TJCLKP elif path.startswith("/dataset.xhtml"): - # https://dataverse.harvard.edu/api/access/datafile/3323458 + # https://dataverse.harvard.edu/api/access/datafile/3323458 persistent_id = qs["persistentId"][0] elif path.startswith("/api/access/datafile"): # What we have here is an entity id, which we can use to get a persistentId @@ -127,17 +127,27 @@ def get_datafiles(self, dataverse_host: str, url: str) -> list[dict]: persistent_id = self.get_dataset_id_from_file_id(dataverse_host, file_id) elif parsed_url.path.startswith("/file.xhtml"): file_persistent_id = qs["persistentId"][0] - persistent_id = self.get_dataset_id_from_file_id(dataverse_host, file_persistent_id) + persistent_id = self.get_dataset_id_from_file_id( + dataverse_host, file_persistent_id + ) else: - raise ValueError(f"Could not determine persistent id for dataverse URL {url}") + raise ValueError( + f"Could not determine persistent id for dataverse URL {url}" + ) - dataset_api_url = f"{dataverse_host}/api/datasets/:persistentId?persistentId={persistent_id}" + dataset_api_url = ( + f"{dataverse_host}/api/datasets/:persistentId?persistentId={persistent_id}" + ) resp = self._request(dataset_api_url, headers={"accept": "application/json"}) if resp.status_code == 404 and is_ambiguous: # It's possible this is a *file* persistent_id, not a dataset one - persistent_id = self.get_dataset_id_from_file_id(dataverse_host, persistent_id) + persistent_id = self.get_dataset_id_from_file_id( + dataverse_host, persistent_id + ) dataset_api_url = f"{dataverse_host}/api/datasets/:persistentId?persistentId={persistent_id}" - resp = self._request(dataset_api_url, headers={"accept": "application/json"}) + resp = self._request( + dataset_api_url, headers={"accept": "application/json"} + ) if resp.status_code == 404: # This persistent id is just not here diff --git a/tests/contentproviders/test_dataverse.py b/tests/contentproviders/test_dataverse.py index 2ab9ec69..cb5e06f3 100644 --- a/tests/contentproviders/test_dataverse.py +++ b/tests/contentproviders/test_dataverse.py @@ -102,41 +102,41 @@ def test_get_persistent_id(url, persistent_id): ( "https://dataverse.harvard.edu/file.xhtml?persistentId=doi:10.7910/DVN/6ZXAGT/3YRRYJ", "https://dataverse.harvard.edu/citation?persistentId=doi:10.7910/DVN/6ZXAGT/3YRRYJ", - "doi:10.7910/DVN/6ZXAGT/3YRRYJ" + "doi:10.7910/DVN/6ZXAGT/3YRRYJ", ), { - 'ArchaeoGLOBE-master/analysis/figures/1_response_distribution.png': '243c6a3dd66bc3c84102829b277ef333', - 'ArchaeoGLOBE-master/analysis/figures/2_trends_map_knowledge.png': '2ace6ae9d470dda6cf2f9f9a6588171a', - 'ArchaeoGLOBE-master/analysis/figures/3_trends_global.png': '63ccd0a7b2d20440cd8f418d4ee88c4d', - 'ArchaeoGLOBE-master/analysis/figures/4_consensus_transitions.png': 'facfaedabeac77c4496d4b9e962a917f', - 'ArchaeoGLOBE-master/analysis/figures/5_ArchaeoGLOBE_HYDE_comparison.png': '8e002e4d50f179fc1808f562b1353588', - 'ArchaeoGLOBE-master/apt.txt': 'b4224032da6c71d48f46c9b78fc6ed77', - 'ArchaeoGLOBE-master/analysis/archaeoglobe.pdf': 'f575be4790efc963ef1bd40d097cc06d', - 'ArchaeoGLOBE-master/analysis/archaeoglobe.Rmd': 'f37d5f7993fde9ebd64d16b20fc22905', - 'ArchaeoGLOBE-master/ArchaeoGLOBE.Rproj': 'd0250e7918993bab1e707358fe5633e0', - 'ArchaeoGLOBE-master/CONDUCT.md': 'f87ef290340322089c32b4e573d8f1e8', - 'ArchaeoGLOBE-master/.circleci/config.yml': '6eaa54073a682b3195d8fab3a9dd8344', - 'ArchaeoGLOBE-master/CONTRIBUTING.md': 'b3a6abfc749dd155a3049f94a855bf9f', - 'ArchaeoGLOBE-master/DESCRIPTION': '745ef979494999e483987de72c0adfbd', - 'ArchaeoGLOBE-master/dockerfile': 'aedce68e5a7d6e79cbb24c9cffeae593', - 'ArchaeoGLOBE-master/.binder/Dockerfile': '7564a41246ba99b60144afb1d3b6d7de', - 'ArchaeoGLOBE-master/.gitignore': '62c1482e4febbd35dc02fb7e2a31246b', - 'ArchaeoGLOBE-master/analysis/data/derived-data/hyde_crop_prop.RDS': '2aea7748b5586923b0de9d13af58e59d', - 'ArchaeoGLOBE-master/analysis/data/derived-data/kk_anthro_prop.RDS': '145a9e5dd2c95625626a720b52178b70', - 'ArchaeoGLOBE-master/LICENSE.md': '3aa9d41a92a57944bd4590e004898445', - 'ArchaeoGLOBE-master/analysis/data/derived-data/placeholder': 'd41d8cd98f00b204e9800998ecf8427e', - 'ArchaeoGLOBE-master/.Rbuildignore': 'df15e4fed49abd685b536fef4472b01f', - 'ArchaeoGLOBE-master/README.md': '0b0faabe580c4d76a0e0d64a4f54bca4', - 'ArchaeoGLOBE-master/analysis/data/derived-data/README.md': '547fd1a6e874f6178b1cf525b5b9ae72', - 'ArchaeoGLOBE-master/analysis/figures/S1_FHG_consensus.png': 'd2584352e5442b33e4b23e361ca70fe1', - 'ArchaeoGLOBE-master/analysis/figures/S2_EXAG_consensus.png': '513eddfdad01fd01a20263a55ca6dbe3', - 'ArchaeoGLOBE-master/analysis/figures/S3_INAG_consensus.png': 'b16ba0ecd21b326f873209a7e55a8deb', - 'ArchaeoGLOBE-master/analysis/figures/S4_PAS_consensus.png': '05695f9412337a00c1cb6d1757d0ec5c', - 'ArchaeoGLOBE-master/analysis/figures/S5_URBAN_consensus.png': '10119f7495d3b8e7ad7f8a0770574f15', - 'ArchaeoGLOBE-master/analysis/figures/S6_trends_map_landuse.png': 'b1db7c97f39ccfc3a9e094c3e6307af0', - 'ArchaeoGLOBE-master/analysis/figures/S7_ArchaeoGLOBE_KK10_comparison.png': '30341748324f5f66acadb34c114c3e9d', - } - ) + "ArchaeoGLOBE-master/analysis/figures/1_response_distribution.png": "243c6a3dd66bc3c84102829b277ef333", + "ArchaeoGLOBE-master/analysis/figures/2_trends_map_knowledge.png": "2ace6ae9d470dda6cf2f9f9a6588171a", + "ArchaeoGLOBE-master/analysis/figures/3_trends_global.png": "63ccd0a7b2d20440cd8f418d4ee88c4d", + "ArchaeoGLOBE-master/analysis/figures/4_consensus_transitions.png": "facfaedabeac77c4496d4b9e962a917f", + "ArchaeoGLOBE-master/analysis/figures/5_ArchaeoGLOBE_HYDE_comparison.png": "8e002e4d50f179fc1808f562b1353588", + "ArchaeoGLOBE-master/apt.txt": "b4224032da6c71d48f46c9b78fc6ed77", + "ArchaeoGLOBE-master/analysis/archaeoglobe.pdf": "f575be4790efc963ef1bd40d097cc06d", + "ArchaeoGLOBE-master/analysis/archaeoglobe.Rmd": "f37d5f7993fde9ebd64d16b20fc22905", + "ArchaeoGLOBE-master/ArchaeoGLOBE.Rproj": "d0250e7918993bab1e707358fe5633e0", + "ArchaeoGLOBE-master/CONDUCT.md": "f87ef290340322089c32b4e573d8f1e8", + "ArchaeoGLOBE-master/.circleci/config.yml": "6eaa54073a682b3195d8fab3a9dd8344", + "ArchaeoGLOBE-master/CONTRIBUTING.md": "b3a6abfc749dd155a3049f94a855bf9f", + "ArchaeoGLOBE-master/DESCRIPTION": "745ef979494999e483987de72c0adfbd", + "ArchaeoGLOBE-master/dockerfile": "aedce68e5a7d6e79cbb24c9cffeae593", + "ArchaeoGLOBE-master/.binder/Dockerfile": "7564a41246ba99b60144afb1d3b6d7de", + "ArchaeoGLOBE-master/.gitignore": "62c1482e4febbd35dc02fb7e2a31246b", + "ArchaeoGLOBE-master/analysis/data/derived-data/hyde_crop_prop.RDS": "2aea7748b5586923b0de9d13af58e59d", + "ArchaeoGLOBE-master/analysis/data/derived-data/kk_anthro_prop.RDS": "145a9e5dd2c95625626a720b52178b70", + "ArchaeoGLOBE-master/LICENSE.md": "3aa9d41a92a57944bd4590e004898445", + "ArchaeoGLOBE-master/analysis/data/derived-data/placeholder": "d41d8cd98f00b204e9800998ecf8427e", + "ArchaeoGLOBE-master/.Rbuildignore": "df15e4fed49abd685b536fef4472b01f", + "ArchaeoGLOBE-master/README.md": "0b0faabe580c4d76a0e0d64a4f54bca4", + "ArchaeoGLOBE-master/analysis/data/derived-data/README.md": "547fd1a6e874f6178b1cf525b5b9ae72", + "ArchaeoGLOBE-master/analysis/figures/S1_FHG_consensus.png": "d2584352e5442b33e4b23e361ca70fe1", + "ArchaeoGLOBE-master/analysis/figures/S2_EXAG_consensus.png": "513eddfdad01fd01a20263a55ca6dbe3", + "ArchaeoGLOBE-master/analysis/figures/S3_INAG_consensus.png": "b16ba0ecd21b326f873209a7e55a8deb", + "ArchaeoGLOBE-master/analysis/figures/S4_PAS_consensus.png": "05695f9412337a00c1cb6d1757d0ec5c", + "ArchaeoGLOBE-master/analysis/figures/S5_URBAN_consensus.png": "10119f7495d3b8e7ad7f8a0770574f15", + "ArchaeoGLOBE-master/analysis/figures/S6_trends_map_landuse.png": "b1db7c97f39ccfc3a9e094c3e6307af0", + "ArchaeoGLOBE-master/analysis/figures/S7_ArchaeoGLOBE_KK10_comparison.png": "30341748324f5f66acadb34c114c3e9d", + }, + ), ], ) def test_fetch(specs: list[str], md5tree):