diff --git a/repo2docker/contentproviders/dataverse.py b/repo2docker/contentproviders/dataverse.py index 9054f53c..1be1d49b 100644 --- a/repo2docker/contentproviders/dataverse.py +++ b/repo2docker/contentproviders/dataverse.py @@ -54,37 +54,7 @@ def detect(self, doi, ref=None, extra_args=None): return query_args = parse_qs(parsed_url.query) - # Corner case handling - if parsed_url.path.startswith("/file.xhtml"): - # There's no way of getting file information using its persistentId, the only thing we can do is assume that doi - # is structured as "doi:/" and try to handle dataset that way. - new_doi = doi.rsplit("/", 1)[0] - if new_doi == doi: - # tough luck :( Avoid inifite recursion and exit. - return - return self.detect(new_doi) - elif parsed_url.path.startswith("/api/access/datafile"): - # Raw url pointing to a datafile is a typical output from an External Tool integration - entity_id = os.path.basename(parsed_url.path) - search_query = "q=entityId:" + entity_id + "&type=file" - # Knowing the file identifier query search api to get parent dataset - search_url = urlunparse( - parsed_url._replace(path="/api/search", query=search_query) - ) - self.log.debug("Querying Dataverse: " + search_url) - data = self.urlopen(search_url).json()["data"] - if data["count_in_response"] != 1: - self.log.debug( - f"Dataverse search query failed!\n - doi: {doi}\n - url: {url}\n - resp: {json.dump(data)}\n" - ) - return - - self.record_id = deep_get(data, "items.0.dataset_persistent_id") - elif ( - parsed_url.path.startswith("/dataset.xhtml") - and "persistentId" in query_args - ): - self.record_id = deep_get(query_args, "persistentId.0") + self.record_id = deep_get(query_args, "persistentId.0") if hasattr(self, "record_id"): return {"record": self.record_id, "host": host}