From 13459bfc9730274dc0d98ca32dae4d84be42fa60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20K=C3=BCnsebeck?= Date: Thu, 21 Apr 2022 12:42:13 +0200 Subject: [PATCH] support path names with spaces, fixes #40 --- ocrd_browser/model/document.py | 18 +++++++++++++----- tests/model/test_document.py | 14 ++++++++++++-- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/ocrd_browser/model/document.py b/ocrd_browser/model/document.py index 33eec28..04eddaf 100644 --- a/ocrd_browser/model/document.py +++ b/ocrd_browser/model/document.py @@ -22,7 +22,7 @@ from pathlib import Path from tempfile import mkdtemp from datetime import datetime -from urllib.parse import urlparse +from urllib.parse import urlparse, unquote # noinspection PyProtectedMember from lxml.etree import ElementBase as Element, _ElementTree as ElementTree @@ -72,9 +72,9 @@ def load(cls, mets_url: Union[Path, str] = None, emitter: EventCallBack = None) """ if not mets_url: return cls.create(emitter=emitter) - mets_url = cls._strip_local(mets_url) + mets_path = cls._to_path(mets_url) - workspace = Resolver().workspace_from_url(mets_url, download=False) + workspace = Resolver().workspace_from_url(str(mets_path), download=False) doc = cls(workspace, emitter=emitter, original_url=mets_url) doc._empty = False return doc @@ -110,7 +110,7 @@ def save(self, backup_directory: Union[bool, Path, str] = True) -> None: def save_as(self, mets_url: Union[Path, str], backup_directory: Union[bool, Path, str] = True) -> None: log = getLogger('ocrd_browser.model.document.Document.save_as') - mets_path = Path(self._strip_local(mets_url, disallow_remote=True)) + mets_path = self._to_path(mets_url) workspace_directory = mets_path.parent if workspace_directory.exists(): @@ -496,11 +496,19 @@ def _emit(self, event: str, *args: Any) -> None: def _strip_local(mets_url: Union[Path, str], disallow_remote: bool = True) -> str: result = urlparse(str(mets_url)) if result.scheme == 'file' or result.scheme == '': - mets_url = result.path + mets_url = unquote(result.path) elif disallow_remote: raise ValueError('invalid url {}'.format(mets_url)) return str(mets_url) + @staticmethod + def _to_path(mets_url: Union[Path, str]) -> Path: + result = urlparse(str(mets_url)) + if not (result.scheme == 'file' or result.scheme == ''): + raise ValueError('invalid local path/url {}'.format(mets_url)) + return Path(unquote(result.path)) + + @staticmethod def _derive_backup_directory(workspace_directory: Path, now: datetime = None) -> Path: now = now or datetime.now() diff --git a/tests/model/test_document.py b/tests/model/test_document.py index 4006df0..9a9a1fe 100644 --- a/tests/model/test_document.py +++ b/tests/model/test_document.py @@ -1,7 +1,7 @@ from pathlib import Path from tempfile import TemporaryDirectory -from tests import TestCase, ASSETS_PATH +from tests import TestCase, ASSETS_PATH, TEST_BASE_PATH from ocrd_browser.model import Document, Page from datetime import datetime from ocrd_models.ocrd_page import PcGtsType @@ -104,7 +104,7 @@ def test_clone(self): def test_save(self): doc = Document.clone(self.path) - with TemporaryDirectory(prefix='browse-ocrd-tests') as directory: + with TemporaryDirectory(prefix='browse-ocrd tests') as directory: saved_mets = directory + '/mets.xml' doc.save_as(saved_mets) saved = Document.load(saved_mets) @@ -172,3 +172,13 @@ def test_modify_when_not_editable(self): def test_modify_when_editable(self): doc = Document.clone(self.path) doc.reorder(['PHYS_0020', 'PHYS_0017']) + + def test_path_with_spaces(self): + path = TEST_BASE_PATH / 'example/workspaces/heavy quoting/mets.xml' + uri = path.as_uri() + doc = Document.load(uri) + page = doc.page_for_id('PHYS_0017', 'OCR-D-GT-PAGE') + image = doc.workspace.image_from_page(page.page, 'PHYS_0017') + # Assert no exceptions happened and a sensible return value + self.assertGreater(image[0].height, 100) +