Skip to content

Commit

Permalink
support path names with spaces, fixes #40
Browse files Browse the repository at this point in the history
  • Loading branch information
hnesk committed Apr 21, 2022
1 parent a4a8844 commit 13459bf
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 7 deletions.
18 changes: 13 additions & 5 deletions ocrd_browser/model/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from pathlib import Path
from tempfile import mkdtemp
from datetime import datetime
from urllib.parse import urlparse
from urllib.parse import urlparse, unquote
# noinspection PyProtectedMember
from lxml.etree import ElementBase as Element, _ElementTree as ElementTree

Expand Down Expand Up @@ -72,9 +72,9 @@ def load(cls, mets_url: Union[Path, str] = None, emitter: EventCallBack = None)
"""
if not mets_url:
return cls.create(emitter=emitter)
mets_url = cls._strip_local(mets_url)
mets_path = cls._to_path(mets_url)

workspace = Resolver().workspace_from_url(mets_url, download=False)
workspace = Resolver().workspace_from_url(str(mets_path), download=False)
doc = cls(workspace, emitter=emitter, original_url=mets_url)
doc._empty = False
return doc
Expand Down Expand Up @@ -110,7 +110,7 @@ def save(self, backup_directory: Union[bool, Path, str] = True) -> None:

def save_as(self, mets_url: Union[Path, str], backup_directory: Union[bool, Path, str] = True) -> None:
log = getLogger('ocrd_browser.model.document.Document.save_as')
mets_path = Path(self._strip_local(mets_url, disallow_remote=True))
mets_path = self._to_path(mets_url)

workspace_directory = mets_path.parent
if workspace_directory.exists():
Expand Down Expand Up @@ -496,11 +496,19 @@ def _emit(self, event: str, *args: Any) -> None:
def _strip_local(mets_url: Union[Path, str], disallow_remote: bool = True) -> str:
result = urlparse(str(mets_url))
if result.scheme == 'file' or result.scheme == '':
mets_url = result.path
mets_url = unquote(result.path)
elif disallow_remote:
raise ValueError('invalid url {}'.format(mets_url))
return str(mets_url)

@staticmethod
def _to_path(mets_url: Union[Path, str]) -> Path:
    """
    Convert a local filesystem path or a ``file:`` URL into a ``Path``.

    Only genuine ``file:`` URLs are URL-decoded. ``Path`` objects and plain
    (scheme-less) path strings are taken literally, so characters that are
    legal in file names but special in URLs (``#``, ``?``, ``%``) survive
    unchanged instead of being split off as fragment/query or percent-decoded.

    :param mets_url: a ``Path``, a plain path string, or a ``file:`` URL
    :return: the corresponding filesystem path
    :raises ValueError: if a string carries any scheme other than ``file``
    """
    if isinstance(mets_url, Path):
        # Already a filesystem path: there is nothing to parse or decode.
        return mets_url
    result = urlparse(str(mets_url))
    if result.scheme == 'file':
        # file: URLs are percent-encoded (e.g. a space arrives as %20).
        return Path(unquote(result.path))
    if result.scheme == '':
        # Plain path: use the original string, not result.path, because
        # urlparse would cut it at the first '?' or '#'.
        return Path(str(mets_url))
    raise ValueError('invalid local path/url {}'.format(mets_url))


@staticmethod
def _derive_backup_directory(workspace_directory: Path, now: datetime = None) -> Path:
now = now or datetime.now()
Expand Down
14 changes: 12 additions & 2 deletions tests/model/test_document.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pathlib import Path
from tempfile import TemporaryDirectory

from tests import TestCase, ASSETS_PATH
from tests import TestCase, ASSETS_PATH, TEST_BASE_PATH
from ocrd_browser.model import Document, Page
from datetime import datetime
from ocrd_models.ocrd_page import PcGtsType
Expand Down Expand Up @@ -104,7 +104,7 @@ def test_clone(self):

def test_save(self):
doc = Document.clone(self.path)
with TemporaryDirectory(prefix='browse-ocrd-tests') as directory:
with TemporaryDirectory(prefix='browse-ocrd tests') as directory:
saved_mets = directory + '/mets.xml'
doc.save_as(saved_mets)
saved = Document.load(saved_mets)
Expand Down Expand Up @@ -172,3 +172,13 @@ def test_modify_when_not_editable(self):
def test_modify_when_editable(self):
    # Per the test name, reordering pages on a cloned document should be
    # permitted; the test passes as long as the call does not raise.
    page_order = ['PHYS_0020', 'PHYS_0017']
    Document.clone(self.path).reorder(page_order)

def test_path_with_spaces(self):
    # The workspace directory name contains a space, which as_uri() turns
    # into a percent-encoded file: URI before it is handed to Document.load.
    mets_uri = (TEST_BASE_PATH / 'example/workspaces/heavy quoting/mets.xml').as_uri()
    document = Document.load(mets_uri)
    gt_page = document.page_for_id('PHYS_0017', 'OCR-D-GT-PAGE')
    page_image = document.workspace.image_from_page(gt_page.page, 'PHYS_0017')
    # Getting here means nothing raised; also check the result looks sensible
    self.assertGreater(page_image[0].height, 100)

0 comments on commit 13459bf

Please sign in to comment.