forked from heremaps/flatdata
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for reading from TAR archives to flatdata-py (heremaps#182)
Signed-off-by: Christian Ocker <[email protected]>
- Loading branch information
Showing 3 changed files with 149 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
102 changes: 102 additions & 0 deletions
102
flatdata-py/flatdata/lib/tar_archive_resource_storage.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
''' | ||
Copyright (c) 2021 HERE Europe B.V. | ||
See the LICENSE file in the root of this project for license details. | ||
''' | ||
|
||
import mmap | ||
import tarfile | ||
import os | ||
|
||
from .errors import MissingResourceError | ||
from .file_resource_storage import FileResourceStorage | ||
|
||
|
||
class TarArchiveResourceStorage:
    """
    Resource storage that reads resources out of an uncompressed TAR archive.

    The archive is memory-mapped once; every regular file in it is indexed by
    its name as ``(data offset, size)`` so a resource can be served as a
    window into the shared map. Directories are served as nested storages
    that share the same map and index, differing only in ``sub_path``.
    """

    def __init__(self, tar_map, file_entries, dir_entries, sub_path):
        """
        :param tar_map: memory map (or other buffer) covering the whole archive
        :param file_entries: dict mapping member name -> (offset, length)
        :param dir_entries: set of directory member names
        :param sub_path: path prefix prepended to every key, "" for the root
        """
        self.tar_map = tar_map
        self.file_entries = file_entries
        self.dir_entries = dir_entries
        self.sub_path = sub_path

    @classmethod
    def create(cls, tar_path, sub_path=""):
        """
        Memory-map the archive at ``tar_path`` and index its members.

        The archive is opened with mode ``"r:"`` (no compression), since the
        stored offsets must point directly into the mapped file.

        :raises CorruptResourceError: if the archive contains a sparse member
        """
        tar_map = FileResourceStorage.memory_map(tar_path)
        file_entries = {}
        dir_entries = set()
        with tarfile.open(tar_path, "r:") as tar:
            for member in tar:
                name = member.name
                if name.startswith("./"):
                    name = name[2:]
                # Sparse members do not store their data contiguously, so an
                # (offset, size) window would be wrong. issparse() also covers
                # PAX-style sparse members whose type is a plain regular file.
                if member.type == tarfile.GNUTYPE_SPARSE or member.issparse():
                    # Imported here because the module-level imports only bring
                    # in MissingResourceError.
                    # NOTE(review): assumes .errors defines CorruptResourceError
                    # and that it accepts a message - confirm.
                    from .errors import CorruptResourceError
                    raise CorruptResourceError("Sparse files are not supported")
                if member.isreg():
                    file_entries[name] = (member.offset_data, member.size)
                elif member.isdir():
                    dir_entries.add(name)

        return cls(tar_map, file_entries, dir_entries, sub_path)

    def get(self, key, is_optional=False):
        """
        Look up ``key`` relative to this storage's ``sub_path``.

        :return: a MemoryMapSection for a file, a nested
                 TarArchiveResourceStorage for a directory, or None if the
                 key is missing and ``is_optional`` is true
        :raises MissingResourceError: if the key is missing and not optional
        """
        path = self._path(key)
        entry = self.file_entries.get(path)
        if entry is not None:
            offset, length = entry
            return MemoryMapSection(self.tar_map, offset, length)

        if path in self.dir_entries:
            return TarArchiveResourceStorage(self.tar_map, self.file_entries, self.dir_entries, path)

        if is_optional:
            return None
        raise MissingResourceError(key)

    def _path(self, key):
        """Return ``key`` prefixed with ``sub_path`` (if any), joined by '/'."""
        if not self.sub_path:
            return key
        return self.sub_path + '/' + key
|
||
|
||
class MemoryMapSection:
    """
    Represent a slice of a memory mapped file.

    The section covers ``length`` bytes of ``inner`` starting at ``offset``.
    It keeps its own read position, so as to emulate pointing to a dedicated
    file, and supports indexing and slicing relative to the section start.
    """

    def __init__(self, inner, offset, length):
        """
        :param inner: underlying map; must support indexing, seek(), read()
                      and size()
        :param offset: position of the section's first byte inside ``inner``
        :param length: number of bytes in the section
        """
        self.inner = inner
        self.offset = offset
        self.length = length
        self.pos = 0

    def __len__(self):
        return self.size()

    def __getitem__(self, key):
        """
        Index or slice the section with regular Python sequence semantics.

        Negative indices and bounds count from the end of the section; they
        never reach into the bytes that precede it in the underlying map.

        :raises IndexError: if an integer index is outside the section
        """
        if isinstance(key, slice):
            start, stop, step = key.indices(self.length)
            if step == 1:
                # Common case: slice the underlying map directly.
                return self.inner[self.offset + start:self.offset + stop]
            # Strided or reversed slices: cut the section out first so that
            # the slice bounds cannot wrap around inside the underlying map.
            window = self.inner[self.offset:self.offset + self.length]
            return window[key]

        index = key + self.length if key < 0 else key
        if not 0 <= index < self.length:
            raise IndexError('index out of range')
        return self.inner[self.offset + index]

    def read(self, n=None):
        """
        Read up to ``n`` bytes from the current position and advance it.

        ``None`` or a negative ``n`` reads everything that is left. The read
        never extends past the end of the section.
        """
        remaining = max(self.length - self.pos, 0)
        if n is None or n < 0 or n > remaining:
            n = remaining
        self.inner.seek(self.offset + self.pos)
        data = self.inner.read(n)
        self.pos += len(data)
        return data

    def size(self):
        """
        Return the section length, capped by what the underlying map holds
        after ``offset`` and never below zero.
        """
        return max(0, min(self.length, self.inner.size() - self.offset))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
from common import * | ||
from flatdata.generator.engine import Engine | ||
from flatdata.lib.file_resource_storage import FileResourceStorage | ||
from flatdata.lib.tar_archive_resource_storage import TarArchiveResourceStorage | ||
|
||
import tarfile | ||
import tempfile | ||
import os | ||
from nose.tools import eq_, assert_is_instance | ||
|
||
|
||
def check_signed_struct(s):
    """Assert that fields a, b, c and d of ``s`` hold the expected values."""
    expectations = (
        (-0x1, "a"),
        (0x01234567, "b"),
        (-0x28, "c"),
        (0, "d"),
    )
    # Fields are read one at a time, in order, right before each comparison.
    for expected, field_name in expectations:
        eq_(expected, getattr(s, field_name))
|
||
|
||
def test_instance_reading():
    """
    Pack the archive resources into a TAR file and read the instance
    resource back through TarArchiveResourceStorage.
    """
    module = Engine(INSTANCE_TEST_SCHEMA).render_python_module()
    valid_data = {
        "Archive.archive": ARCHIVE_SIGNATURE_PAYLOAD,
        "Archive.archive.schema": module.backward_compatibility_Archive.schema().encode(),
        "resource": RESOURCE_PAYLOAD,
        "resource.schema": module.backward_compatibility_Archive.resource_schema('resource').encode()
    }

    with tempfile.TemporaryDirectory() as tmpdir:
        archive_path = os.path.join(tmpdir, "archive.tar")
        # arcname stores each member under its bare key, so there is no need
        # to change (and later restore) the process-wide working directory.
        with tarfile.open(archive_path, "w") as tar:
            for key, value in valid_data.items():
                payload_path = os.path.join(tmpdir, key)
                with open(payload_path, "wb") as file:
                    file.write(value)
                tar.add(payload_path, arcname=key)

        archive = module.backward_compatibility_Archive(TarArchiveResourceStorage.create(archive_path))
        check_signed_struct(archive.resource)