Skip to content

Commit

Permalink
Add support for reading from TAR archives to flatdata-py (heremaps#182)
Browse files Browse the repository at this point in the history
Signed-off-by: Christian Ocker <[email protected]>
  • Loading branch information
fermeise committed Oct 6, 2021
1 parent 3edaa5d commit 9429af9
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 0 deletions.
6 changes: 6 additions & 0 deletions flatdata-py/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@

Python 3 implementation of [flatdata](https://github.com/heremaps/flatdata).

## Running the tests

```sh
python3 -m nose
```

## Basic usage

Once you have [created a flatdata schema file](../README.md#creating-a-schema), you can generate a Python module to read your existing `flatdata` archive:
Expand Down
102 changes: 102 additions & 0 deletions flatdata-py/flatdata/lib/tar_archive_resource_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
'''
Copyright (c) 2021 HERE Europe B.V.
See the LICENSE file in the root of this project for license details.
'''

import mmap
import os
import tarfile

from .errors import CorruptResourceError
from .errors import MissingResourceError
from .file_resource_storage import FileResourceStorage


class TarArchiveResourceStorage:
    """
    Resource storage backed by a single uncompressed TAR archive.

    The archive is memory-mapped once. Regular members are handed out as
    `MemoryMapSection` windows into that map, and directory members are
    handed out as nested storages that share the same map and index but
    resolve keys relative to the directory.
    """

    def __init__(self, tar_map, file_entries, dir_entries, sub_path):
        """
        :param tar_map: object returned by `FileResourceStorage.memory_map`
            for the archive (expected to be mmap-like)
        :param file_entries: dict mapping member name to `(offset, size)` of
            the member's data inside the archive
        :param dir_entries: set of directory member names
        :param sub_path: directory inside the archive that keys are resolved
            against; empty for the archive root
        """
        self.tar_map = tar_map
        self.file_entries = file_entries
        self.dir_entries = dir_entries
        self.sub_path = sub_path

    @classmethod
    def create(cls, tar_path, sub_path=""):
        """
        Map the archive at `tar_path` and index its members.

        :param tar_path: path of the TAR archive
        :param sub_path: directory inside the archive to use as root
        :raises CorruptResourceError: if the archive contains a GNU sparse
            member, whose data is not stored contiguously
        """
        tar_map = FileResourceStorage.memory_map(tar_path)
        file_entries = {}
        dir_entries = set()
        # Mode "r:" disables transparent decompression: the recorded offsets
        # are only meaningful if they refer to the raw bytes that are mapped.
        with tarfile.open(tar_path, "r:") as tar:
            for member in tar:
                name = member.name
                # Archives created via `tar -C dir .` prefix every name with "./"
                if name.startswith("./"):
                    name = name[2:]
                if member.type == tarfile.GNUTYPE_SPARSE:
                    raise CorruptResourceError("Sparse files are not supported")
                if member.isreg():
                    file_entries[name] = (member.offset_data, member.size)
                elif member.isdir():
                    dir_entries.add(name)

        return cls(tar_map, file_entries, dir_entries, sub_path)

    def get(self, key, is_optional=False):
        """
        Look up `key` relative to this storage's sub path.

        :return: a `MemoryMapSection` for a file member, a nested
            `TarArchiveResourceStorage` for a directory member, or None if
            the key is missing and `is_optional` is set
        :raises MissingResourceError: if the key is missing and not optional
        """
        path = self._path(key)

        entry = self.file_entries.get(path)
        if entry is not None:
            offset, length = entry
            return MemoryMapSection(self.tar_map, offset, length)

        if path in self.dir_entries:
            # The nested storage shares the map and the index; only the prefix changes
            return TarArchiveResourceStorage(self.tar_map, self.file_entries, self.dir_entries, path)

        if is_optional:
            return None
        raise MissingResourceError(key)

    def _path(self, key):
        """Return the archive member name for `key` below the current sub path."""
        if not self.sub_path:
            return key
        return self.sub_path + '/' + key


class MemoryMapSection:
    """
    Represent a slice of a memory mapped file.
    Keeps track of its own read position, as to emulate pointing to a dedicated file.

    All indices, slices and reads are relative to the section and are confined
    to it, so data of neighbouring sections is never exposed.
    """

    def __init__(self, inner, offset, length):
        """
        :param inner: the underlying mapping; must support indexing and
            slicing, plus `seek`/`read` for `read()` and `size` for `size()`
        :param offset: position of the first byte of the section in `inner`
        :param length: number of bytes in the section
        """
        self.inner = inner
        self.offset = offset
        self.length = length
        self.pos = 0

    def __len__(self):
        return self.size()

    def __getitem__(self, key):
        """
        Return the byte at index `key`, or the bytes selected by slice `key`.

        Negative indices and slice bounds count from the end of the section,
        as for any Python sequence.

        :raises IndexError: if an integer index lies outside the section
        """
        if isinstance(key, slice):
            # slice.indices() resolves None and negative bounds and clamps
            # everything to [0, length], so nothing outside the section is read.
            indices = range(*key.indices(self.length))
            if indices.step == 1:
                start = self.offset + indices.start
                stop = self.offset + max(indices.start, indices.stop)
                return self.inner[start:stop]
            if not indices:
                return self.inner[self.offset:self.offset]
            # Strided or reversed: fetch the smallest contiguous window that
            # covers every selected index, then apply the stride to it. This
            # avoids translating a negative stop into inner coordinates.
            low = min(indices[0], indices[-1])
            high = max(indices[0], indices[-1])
            window = self.inner[self.offset + low:self.offset + high + 1]
            return window[::indices.step]

        index = key + self.length if key < 0 else key
        if not 0 <= index < self.length:
            raise IndexError('index out of range')
        return self.inner[self.offset + index]

    def read(self, n=None):
        """
        Read up to `n` bytes from the current position and advance it.

        :param n: maximum number of bytes; None or a negative value reads
            everything up to the end of the section
        """
        remaining = self.length - self.pos
        if n is None or n < 0:
            n = remaining
        # The inner position is shared between sections, so seek on every read
        self.inner.seek(self.offset + self.pos)
        data = self.inner.read(min(n, remaining))
        self.pos += len(data)
        return data

    def size(self):
        """Return the number of bytes of the section that `inner` actually holds."""
        return max(0, min(self.length, self.inner.size() - self.offset))
41 changes: 41 additions & 0 deletions flatdata-py/tests/test_tar_resource_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from common import *
from flatdata.generator.engine import Engine
from flatdata.lib.file_resource_storage import FileResourceStorage
from flatdata.lib.tar_archive_resource_storage import TarArchiveResourceStorage

import tarfile
import tempfile
import os
from nose.tools import eq_, assert_is_instance


def check_signed_struct(s):
    """Assert that fields a, b, c and d of `s` hold the expected fixture values."""
    expected_fields = (
        ("a", -0x1),
        ("b", 0x01234567),
        ("c", -0x28),
        ("d", 0),
    )
    # Fields are read one at a time, in order, so the first mismatch is the one reported
    for field, expected in expected_fields:
        eq_(expected, getattr(s, field))


def test_instance_reading():
    """Read an instance resource from an archive that is packed into a TAR file."""
    module = Engine(INSTANCE_TEST_SCHEMA).render_python_module()
    valid_data = {
        "Archive.archive": ARCHIVE_SIGNATURE_PAYLOAD,
        "Archive.archive.schema": module.backward_compatibility_Archive.schema().encode(),
        "resource": RESOURCE_PAYLOAD,
        "resource.schema": module.backward_compatibility_Archive.resource_schema('resource').encode()
    }

    with tempfile.TemporaryDirectory() as tmpdir:
        archive_path = os.path.join(tmpdir, "archive.tar")
        # `arcname` stores each member under its bare key, so there is no need
        # to change the process-wide working directory (which would stay
        # changed if anything in between raised).
        with tarfile.open(archive_path, "w") as tar:
            for key, value in valid_data.items():
                member_path = os.path.join(tmpdir, key)
                with open(member_path, "wb") as file:
                    file.write(value)
                # Added only after the file is closed, so its content is fully flushed
                tar.add(member_path, arcname=key)

        archive = module.backward_compatibility_Archive(TarArchiveResourceStorage.create(archive_path))
        check_signed_struct(archive.resource)

0 comments on commit 9429af9

Please sign in to comment.