Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle negative magic number offsets in sample file generation #90

Merged
merged 3 commits into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion extras/fileformats/extras/application/medical.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
import sys
import typing as ty
from pathlib import Path
from typing_extensions import TypeAlias
import pydicom.tag
from fileformats.core import FileSet, extra_implementation
from fileformats.application import Dicom
import medimages4tests.dummy.dicom.mri.t1w.siemens.skyra.syngo_d13c
from fileformats.core import SampleFileGenerator

if sys.version_info <= (3, 11):
from typing_extensions import TypeAlias
else:
from typing import TypeAlias

TagListType: TypeAlias = ty.Union[
ty.List[int],
ty.List[str],
Expand Down
19 changes: 14 additions & 5 deletions fileformats/core/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
import fileformats.core


# Default number of random bytes/characters used to fill a generated sample
# file; serves as the default ``fill`` of ``generate_contents`` and as the
# fallback in ``generate`` when no fill length is given.
FILE_FILL_LENGTH_DEFAULT = 256


class SampleFileGenerator:
"""Generates sample files. Designed to be used within generate_sample_data overrides

Expand Down Expand Up @@ -48,6 +51,14 @@ def _generate_fname_stem(self) -> str:
def rng(self) -> random.Random:
return random.Random(self.seed)

def generate_contents(
    self, binary: bool, fill: int = FILE_FILL_LENGTH_DEFAULT
) -> ty.Union[str, bytes]:
    """Generate random contents to write into a sample file

    Values are drawn from ``self.rng`` rather than the module-level ``random``
    functions.

    Parameters
    ----------
    binary : bool
        if True, return ``bytes`` made of values in the range 0-255, otherwise
        return a ``str`` made of characters from ``string.printable``
    fill : int, optional
        the number of bytes/characters to generate

    Returns
    -------
    contents : bytes or str
        the randomly generated contents, ``fill`` items long
    """
    if not binary:
        chars = self.rng.choices(string.printable, k=fill)
        return "".join(chars)
    byte_values = self.rng.choices(list(range(256)), k=fill)
    return bytes(byte_values)

def generate(
self,
file_type: ty.Union[
Expand Down Expand Up @@ -88,14 +99,12 @@ def generate(
fspath.parent.mkdir(parents=True, exist_ok=True)
is_binary: bool = getattr(file_type, "binary", False)
if not contents:
contents = (
bytes(random.choices(list(range(256)), k=fill))
if is_binary
else "".join(random.choices(string.printable, k=fill))
contents = self.generate_contents(
is_binary, fill if fill else FILE_FILL_LENGTH_DEFAULT
)
else:
contents_type = bytes if is_binary else str
if not isinstance(contents, bytes):
if not isinstance(contents, contents_type):
raise TypeError(
f"contents must be {contents_type} for {file_type} files, "
f"not {type(contents)}"
Expand Down
17 changes: 12 additions & 5 deletions fileformats/generic/generate_sample_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,19 @@
contents = None
if getattr(file, "binary", False):
if hasattr(file, "magic_number"):
contents = generator.generate_contents(binary=True)
magic_number: bytes
if isinstance(file.magic_number, str):
magic_number = bytes.fromhex(file.magic_number)
else:
magic_number = file.magic_number
offset = getattr(file, "magic_number_offset", 0)
contents = os.urandom(offset)
magic_number = getattr(file, "magic_number", b"")
if isinstance(magic_number, str):
magic_number = bytes.fromhex(magic_number)
contents += magic_number
if offset < 0:
postamble = os.urandom(-(len(magic_number) + offset))
contents += magic_number + postamble # type: ignore[operator]

Check warning on line 42 in fileformats/generic/generate_sample_data.py

View check run for this annotation

Codecov / codecov/patch

fileformats/generic/generate_sample_data.py#L41-L42

Added lines #L41 - L42 were not covered by tests
else:
preamble = generator.generate_contents(binary=True, fill=offset)
contents = preamble + magic_number + contents # type: ignore[operator]
elif hasattr(file, "magic_pattern"):
raise NotImplementedError(
"Sampling of magic version file types is not implemented yet"
Expand Down
Loading