Skip to content

Commit

Permalink
File contents generation now uses the SampleFileGenerator rng
Browse files Browse the repository at this point in the history
  • Loading branch information
tclose committed Sep 27, 2024
1 parent 26979f6 commit 4fee622
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 9 deletions.
19 changes: 14 additions & 5 deletions fileformats/core/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
import fileformats.core


FILE_FILL_LENGTH_DEFAULT = 256


class SampleFileGenerator:
"""Generates sample files. Designed to be used within generate_sample_data overrides
Expand Down Expand Up @@ -48,6 +51,14 @@ def _generate_fname_stem(self) -> str:
def rng(self) -> random.Random:
    """Build a random number generator seeded with ``self.seed``.

    Returns
    -------
    rng : random.Random
        a newly constructed generator initialised from ``self.seed``
    """
    # NOTE(review): any decorator on this def sits outside the visible hunk;
    # callers in this file use attribute-style access (``self.rng.choices``)
    seeded_generator = random.Random(self.seed)
    return seeded_generator

def generate_contents(
    self, binary: bool, fill: int = FILE_FILL_LENGTH_DEFAULT
) -> ty.Union[str, bytes]:
    """Generate random file contents using the generator's own ``rng``.

    Parameters
    ----------
    binary : bool
        if true, return ``bytes`` whose values are drawn from 0-255, otherwise
        return a ``str`` whose characters are drawn from ``string.printable``
    fill : int, optional
        the number of bytes/characters to generate

    Returns
    -------
    contents : bytes or str
        the randomly generated contents, ``fill`` items long
    """
    generator = self.rng
    if not binary:
        # Text files: sample (with replacement) from the printable characters
        characters = generator.choices(string.printable, k=fill)
        return "".join(characters)
    # Binary files: sample (with replacement) from every possible byte value
    byte_values = generator.choices(range(256), k=fill)
    return bytes(byte_values)

def generate(
self,
file_type: ty.Union[
Expand Down Expand Up @@ -88,14 +99,12 @@ def generate(
fspath.parent.mkdir(parents=True, exist_ok=True)
is_binary: bool = getattr(file_type, "binary", False)
if not contents:
contents = (
bytes(random.choices(list(range(256)), k=fill))
if is_binary
else "".join(random.choices(string.printable, k=fill))
contents = self.generate_contents(
is_binary, fill if fill else FILE_FILL_LENGTH_DEFAULT
)
else:
contents_type = bytes if is_binary else str
if not isinstance(contents, bytes):
if not isinstance(contents, contents_type):
raise TypeError(
f"contents must be {contents_type} for {file_type} files, "
f"not {type(contents)}"
Expand Down
10 changes: 6 additions & 4 deletions fileformats/generic/generate_sample_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,22 @@ def file_generate_sample_data(
file: File,
generator: SampleFileGenerator,
) -> ty.List[Path]:
contents = os.urandom(FILE_FILL_LENGTH)
contents = None
if getattr(file, "binary", False):
if hasattr(file, "magic_number"):
contents = generator.generate_contents(binary=True)
magic_number: bytes
if isinstance(file.magic_number, str):
magic_number = bytes.fromhex(file.magic_number)
else:
magic_number = file.magic_number
offset = getattr(file, "magic_number_offset", 0)
if offset < 0:
postamble = os.urandom(-(len(magic_number) + offset))
contents += magic_number + postamble
contents += magic_number + postamble # type: ignore[operator]

Check warning on line 42 in fileformats/generic/generate_sample_data.py

View check run for this annotation

Codecov / codecov/patch

fileformats/generic/generate_sample_data.py#L41-L42

Added lines #L41 - L42 were not covered by tests
else:
preamble = os.urandom(offset)
contents = preamble + magic_number + contents
preamble = generator.generate_contents(binary=True, fill=offset)
contents = preamble + magic_number + contents # type: ignore[operator]
elif hasattr(file, "magic_pattern"):
raise NotImplementedError(
"Sampling of magic version file types is not implemented yet"
Expand Down

0 comments on commit 4fee622

Please sign in to comment.