From 3e94d0cae75454944bd77d3c2bcc088799d69b08 Mon Sep 17 00:00:00 2001 From: Roel de Jong <12800443+twiggler@users.noreply.github.com> Date: Mon, 14 Oct 2024 15:35:01 +0200 Subject: [PATCH 1/5] Take header size into account when reading non-resident symlinks --- dissect/xfs/xfs.py | 5 ++++- tests/data/.gitattributes | 1 + tests/data/xfs_symlink_long.bin.gz | 3 +++ tests/test_xfs.py | 5 ++++- 4 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 tests/data/.gitattributes create mode 100644 tests/data/xfs_symlink_long.bin.gz diff --git a/dissect/xfs/xfs.py b/dissect/xfs/xfs.py index 769a297..e02fd3a 100644 --- a/dissect/xfs/xfs.py +++ b/dissect/xfs/xfs.py @@ -325,7 +325,10 @@ def link(self) -> str: if not self._link: if self.inode.di_format != c_xfs.xfs_dinode_fmt.XFS_DINODE_FMT_LOCAL and self.xfs.version == 5: - fh = self.open() + # We do not use open because for non-resident symlinks self.size does not include the symlink header + runs = self.dataruns() + symlink_size = c_xfs.xfs_dsymlink_hdr.size + self.size + fh = RunlistStream(self.xfs.fh, runs, symlink_size, self.xfs.block_size) header = c_xfs.xfs_dsymlink_hdr(fh) if header.sl_magic != c_xfs.XFS_SYMLINK_MAGIC: diff --git a/tests/data/.gitattributes b/tests/data/.gitattributes new file mode 100644 index 0000000..2014686 --- /dev/null +++ b/tests/data/.gitattributes @@ -0,0 +1 @@ +xfs_symlink_long.bin.gz filter=lfs diff=lfs merge=lfs -text diff --git a/tests/data/xfs_symlink_long.bin.gz b/tests/data/xfs_symlink_long.bin.gz new file mode 100644 index 0000000..6c289d0 --- /dev/null +++ b/tests/data/xfs_symlink_long.bin.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff94bd598afb51a98eb02295d584cfc34a59dd5fde6f7161bf32c5ef1ac20b3e +size 50458 diff --git a/tests/test_xfs.py b/tests/test_xfs.py index aa2ce67..670c09f 100644 --- a/tests/test_xfs.py +++ b/tests/test_xfs.py @@ -73,6 +73,7 @@ def test_xfs_bigtime(xfs_bigtime_bin): ("tests/data/xfs_symlink_test1.bin.gz"), ("tests/data/xfs_symlink_test2.bin.gz"), ("tests/data/xfs_symlink_test3.bin.gz"), + ("tests/data/xfs_symlink_long.bin.gz"), ], ) def test_symlinks(image_file): @@ -85,4 +86,6 @@ def resolve(node): return node with gzip.open(image_file, "rb") as disk: - assert resolve(XFS(disk).get(path)).open().read() == expect + link_inode = resolve(XFS(disk).get(path)) + assert link_inode.nblocks == 1 + assert link_inode.open().read() == expect From eec7ae6281e094be77c7843176d395c421fbef8f Mon Sep 17 00:00:00 2001 From: Roel de Jong <12800443+twiggler@users.noreply.github.com> Date: Mon, 14 Oct 2024 15:42:07 +0200 Subject: [PATCH 2/5] Modernize typings --- dissect/xfs/xfs.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/dissect/xfs/xfs.py b/dissect/xfs/xfs.py index e02fd3a..cb6dff4 100644 --- a/dissect/xfs/xfs.py +++ b/dissect/xfs/xfs.py @@ -6,7 +6,7 @@ import stat from datetime import datetime from functools import lru_cache -from typing import BinaryIO, Iterator, Optional, Union +from typing import BinaryIO, Iterator from uuid import UUID from dissect.util import ts @@ -55,7 +55,7 @@ def __init__(self, fh: BinaryIO): self.root = self.get_inode(self.sb.sb_rootino) - def get(self, path: Union[int, str], node: Optional[INode] = None) -> INode: + def get(self, path: int | str, node: INode | None = None) -> INode: if isinstance(path, int): return self.get_inode(path) @@ -102,14 +102,14 @@ def walk_extents(self, block: int) -> Iterator[tuple[int, int, int, int]]: for record in self.walk_large_tree(block, 16, (c_xfs.XFS_BMAP_MAGIC, c_xfs.XFS_BMAP_CRC_MAGIC)): yield parse_fsblock(record) - def walk_large_tree(self, block: int, leaf_size: int, magic: Optional[list[int]] = None) -> Iterator[bytes]: + def walk_large_tree(self, block: int, leaf_size: int, magic: list[int] | None = None) -> Iterator[bytes]: self.fh.seek(block * self.block_size) root = self._lblock_s(self.fh) yield from self._walk_large_tree(root, leaf_size, magic) def walk_small_tree( - self, block: int, agnum: int, leaf_size: int, magic: Optional[list[int]] = None + self, block: int, agnum: int, leaf_size: int, magic: list[int] | None = None ) -> Iterator[bytes]: block = agnum * self.sb.sb_agblocks + block self.fh.seek(block * self.block_size) @@ -122,7 +122,7 @@ def _walk_small_tree( node: c_xfs.xfs_btree_sblock | c_xfs.xfs_btree_sblock_crc, leaf_size: int, agnum: int, - magic: Optional[list[int]] = None, + magic: list[int] | None = None, ) -> Iterator[bytes]: fh = self.fh if magic and node.bb_magic not in magic: @@ -148,7 +148,7 @@ def _walk_large_tree( self, node: c_xfs.xfs_btree_lblock | c_xfs.xfs_btree_lblock_crc, leaf_size: int, - magic: Optional[list[int]] = None, + magic: list[int] | None = None, ) -> Iterator[bytes]: fh = self.fh if magic and node.bb_magic not in magic: @@ -210,9 +210,9 @@ def __init__(self, xfs: XFS, fh: BinaryIO, num: int): def get_inode( self, inum: int, - filename: Optional[str] = None, - filetype: Optional[int] = None, - parent: Optional[INode] = None, + filename: str | None = None, + filetype: int | None = None, + parent: INode | None = None, lazy: bool = False, ) -> INode: inode = INode(self, inum, filename, filetype, parent=parent) @@ -230,7 +230,7 @@ def walk_extents(self, fsb: int) -> Iterator[tuple[int, int, int, int]]: def walk_agi(self) -> Iterator[c_xfs.xfs_inobt_rec]: yield from self.xfs.walk_agi(self.agi.agi_root, self.num) - def walk_tree(self, fsb: int, magic: Optional[list[int]] = None, small: bool = False): + def walk_tree(self, fsb: int, magic: list[int] | None = None, small: bool = False): agnum, blknum = fsb_to_bb(fsb, self.sb.sb_agblklog) block = agnum * self.xfs.sb.sb_agblocks + blknum @@ -245,9 +245,9 @@ def __init__( self, ag: AllocationGroup, inum: int, - filename: Optional[str] = None, - filetype: Optional[int] = None, - parent: Optional[INode] = None, + filename: str | None = None, + filetype: int | None = None, + parent: INode | None = None, ): self.ag = ag self.xfs = ag.xfs @@ -517,7 +517,7 @@ def attrfork(self) -> BinaryIO: return RangeStream(self._buf, offset, size) - def dataruns(self) -> list[tuple[Optional[int], int]]: + def dataruns(self) -> list[tuple[int | None, int]]: if not self._runlist: runs = [] run_offset = 0 From f65cd0b7ab38b74e9aaef2907ed82374d9edb6d6 Mon Sep 17 00:00:00 2001 From: Roel de Jong <12800443+twiggler@users.noreply.github.com> Date: Mon, 14 Oct 2024 16:47:52 +0200 Subject: [PATCH 3/5] Handle symlink target distributed over muliple extents --- dissect/xfs/xfs.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/dissect/xfs/xfs.py b/dissect/xfs/xfs.py index cb6dff4..a79ba76 100644 --- a/dissect/xfs/xfs.py +++ b/dissect/xfs/xfs.py @@ -230,7 +230,7 @@ def walk_extents(self, fsb: int) -> Iterator[tuple[int, int, int, int]]: def walk_agi(self) -> Iterator[c_xfs.xfs_inobt_rec]: yield from self.xfs.walk_agi(self.agi.agi_root, self.num) - def walk_tree(self, fsb: int, magic: list[int] | None = None, small: bool = False): + def walk_tree(self, fsb: int, magic: list[int] | None = None, small: bool = False) -> Iterator[bytes]: agnum, blknum = fsb_to_bb(fsb, self.sb.sb_agblklog) block = agnum * self.xfs.sb.sb_agblocks + blknum @@ -248,7 +248,7 @@ def __init__( filename: str | None = None, filetype: int | None = None, parent: INode | None = None, - ): + ) -> None: self.ag = ag self.xfs = ag.xfs self.inum = inum + (ag.num << ag._inum_bits) @@ -325,10 +325,16 @@ def link(self) -> str: if not self._link: if self.inode.di_format != c_xfs.xfs_dinode_fmt.XFS_DINODE_FMT_LOCAL and self.xfs.version == 5: + # Almost always, symlinks (max size of 1024) fit within a block. If the block size if 512, we might + # need three blocks. These three blocks could theoretially be distributed over multiple extents. + # Linux kernel handles this by using sl_offset to piece the symlink back together. + # As this edge case of an edge case is very unlikely, it is unsupported until we observe it. + if len(self.dataruns()) > 1: + raise NotImplementedError(f"{self!r} has a symlink distributed over multiple extents") + # We do not use open because for non-resident symlinks self.size does not include the symlink header - runs = self.dataruns() - symlink_size = c_xfs.xfs_dsymlink_hdr.size + self.size - fh = RunlistStream(self.xfs.fh, runs, symlink_size, self.xfs.block_size) + symlink_size = len(c_xfs.xfs_dsymlink_hdr) + self.size + fh = RunlistStream(self.xfs.fh, self.dataruns(), symlink_size, self.xfs.block_size) header = c_xfs.xfs_dsymlink_hdr(fh) if header.sl_magic != c_xfs.XFS_SYMLINK_MAGIC: From 49e48bcbb126680665cdd35e890054e24cb43307 Mon Sep 17 00:00:00 2001 From: Roel de Jong <12800443+twiggler@users.noreply.github.com> Date: Tue, 15 Oct 2024 09:23:08 +0200 Subject: [PATCH 4/5] Add ticket link --- dissect/xfs/xfs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dissect/xfs/xfs.py b/dissect/xfs/xfs.py index a79ba76..30351c0 100644 --- a/dissect/xfs/xfs.py +++ b/dissect/xfs/xfs.py @@ -329,6 +329,7 @@ def link(self) -> str: # need three blocks. These three blocks could theoretially be distributed over multiple extents. # Linux kernel handles this by using sl_offset to piece the symlink back together. # As this edge case of an edge case is very unlikely, it is unsupported until we observe it. + # Ticket: https://github.com/fox-it/dissect.xfs/issues/36 if len(self.dataruns()) > 1: raise NotImplementedError(f"{self!r} has a symlink distributed over multiple extents") From c9ca5bf943b0e08791c2243f36beee7bf7be8c0e Mon Sep 17 00:00:00 2001 From: Roel de Jong <12800443+twiggler@users.noreply.github.com> Date: Thu, 17 Oct 2024 09:16:48 +0200 Subject: [PATCH 5/5] Consolidate .gitattributes --- .gitattributes | 1 + tests/data/.gitattributes | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 tests/data/.gitattributes diff --git a/.gitattributes b/.gitattributes index 6e51826..24ae162 100644 --- a/.gitattributes +++ b/.gitattributes @@ -4,3 +4,4 @@ tests/data/xfs_symlink_test3.bin.gz filter=lfs diff=lfs merge=lfs -text tests/data/xfs_bigtime.bin.gz filter=lfs diff=lfs merge=lfs -text tests/data/xfs.bin.gz filter=lfs diff=lfs merge=lfs -text tests/data/xfs_sparse.bin.gz filter=lfs diff=lfs merge=lfs -text +tests/data/xfs_symlink_long.bin.gz filter=lfs diff=lfs merge=lfs -text diff --git a/tests/data/.gitattributes b/tests/data/.gitattributes deleted file mode 100644 index 2014686..0000000 --- a/tests/data/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -xfs_symlink_long.bin.gz filter=lfs diff=lfs merge=lfs -text