-
Notifications
You must be signed in to change notification settings - Fork 27
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use newer tarfile to handle bundles more safely #197
base: master
Are you sure you want to change the base?
Changes from all commits
9313dab
f7b93e4
7106a5a
7cb08f7
9bdacbf
3b272ff
b675a23
cdd3b03
c67aeb3
bc0f054
1f3266b
843a79f
1ff7c78
8ff1cc7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. | ||
drwxr-xr-x ./ | ||
drwxr-xr-x ./baz/ | ||
-rw-r--r-- ./baz/__load__.zeek | ||
-rw-r--r-- ./baz/zkg.meta | ||
-rw-r--r-- ./manifest.txt | ||
drwxr-xr-x ./rot13/ | ||
-rw-r--r-- ./rot13/CHANGES | ||
-rw-r--r-- ./rot13/CMakeLists.txt | ||
-rw-r--r-- ./rot13/COPYING.edit-me | ||
-rw-r--r-- ./rot13/Makefile | ||
-rw-r--r-- ./rot13/README | ||
-rw-r--r-- ./rot13/VERSION | ||
-rwxr-xr-x ./rot13/configure | ||
-rw-r--r-- ./rot13/configure.plugin | ||
drwxr-xr-x ./rot13/scripts/ | ||
drwxr-xr-x ./rot13/scripts/Demo/ | ||
drwxr-xr-x ./rot13/scripts/Demo/Rot13/ | ||
-rw-r--r-- ./rot13/scripts/Demo/Rot13/__load__.zeek | ||
-rw-r--r-- ./rot13/scripts/__load__.zeek | ||
-rw-r--r-- ./rot13/scripts/__preload__.zeek | ||
-rw-r--r-- ./rot13/scripts/init.zeek | ||
-rw-r--r-- ./rot13/scripts/types.zeek | ||
drwxr-xr-x ./rot13/src/ | ||
-rw-r--r-- ./rot13/src/Plugin.cc | ||
-rw-r--r-- ./rot13/src/Plugin.h | ||
-rw-r--r-- ./rot13/src/rot13.bif | ||
drwxr-xr-x ./rot13/testing/ | ||
drwxr-xr-x ./rot13/testing/Baseline/ | ||
drwxr-xr-x ./rot13/testing/Baseline/tests.main/ | ||
-rw-r--r-- ./rot13/testing/Baseline/tests.main/output | ||
drwxr-xr-x ./rot13/testing/Baseline/tests.rot13/ | ||
-rw-r--r-- ./rot13/testing/Baseline/tests.rot13/output | ||
-rw-r--r-- ./rot13/testing/btest.cfg | ||
drwxr-xr-x ./rot13/testing/tests/ | ||
-rw-r--r-- ./rot13/testing/tests/main | ||
-rw-r--r-- ./rot13/testing/tests/rot13 | ||
-rw-r--r-- ./rot13/zkg.meta |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
### BTest baseline data generated by btest-diff. Do not edit. Use "btest -U/-u" to update. Requires BTest >= 0.63. | ||
-rw-r--r-- state/script_dir/packages/rot13/__load__.zeek | ||
drwxr-xr-x state/clones/package/rot13/.git | ||
-rw-r--r-- state/clones/package/rot13/CHANGES | ||
-rw-r--r-- state/clones/package/rot13/CMakeLists.txt | ||
-rw-r--r-- state/clones/package/rot13/COPYING.edit-me | ||
-rw-r--r-- state/clones/package/rot13/Makefile | ||
-rw-r--r-- state/clones/package/rot13/README | ||
-rw-r--r-- state/clones/package/rot13/VERSION | ||
drwxr-xr-x state/clones/package/rot13/build | ||
-rwxr-xr-x state/clones/package/rot13/configure | ||
-rw-r--r-- state/clones/package/rot13/configure.plugin | ||
drwxr-xr-x state/clones/package/rot13/scripts | ||
drwxr-xr-x state/clones/package/rot13/src | ||
drwxr-xr-x state/clones/package/rot13/testing | ||
-rw-r--r-- state/clones/package/rot13/zkg.meta |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# @TEST-EXEC: bash %INPUT | ||
|
||
# @TEST-EXEC: zkg purge | ||
# @TEST-EXEC: umask 0027 && zkg bundle test.bundle --manifest manifest.txt | ||
|
||
# Check that permissions in the bundle match expectations. | ||
# NOTE: We only extract the first (permission) and last column (filename) to be generic over GNU and BSD tar. | ||
# @TEST-EXEC: tar tvf test.bundle | grep -v git | awk '{ print $1, $NF }' > bundle.out | ||
# @TEST-EXEC: btest-diff bundle.out | ||
# @TEST-EXEC: umask 0027 && zkg --config=my_zkg.config unbundle --force --replace test.bundle | ||
|
||
# Check that permissions of the extracted bundle match expectations. | ||
# @TEST-EXEC: python3 ./list-dir.py state/script_dir/packages/rot13 state/clones/package/rot13 > unbundle.out | ||
# @TEST-EXEC: btest-diff unbundle.out | ||
|
||
# make sure the on-disk permissions will differ | ||
chmod -R g-w,o-rwx packages/rot13 | ||
|
||
echo "$(pwd)/packages/rot13" >> sources/one/bob/zkg.index | ||
cd sources/one | ||
git commit -am 'add rot13 package' | ||
cd ../.. | ||
|
||
echo "[bundle]" > manifest.txt | ||
default_branch_name=$( cd packages/baz && git rev-parse --abbrev-ref HEAD ) | ||
echo "baz = ${default_branch_name}" >> manifest.txt | ||
default_branch_name=$( cd packages/rot13 && git rev-parse --abbrev-ref HEAD ) | ||
echo "rot13 = ${default_branch_name}" >> manifest.txt | ||
|
||
cat << EOF > my_zkg.config | ||
[paths] | ||
state_dir = $(pwd)/state | ||
|
||
EOF | ||
|
||
# @TEST-START-FILE list-dir.py | ||
#!/usr/bin/env python3 | ||
|
||
"""System-independent helper to print permissions of entries in directories.""" | ||
|
||
import os | ||
import stat | ||
import sys | ||
from pathlib import Path | ||
|
||
DIRS = map(lambda d: Path(d), sys.argv[1:]) | ||
for d in DIRS: | ||
for e in sorted(os.listdir(d)): | ||
f = d / e | ||
s = os.stat(f, follow_symlinks=False) | ||
print(f"{stat.filemode(s.st_mode)} {f}") | ||
# @TEST-END-FILE |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,12 +7,31 @@ | |
import os | ||
import shutil | ||
import string | ||
import tarfile | ||
import types | ||
|
||
import git | ||
import semantic_version as semver | ||
|
||
from .vendor import tarfile | ||
|
||
|
||
class UmaskContext: | ||
prev_umask = None | ||
|
||
def __init__(self, new_umask): | ||
self.new_umask = new_umask | ||
|
||
def __enter__(self): | ||
if self.prev_umask is not None: | ||
raise ValueError("Can't nest umask context") | ||
|
||
self.prev_umask = os.umask(self.new_umask) | ||
|
||
def __exit__(self, _type, value, traceback): | ||
if self.prev_umask is not None: | ||
os.umask(self.prev_umask) | ||
self.prev_umask = None | ||
|
||
|
||
def make_dir(path): | ||
"""Create a directory or do nothing if it already exists. | ||
|
@@ -69,37 +88,122 @@ def make_symlink(target_path, link_path, force=True): | |
raise error | ||
|
||
|
||
def safe_tarfile_extractall(tfile, destdir): | ||
"""Wrapper to tarfile.extractall(), checking for path traversal. | ||
def zkg_tarfile_create(basedir): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add PEP257 docstrings for the functions you are adding? |
||
"""Create a tar of the bundle files at `basedir` | ||
|
||
Args: | ||
basedir (str): the path to the bundle root | ||
""" | ||
compression = "gz" | ||
tar_name = "".join((basedir, ".tar.", compression)) | ||
|
||
with tarfile.open(tar_name, "w:" + compression) as tar: | ||
tar.add(basedir, arcname=".", filter=zkg_tarfile_create_filter) | ||
|
||
return tar_name | ||
|
||
This adds the safeguards the Python docs for tarfile.extractall warn about: | ||
|
||
Never extract archives from untrusted sources without prior inspection. It | ||
is possible that files are created outside of path, e.g. members that have | ||
absolute filenames starting with "/" or filenames with two dots "..". | ||
def zkg_tarfile_extractall(tfile, destdir, umask=None): | ||
"""Wrapper to tarfile.extractall() using our filter that calls data_filter. | ||
|
||
This adds a lot of sanity checking for the tar file. | ||
|
||
A zeek package contains arbitrary code that will be executed with the same | ||
permissions as the user running zeek. Never extract archives from untrusted sources. | ||
|
||
Args: | ||
tfile (str): the tar file to extract | ||
|
||
destdir (str): the destination directory into which to place contents | ||
|
||
Raises: | ||
Exception: if the tarfile would extract outside destdir | ||
Exception: see tarfile.data_filter, tarfile.TarFile.extractall | ||
""" | ||
|
||
def is_within_directory(directory, target): | ||
abs_directory = os.path.abspath(directory) | ||
abs_target = os.path.abspath(target) | ||
prefix = os.path.commonprefix([abs_directory, abs_target]) | ||
return prefix == abs_directory | ||
|
||
with tarfile.open(tfile) as tar: | ||
for member in tar.getmembers(): | ||
member_path = os.path.join(destdir, member.name) | ||
if not is_within_directory(destdir, member_path): | ||
raise Exception("attempted path traversal in tarfile") | ||
tar.extractall( | ||
destdir, | ||
filter=lambda member, dest_path: zkg_tarfile_extract_filter( | ||
member, | ||
dest_path, | ||
umask=umask, | ||
), | ||
) | ||
|
||
|
||
def zkg_update_perms(member, extract, umask=None): | ||
"""Returns a dict of attributes that should be modified on member to result in our | ||
desired permissions set. If extract is set, we set owner/group to None, otherwise | ||
they are set to root/root. | ||
|
||
Args: | ||
member (tarfile.TarInfo): tarfile member info | ||
|
||
extract (bool): whether or not we are extracting | ||
|
||
umask (integer): optional umask to apply | ||
|
||
Returns: | ||
dict: member attributes to be replaced and their new values | ||
""" | ||
|
||
new_attrs = {} | ||
|
||
if umask is None: | ||
umask = 0 | ||
|
||
# we are doing our own thing with `mode` here | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is just the inverse of what was discussed in the ticket: If a user creates a tar by hand or with custom tooling that contains a 0600 file and extracts it as Having a If we'd warn users about strict umask settings and the consequences and a possible opt-in to set laxer permissions within the bundle, maybe that would suffice, too? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think unless there is a really strong use-case against it, we should favor the solution that will work for everyone. I feel like the package manager should be in charge of the permissions rather than carrying over (often accidental) permissions from the system on which the bundle is created (more so if they are creating them by hand) -- especially since there are also unexpected interactions depending on the umask zkg runs with (the permissions of the unpacked files don't have any effect on compiled code, even with this PR). A package should be portable, and if the end-user needs to worry about permissions on both the bundling system and the unbundling system, I don't think the package is very portable. What is the best practice for running zkg? It seems like it would be best for zkg to run as a user that is different from the user running zeek, as well as a non-root user. It would be great if we could mask permissions with say If we want to provide a way for users to control permissions of files written by zkg, maybe it would be better to provide a cli/config option for The only use-case I have come up with for wanting some files to have more restrictive permissions than others is if a user is including sensitive files in a bundle, which I hope they are not -- however even this use-case could be handled by using a more restrictive umask for all files. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yeah, that was the only thought, and can also see that it's a "don't do that". I think I'm still leaning on the side of putting the portable/appropriate permission bits into the bundle/tar, but during extraction at least not make them more permissible. Maybe I'm too timid and someone else should chime in. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it be better to separate that piece into another ticket/PR? Also, since the process umask is honored while building (for files created during the build step), should we also honor the process umask while unbundling to be more consistent? I'm not really sure what the best approach is there, but I really don't like the inconsistency. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I'd wait and see if @ckreibich or others opine. As said, maybe I'm just too timid and making everything 0644/0755 is okay.
Hmm, with the current state of the PR, I'd even say you could open-up/force the umask to
And even with the stricter permission handling, maybe a warning is appropriate if umask is found to be too strict before building. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The latest commit (1ff7c78) should set the umask on build based on a new |
||
mode = member.mode | ||
if member.isreg() or member.islnk(): | ||
# if user has x bit | ||
if mode is None or not mode & 0o100: | ||
# not executable | ||
mode = 0o644 | ||
else: | ||
mode = 0o755 | ||
elif member.isdir(): | ||
mode = 0o755 | ||
elif member.issym(): | ||
mode = None | ||
else: | ||
raise Exception("unexpected special files in tarfile") | ||
|
||
apply_mask = ~umask & 0o777 | ||
effective = mode & apply_mask | ||
new_attrs["mode"] = effective | ||
|
||
if extract: | ||
new_attrs["uid"] = new_attrs["gid"] = None | ||
new_attrs["uname"] = new_attrs["gname"] = None | ||
else: | ||
new_attrs["uid"] = new_attrs["gid"] = 0 | ||
new_attrs["uname"] = new_attrs["gname"] = "root" | ||
|
||
return new_attrs | ||
|
||
|
||
def zkg_tarfile_create_filter(member): | ||
"""Filter member items during tar creation | ||
|
||
Args: | ||
member (tarfile.TarInfo): the member to inspect/normalize | ||
|
||
Returns: | ||
tarfile.TarInfo: the new member with desired permissions or None to omit | ||
""" | ||
new_attrs = zkg_update_perms(member, extract=False) | ||
|
||
# copy.deepcopy() can't copy a file handle | ||
return member.replace(deep=False, **new_attrs) | ||
|
||
|
||
def zkg_tarfile_extract_filter(member, dest_path, umask=None): | ||
# we are uncompressing, so do more sanity checking | ||
new_member = tarfile.data_filter(member, dest_path) | ||
|
||
new_attrs = zkg_update_perms(member, extract=True, umask=umask) | ||
|
||
tar.extractall(destdir) | ||
return new_member.replace(**new_attrs) | ||
|
||
|
||
def find_sentence_end(s): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should continue to use
tarfile
from the Python distribution if it's modern enough. If anything changes in the distribution version, at least it would be captured.Something like that might work.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure I like this vendoring all that much, but after staring a bit at the tarfile difference, think it's okay.