From 263d4bdc38d51fc3e75fd8710353ed2d57e405a1 Mon Sep 17 00:00:00 2001 From: Hristo Georgiev Date: Wed, 7 Apr 2021 16:29:57 +0000 Subject: [PATCH 1/2] Introduce `safer_eval` --- partd/numpy.py | 9 ++++--- partd/tests/test_numpy.py | 53 +++++++++++++++++++++++++++++++++++--- partd/tests/test_pandas.py | 11 +++++--- partd/tests/test_utils.py | 20 +++++++++++++- partd/utils.py | 44 ++++++++++++++++++++++++++++--- 5 files changed, 123 insertions(+), 14 deletions(-) diff --git a/partd/numpy.py b/partd/numpy.py index ee28052..31e5c1c 100644 --- a/partd/numpy.py +++ b/partd/numpy.py @@ -5,12 +5,15 @@ description of the array's dtype. """ from __future__ import absolute_import + +from toolz import identity, partial, valmap + import numpy as np -from toolz import valmap, identity, partial + from .compatibility import pickle from .core import Interface from .file import File -from .utils import frame, framesplit, suffix, ignoring +from .utils import frame, framesplit, ignoring, safer_eval, suffix def serialize_dtype(dt): @@ -34,7 +37,7 @@ def parse_dtype(s): dtype([('a', '>> safer_eval("1") + 1 + >>> safer_eval("[1, 2, 3]") + [1, 2, 3] + >>> safer_eval("['a', 'b', 'c']") + ['a', 'b', 'c'] + """ + # Preserve the original type, if it's not ``str``, but ensure that sanity + # checks are performed over a ``str`` representation of the input. + string = source if type(source) is str else str(source) + + # Disallow evaluation of non-printable chracters. + if any(map(lambda c: c not in printable, string)): + raise ValueError("Cannot evaluate strings containing non-printable characters") + + # Disallow evaluation of dunder/magic Python methods. + # Access to the latter may recover ``__builtins__``. + if '__' in string: + raise ValueError("Cannot evaluate strings containing '__'") + + # Disallow ``__builtins__`` (e.g., ``__import__``, etc.). + return eval(source, {'__builtins__': {}}) From 4740f139f31725097ac085c31d2aa71bba63c619 Mon Sep 17 00:00:00 2001 From: Hristo Georgiev Date: Wed, 7 Apr 2021 16:49:30 +0000 Subject: [PATCH 2/2] Employ `@pytest.mark.parametrize` in `test_safer_eval_tuple` --- partd/tests/test_numpy.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/partd/tests/test_numpy.py b/partd/tests/test_numpy.py index 1c2ff34..3f3f0a9 100644 --- a/partd/tests/test_numpy.py +++ b/partd/tests/test_numpy.py @@ -76,23 +76,19 @@ def test_non_utf8_bytes(): assert (partd.numpy.deserialize(s, 'O') == a).all() -def test_safer_eval_tuples(): - # Test different quotation mark types. - assert np.dtype(safer_eval(b'[("a", "i4")]')) == np.dtype([('a', '