Skip to content

Commit

Permalink
Improve the test seeding system for better reproducing
Browse files Browse the repository at this point in the history
  • Loading branch information
bcm-at-zama authored Mar 5, 2024
1 parent 84dc590 commit cb5b60f
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 44 deletions.
78 changes: 56 additions & 22 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""PyTest configuration file."""
import hashlib
import json
import random
import re
Expand Down Expand Up @@ -49,15 +50,6 @@ def pytest_addoption(parser):
help="To dump pytest-cov term report to a text file.",
)

parser.addoption(
"--forcing_random_seed",
action="store",
default=None,
type=int,
help="To force the seed of each and every unit test, to be able to "
"reproduce a particular issue.",
)

parser.addoption(
"--weekly",
action="store_true",
Expand Down Expand Up @@ -176,37 +168,79 @@ def function_to_seed_torch(seed):


def _derive_sub_seed(relative_file_path, test_name, randomly_seed):
    """Derive a per-test seed from the test location, the test name and the global seed.

    Args:
        relative_file_path (str): Path of the test file, relative to the repository root.
        test_name (str): Name of the test node (function name plus parametrization id).
        randomly_seed: Value of the --randomly-seed option.

    Returns:
        Tuple[str, int]: The string that was hashed and the derived seed, in [0, 2**64).
    """
    derivation_string = f"{relative_file_path} # {test_name} # {randomly_seed}"

    # Hash the *content* of the derivation string (it must be encoded to bytes first), so that
    # different tests, configurations or --randomly-seed values give different seeds
    hash_value = hashlib.sha256(derivation_string.encode("utf-8")).hexdigest()

    # The hash is a SHA256, so 256 bits: reduce it to 64 bits here. Consumers needing a smaller
    # seed (e.g., numpy.random.seed, limited to 32 bits) reduce it further themselves
    return derivation_string, int(hash_value, 16) % 2**64


@pytest.fixture(autouse=True)
def autoseeding_of_everything(request):
    """Seed everything we can, for determinism.

    Args:
        request: The pytest request fixture, used to read the --randomly-seed option, the path
            of the test file and the name of the test node.

    Returns:
        dict: A dictionary with a single "randomly seed" key, holding the --randomly-seed value.

    Raises:
        ValueError: If the --randomly-seed option is not available.
    """
    # Explanations on the seeding system:
    #
    # The seed used (called sub_seed below) for a test of a function f_i (e.g.,
    # test_compute_bits_precision) on a configuration c_j (e.g., x0-8) of a test file t_k (e.g.,
    # tests/common/test_utils.py) is computed as some hash(f_i, c_j, t_k, randomly-seed)
    #
    # This makes it possible to reproduce a bug seen during a full pytest execution on a
    # configuration (f_i, c_j, t_k) by calling pytest on this single configuration with the same
    # --randomly-seed parameter and no other arguments.
    #
    # In particular, it is resistant to crashes which would prevent the few prints below in this
    # function, which detail some seeding information

    randomly_seed = request.config.getoption("--randomly-seed", default=None)

    if randomly_seed is None:
        raise ValueError("--randomly-seed has not been properly configured internally")

    # We need to find the relative file path of the test file. It does not look native with
    # request, so we recompute it.
    absolute_path = str(request.fspath)

    # This avoids unexpected test paths where "concrete-ml/test" is missing or appears several
    # times, which may happen only if the developer cloned with a very strange path
    assert (
        absolute_path.count("concrete-ml/test") == 1
    ), f"{absolute_path=} is expected to contain 'concrete-ml/test' exactly once"

    # Keep everything after the "concrete-ml/" prefix, i.e., starting at "test..."
    relative_file_path = absolute_path[
        absolute_path.find("concrete-ml/test") + len("concrete-ml/") :
    ]

    # Derive the sub_seed from the randomly_seed and the test name
    derivation_string, sub_seed = _derive_sub_seed(
        relative_file_path, str(request.node.name), randomly_seed
    )

    print(f"\nUsing {randomly_seed=}\nUsing {derivation_string=}\nUsing {sub_seed=}")

    # And then, do everything per this sub_seed
    seed = sub_seed

    # The option name printed here must match the one read above (--randomly-seed)
    print(
        f"\nRelaunch the tests with --randomly-seed {randomly_seed} "
        + "--randomly-dont-reset-seed to reproduce."
    )
    print(
        "Remark that potentially, any option used in the pytest call may have an impact so in "
        + "case of problem to reproduce, you may want to have a look to `make pytest` options"
    )

    # Python
    random.seed(seed)

    # Numpy: use a different seed than for Python, reduced to the 32 bits numpy accepts
    seed += 1
    numpy.random.seed(seed % 2**32)

    # Seed torch, again with a different seed
    seed += 1
    function_to_seed_torch(seed)

    return {"randomly seed": randomly_seed}


@pytest.fixture
Expand Down
14 changes: 6 additions & 8 deletions deps_licenses/licenses_mac_intel_user.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
Name, Version, License
GitPython, 3.1.41, BSD License
PyYAML, 6.0.1, MIT License
annotated-types, 0.6.0, MIT License
anyio, 4.2.0, MIT License
boto3, 1.34.37, Apache Software License
botocore, 1.34.37, Apache Software License
anyio, 3.7.1, MIT License
boto3, 1.34.38, Apache Software License
botocore, 1.34.38, Apache Software License
brevitas, 0.8.0, UNKNOWN
certifi, 2023.7.22, Mozilla Public License 2.0 (MPL 2.0)
charset-normalizer, 3.3.2, MIT License
Expand All @@ -14,7 +13,7 @@ concrete-python, 2.5.1, BSD-3-Clause
dependencies, 2.0.1, BSD License
dill, 0.3.8, BSD License
exceptiongroup, 1.2.0, MIT License
fastapi, 0.109.2, MIT License
fastapi, 0.103.2, MIT License
filelock, 3.13.1, The Unlicense (Unlicense)
flatbuffers, 23.5.26, Apache Software License
fsspec, 2024.2.0, BSD License
Expand All @@ -40,8 +39,7 @@ packaging, 23.2, Apache Software License; BSD License
pluggy, 1.4.0, MIT License
protobuf, 3.20.3, BSD-3-Clause
psutil, 5.9.8, BSD License
pydantic, 2.6.1, MIT License
pydantic_core, 2.16.2, MIT License
pydantic, 1.10.14, MIT License
pytest, 7.4.1, MIT License
pytest-json-report, 1.5.0, MIT
pytest-metadata, 3.1.0, Mozilla Public License 2.0 (MPL 2.0)
Expand All @@ -58,7 +56,7 @@ skops, 0.5.0, MIT
skorch, 0.11.0, new BSD 3-Clause
smmap, 5.0.1, BSD License
sniffio, 1.3.0, Apache Software License; MIT License
starlette, 0.36.3, BSD License
starlette, 0.27.0, BSD License
sympy, 1.12, BSD License
tabulate, 0.8.10, MIT License
threadpoolctl, 3.2.0, BSD License
Expand Down
2 changes: 1 addition & 1 deletion deps_licenses/licenses_mac_intel_user.txt.md5
Original file line number Diff line number Diff line change
@@ -1 +1 @@
f709427468f5be4ee6836603495fea72
a923947bfb17b658ab8efe61d5cafe96
19 changes: 6 additions & 13 deletions script/make_utils/check_pytest_determinism.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,32 +40,25 @@ set -e
diff "${OUTPUT_DIRECTORY}/one.txt" "${OUTPUT_DIRECTORY}/two.txt" -I "passed in"
echo "Successful determinism check"

# Now, check --forcing_random_seed, i.e., check that one can reproduce conditions of a bug in a single file
# and test without having to relaunch the full pytest, by just picking the right --forcing_random_seed
# Now, check that one can reproduce conditions of a bug in a single file
# and test without having to relaunch the full pytest

# All lines that start with "tests/seeding/test_seeding.py::" represent a test that passed
LIST_FILES=$(grep "tests/seeding/test_seeding.py::" "${OUTPUT_DIRECTORY}/one.txt")
LIST_SEED=()
while IFS='' read -r line; do LIST_SEED+=("$line"); done < <(grep "forcing_random_seed" "${OUTPUT_DIRECTORY}/one.txt" | sed -e "s@Relaunch the tests with --forcing_random_seed @@" | sed -e "s@ --randomly-dont-reset-seed to reproduce. Remark that adding --randomly-seed=... is needed when the testcase uses randoms in pytest parameters@@" )

WHICH=0
echo "" > "${OUTPUT_DIRECTORY}/three.txt"
for x in $LIST_FILES
do
# For several tests, we need to add $RANDOMLY_SEED
EXTRA_OPTION=""
if grep -q "tests/seeding/test_seeding.py::test_seed_needing_randomly_seed_arg" <<< "$x"
then
EXTRA_OPTION=" --randomly-seed=$RANDOMLY_SEED"
fi
EXTRA_OPTION=" --randomly-seed=$RANDOMLY_SEED"

echo "poetry run pytest $x -xsvv $EXTRA_OPTION --randomly-dont-reset-seed --forcing_random_seed " "${LIST_SEED[WHICH]}"
echo "poetry run pytest $x -xsvv $EXTRA_OPTION --randomly-dont-reset-seed"

# Only take lines after the header, i.e., after line with 'collecting'
# SC2086 asks for double quotes to prevent globbing and word splitting, but here, quoting would
# pass an empty argument to pytest if EXTRA_OPTION is empty, which is considered as "do pytest for all files"
# shellcheck disable=SC2086
poetry run pytest "$x" -xsvv $EXTRA_OPTION --randomly-dont-reset-seed --forcing_random_seed "${LIST_SEED[WHICH]}" | sed -n -e '/collecting/,$p' | grep -v collecting | grep -v "collected" | grep -v "passed in" | grep -v "PASSED" >> "${OUTPUT_DIRECTORY}/three.txt"
poetry run pytest "$x" -xsvv $EXTRA_OPTION --randomly-dont-reset-seed | sed -n -e '/collecting/,$p' | grep -v collecting | grep -v "collected" | grep -v "passed in" | grep -v "PASSED" >> "${OUTPUT_DIRECTORY}/three.txt"

((WHICH+=1))
done
Expand All @@ -77,4 +70,4 @@ echo ""
echo "diff:"
echo ""
diff -u "${OUTPUT_DIRECTORY}/one.modified.txt" "${OUTPUT_DIRECTORY}/three.txt" --ignore-all-space --ignore-blank-lines --ignore-space-change
echo "Successful --forcing_random_seed check"
echo "Successful final check"

0 comments on commit cb5b60f

Please sign in to comment.