From 96162f8ad69b2014faed7f212272498e2a348301 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bobot?= Date: Wed, 7 Aug 2024 18:05:41 +0200 Subject: [PATCH] Add testing of scoring and page generation --- smtcomp/defs.py | 9 ++- smtcomp/results.py | 2 + smtcomp/selection.py | 2 +- smtcomp/test_generation.py | 147 +++++++++++++++++++++++++++++++++++++ tests/test_validate.py | 89 ++++++++++++++++++++++ 5 files changed, 244 insertions(+), 5 deletions(-) create mode 100644 smtcomp/test_generation.py diff --git a/smtcomp/defs.py b/smtcomp/defs.py index 05ecb3f2..d99c6204 100644 --- a/smtcomp/defs.py +++ b/smtcomp/defs.py @@ -1480,12 +1480,13 @@ def data(self) -> Path: raise ValueError("Configuration without data") return self._data + @functools.cached_property + def previous_years(self) -> list[int]: + return list(range(self.oldest_previous_results, self.current_year)) + @functools.cached_property def previous_results(self) -> list[tuple[int, Path]]: - return [ - (year, self.data.joinpath(f"results-sq-{year}.json.gz")) - for year in range(Config.oldest_previous_results, Config.current_year) - ] + return [(year, self.data.joinpath(f"results-sq-{year}.json.gz")) for year in self.previous_years] @functools.cached_property def current_results(self) -> dict[Track, Path]: diff --git a/smtcomp/results.py b/smtcomp/results.py index c0a1d13a..c5696b96 100644 --- a/smtcomp/results.py +++ b/smtcomp/results.py @@ -446,6 +446,8 @@ def helper_get_results( -1 is used when no answer is available. + The second value returned is the selection + """ if len(results) == 0: lf = ( diff --git a/smtcomp/selection.py b/smtcomp/selection.py index c1204dbe..3f77c354 100644 --- a/smtcomp/selection.py +++ b/smtcomp/selection.py @@ -164,7 +164,7 @@ def track_selection(benchmarks_with_info: pl.LazyFrame, config: defs.Config, tar ) # Filter with the selected benchmarks - return benchmarks_with_info.join(selected_benchmarks, on="file", how="outer_coalesce").with_columns( + return benchmarks_with_info.join(selected_benchmarks, on="file", how="full").with_columns( pl.col("selected").fill_null(False) ) diff --git a/smtcomp/test_generation.py b/smtcomp/test_generation.py new file mode 100644 index 00000000..42ab7fd3 --- /dev/null +++ b/smtcomp/test_generation.py @@ -0,0 +1,147 @@ +import smtcomp.defs as defs +from pydantic.networks import HttpUrl +from pathlib import Path +from smtcomp.unpack import write_cin + +logic = defs.Logic.QF_ABV +""" An arbitrary logic present in all tested tracks """ + +solver_best = "SolverBest" +""" Solver which prove everything """ + +solver_sound = "SolverSound" +""" Solver which is sound but don't solve everything """ + +solver_error = "SolverError" +""" Solver which makes errors """ + +daisy_family = tuple(["daisy"]) + +file_sat = defs.Smt2File(incremental=False, logic=defs.Logic.QF_ABV, family=daisy_family, name="sat.smt2") + +file_unsat = defs.Smt2File(incremental=False, logic=defs.Logic.QF_ABV, family=daisy_family, name="unsat.smt2") + +file_unknown = defs.Smt2File(incremental=False, logic=defs.Logic.QF_ABV, family=daisy_family, name="unkown.smt2") + +file_incremental = defs.Smt2File( + incremental=True, logic=defs.Logic.QF_ABV, family=daisy_family, name="incremental.smt2" +) + + +def mk_submissions() -> list[defs.Submission]: + contributor = "Author" + email = "Author@research.mars" + + def mk_submission(name: str) -> defs.Submission: + return defs.Submission( + name=name, + contributors=[defs.Contributor(name=contributor)], + contacts=[defs.NameEmail(name=contributor, email=email)], + command=defs.Command(binary=name), + archive=defs.Archive(url=HttpUrl("http://research.mars/archive.tgz")), + website=HttpUrl("http://research.mars/"), + system_description=HttpUrl("http://research.mars/"), + solver_type=defs.SolverType.standalone, + participations=defs.Participations( + [ + defs.Participation( + tracks=[defs.Track.SingleQuery, defs.Track.UnsatCore, defs.Track.Incremental], + logics=defs.Logics([defs.Logic.QF_ABV]), + ) + ] + ), + seed=42, + ) + + return [ + mk_submission(solver_best), + mk_submission(solver_sound), + mk_submission(solver_error), + ] + + +def mk_benchmarks() -> defs.Benchmarks: + # sort file by name + return defs.Benchmarks( + non_incremental=[ + defs.InfoNonIncremental(file=file_sat, status=defs.Status.Sat, asserts=10), + defs.InfoNonIncremental(file=file_unknown, status=defs.Status.Unknown, asserts=10), + defs.InfoNonIncremental(file=file_unsat, status=defs.Status.Unsat, asserts=10), + ], + incremental=[ + defs.InfoIncremental(file=file_incremental, check_sats=10), + ], + ) + + +def mk_results( + track: defs.Track, solver: str, results: list[tuple[defs.Smt2File, defs.Answer, int]] +) -> list[defs.Result]: + return [ + defs.Result( + track=track, + solver=solver, + file=file, + result=result, + cpu_time=10.0, + wallclock_time=20.0, + memory_usage=10.0, + nb_answers=nb_answers, + ) + for (file, result, nb_answers) in results + ] + + +def mk_previous_years_results(config: defs.Config) -> list[tuple[int, Path, defs.Results]]: + results = [(file_sat, defs.Answer.Sat, 1)] + return [ + (year, p, defs.Results(results=mk_results(defs.Track.SingleQuery, solver_best, results))) + for (year, p) in config.previous_results + ] + + +def mk_sq_results() -> defs.Results: + results = [(file_sat, defs.Answer.Sat, 1), (file_unknown, defs.Answer.Unsat, 1), (file_unsat, defs.Answer.Unsat, 1)] + + return defs.Results( + results=mk_results(defs.Track.SingleQuery, solver_best, results) + + mk_results(defs.Track.SingleQuery, solver_sound, results[:-1] + [(file_unsat, defs.Answer.Unknown, 1)]) + + mk_results(defs.Track.SingleQuery, solver_error, results[:-1] + [(file_unsat, defs.Answer.Sat, 1)]) + ) + + +def mk_uc_results() -> defs.Results: + results = [(file_unknown, defs.Answer.Unsat, 7), (file_unsat, defs.Answer.Unsat, 7)] + + return defs.Results( + results=mk_results(defs.Track.SingleQuery, solver_best, results) + + mk_results(defs.Track.SingleQuery, solver_sound, results[:-1] + [(file_unsat, defs.Answer.Unknown, 1)]) + + mk_results(defs.Track.SingleQuery, solver_error, results[:-1] + [(file_unsat, defs.Answer.Sat, 1)]) + ) + + +def mk_inc_results() -> defs.Results: + results = [(file_incremental, defs.Answer.Incremental, 7)] + + return defs.Results( + results=mk_results(defs.Track.SingleQuery, solver_best, results) + + mk_results(defs.Track.SingleQuery, solver_sound, [(file_incremental, defs.Answer.Incremental, 5)]) + + mk_results(defs.Track.SingleQuery, solver_error, [(file_incremental, defs.Answer.IncrementalError, 0)]) + ) + + +def write_test_files(data: Path) -> None: + config = defs.Config(data) + submissions = data / ".." / "submissions" + submissions.mkdir() + for submission in mk_submissions(): + (submissions / (submission.name + ".json")).write_text(submission.model_dump_json(indent=2)) + write_cin(config.benchmarks, mk_benchmarks().model_dump_json()) + for _, file, results in mk_previous_years_results(config): + write_cin(file, results.model_dump_json(indent=2)) + for track, gen in [ + (defs.Track.SingleQuery, mk_sq_results), + (defs.Track.UnsatCore, mk_uc_results), + (defs.Track.Incremental, mk_inc_results), + ]: + write_cin(config.current_results[track], gen().model_dump_json(indent=2)) diff --git a/tests/test_validate.py b/tests/test_validate.py index 5fca6113..dfbcf7bb 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -10,6 +10,9 @@ from smtcomp.submission import read from smtcomp.generate_benchmarks import generate_trivial_benchmarks, path_trivial_benchmark +import smtcomp.test_generation as gtests +import smtcomp.results, smtcomp.scoring, smtcomp.generate_website_page + runner = CliRunner() good_cases = ["tests/test1.json", "submissions/template/template.json", "submissions/template/template_aws_tracks.json"] bad_cases = ["test_bad.json"] @@ -56,3 +59,89 @@ def test_generate_trivial(tmp_path: Path) -> None: for logic in logics: for status in statuses: assert path_trivial_benchmark(tmp_path, track, logic, status).exists() + + +@pytest.fixture(scope="session") +def config(tmp_path_factory: pytest.TempPathFactory) -> defs.Config: + fn = tmp_path_factory.mktemp("tmp") + data = fn / "data" + data.mkdir() + gtests.write_test_files(data) + result = runner.invoke(app, ["create-cache", str(data.absolute())]) + assert result.exit_code == 0 + return defs.Config(data) + + +def test_results_sq_export(config: defs.Config) -> None: + print(config.data) + track = defs.Track.SingleQuery + results, selection = smtcomp.results.helper_get_results(config, [], track) + scores = smtcomp.scoring.add_disagreements_info(results, track) + sound_solvers = scores.filter(sound_solver=True).select("solver").unique().collect()["solver"].sort().to_list() + assert sound_solvers == [gtests.solver_best, gtests.solver_sound] + smtcomp.generate_website_page.export_results(config, selection, results, track) + page_suffix = smtcomp.generate_website_page.page_track_suffix(track) + podium = smtcomp.generate_website_page.PodiumDivision.model_validate_json( + (config.web_results / f"{gtests.logic.name.lower()}-{page_suffix}.md").read_text() + ) + print(podium) + assert podium.winner_seq == gtests.solver_best + + assert podium.sequential[0].name == gtests.solver_best + assert podium.sequential[0].errorScore == 0 + assert podium.sequential[0].correctScore == 3 + assert podium.sequential[1].name == gtests.solver_sound + assert podium.sequential[1].errorScore == 0 + assert podium.sequential[1].correctScore == 2 + assert podium.sequential[2].name == gtests.solver_error + assert podium.sequential[2].errorScore == 1 + assert podium.sequential[2].correctScore == 2 + + +def test_results_uc_export(config: defs.Config) -> None: + print(config.data) + track = defs.Track.UnsatCore + results, selection = smtcomp.results.helper_get_results(config, [], track) + scores = smtcomp.scoring.add_disagreements_info(results, track) + sound_solvers = scores.filter(sound_solver=True).select("solver").unique().collect()["solver"].sort().to_list() + assert sound_solvers == [gtests.solver_best, gtests.solver_sound] + smtcomp.generate_website_page.export_results(config, selection, results, track) + page_suffix = smtcomp.generate_website_page.page_track_suffix(track) + podium = smtcomp.generate_website_page.PodiumDivision.model_validate_json( + (config.web_results / f"{gtests.logic.name.lower()}-{page_suffix}.md").read_text() + ) + print(podium) + assert podium.winner_seq == gtests.solver_best + + assert podium.sequential[0].name == gtests.solver_best + assert podium.sequential[0].errorScore == 0 + assert podium.sequential[0].correctScore == 6 + assert podium.sequential[1].name == gtests.solver_sound + assert podium.sequential[1].errorScore == 0 + assert podium.sequential[1].correctScore == 3 + assert podium.sequential[2].name == gtests.solver_error + assert podium.sequential[2].errorScore == 1 + assert podium.sequential[2].correctScore == 3 + + +def test_results_inc_export(config: defs.Config) -> None: + print(config.data) + track = defs.Track.Incremental + results, selection = smtcomp.results.helper_get_results(config, [], track) + smtcomp.generate_website_page.export_results(config, selection, results, track) + page_suffix = smtcomp.generate_website_page.page_track_suffix(track) + podium = smtcomp.generate_website_page.PodiumDivision.model_validate_json( + (config.web_results / f"{gtests.logic.name.lower()}-{page_suffix}.md").read_text() + ) + print(podium) + assert podium.winner_seq == gtests.solver_best + + assert podium.sequential[0].name == gtests.solver_best + assert podium.sequential[0].errorScore == 0 + assert podium.sequential[0].correctScore == 7 + assert podium.sequential[1].name == gtests.solver_sound + assert podium.sequential[1].errorScore == 0 + assert podium.sequential[1].correctScore == 5 + assert podium.sequential[2].name == gtests.solver_error + assert podium.sequential[2].errorScore == 1 + assert podium.sequential[2].correctScore == 0