Skip to content

Commit

Permalink
Add testing of scoring and page generation
Browse files Browse the repository at this point in the history
  • Loading branch information
bobot committed Aug 7, 2024
1 parent 239f734 commit 96162f8
Show file tree
Hide file tree
Showing 5 changed files with 244 additions and 5 deletions.
9 changes: 5 additions & 4 deletions smtcomp/defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1480,12 +1480,13 @@ def data(self) -> Path:
raise ValueError("Configuration without data")
return self._data

@functools.cached_property
def previous_years(self) -> list[int]:
return list(range(self.oldest_previous_results, self.current_year))

@functools.cached_property
def previous_results(self) -> list[tuple[int, Path]]:
return [
(year, self.data.joinpath(f"results-sq-{year}.json.gz"))
for year in range(Config.oldest_previous_results, Config.current_year)
]
return [(year, self.data.joinpath(f"results-sq-{year}.json.gz")) for year in self.previous_years]

@functools.cached_property
def current_results(self) -> dict[Track, Path]:
Expand Down
2 changes: 2 additions & 0 deletions smtcomp/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,8 @@ def helper_get_results(
-1 is used when no answer is available.
The second value returned is the selection
"""
if len(results) == 0:
lf = (
Expand Down
2 changes: 1 addition & 1 deletion smtcomp/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def track_selection(benchmarks_with_info: pl.LazyFrame, config: defs.Config, tar
)

# Filter with the selected benchmarks
return benchmarks_with_info.join(selected_benchmarks, on="file", how="outer_coalesce").with_columns(
return benchmarks_with_info.join(selected_benchmarks, on="file", how="full").with_columns(
pl.col("selected").fill_null(False)
)

Expand Down
147 changes: 147 additions & 0 deletions smtcomp/test_generation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import smtcomp.defs as defs
from pydantic.networks import HttpUrl
from pathlib import Path
from smtcomp.unpack import write_cin

logic = defs.Logic.QF_ABV
""" An arbitrary logic present in all tested tracks """

solver_best = "SolverBest"
""" Solver which proves everything """

solver_sound = "SolverSound"
""" Solver which is sound but doesn't solve everything """

solver_error = "SolverError"
""" Solver which makes errors """

# Single benchmark family shared by all the generated test files.
daisy_family = tuple(["daisy"])

# One non-incremental benchmark per status of interest, plus one incremental one.
# NOTE(review): "unkown.smt2" keeps the original spelling — it is test data, not prose.
file_sat = defs.Smt2File(incremental=False, logic=defs.Logic.QF_ABV, family=daisy_family, name="sat.smt2")

file_unsat = defs.Smt2File(incremental=False, logic=defs.Logic.QF_ABV, family=daisy_family, name="unsat.smt2")

file_unknown = defs.Smt2File(incremental=False, logic=defs.Logic.QF_ABV, family=daisy_family, name="unkown.smt2")

file_incremental = defs.Smt2File(
    incremental=True, logic=defs.Logic.QF_ABV, family=daisy_family, name="incremental.smt2"
)


def mk_submissions() -> list[defs.Submission]:
    """Build one submission per test solver.

    Every solver is registered for the same three tracks (SingleQuery,
    UnsatCore, Incremental) and the single test logic QF_ABV.
    """
    contributor = "Author"
    email = "[email protected]"

    def make(solver_name: str) -> defs.Submission:
        # All submissions share the same participation: three tracks, one logic.
        participation = defs.Participation(
            tracks=[defs.Track.SingleQuery, defs.Track.UnsatCore, defs.Track.Incremental],
            logics=defs.Logics([defs.Logic.QF_ABV]),
        )
        return defs.Submission(
            name=solver_name,
            contributors=[defs.Contributor(name=contributor)],
            contacts=[defs.NameEmail(name=contributor, email=email)],
            command=defs.Command(binary=solver_name),
            archive=defs.Archive(url=HttpUrl("http://research.mars/archive.tgz")),
            website=HttpUrl("http://research.mars/"),
            system_description=HttpUrl("http://research.mars/"),
            solver_type=defs.SolverType.standalone,
            participations=defs.Participations([participation]),
            seed=42,
        )

    return [make(name) for name in (solver_best, solver_sound, solver_error)]


def mk_benchmarks() -> defs.Benchmarks:
    """Return the benchmark set used by the tests (files sorted by name)."""
    non_incremental = [
        defs.InfoNonIncremental(file=benchmark, status=status, asserts=10)
        for benchmark, status in [
            (file_sat, defs.Status.Sat),
            (file_unknown, defs.Status.Unknown),
            (file_unsat, defs.Status.Unsat),
        ]
    ]
    incremental = [defs.InfoIncremental(file=file_incremental, check_sats=10)]
    return defs.Benchmarks(non_incremental=non_incremental, incremental=incremental)


def mk_results(
    track: defs.Track, solver: str, results: list[tuple[defs.Smt2File, defs.Answer, int]]
) -> list[defs.Result]:
    """Wrap (file, answer, nb_answers) triples into Result records.

    Resource usage (cpu time, wallclock time, memory) is fixed — the tests
    only exercise answer-based scoring, not timing.
    """
    records: list[defs.Result] = []
    for benchmark, answer, nb_answers in results:
        records.append(
            defs.Result(
                track=track,
                solver=solver,
                file=benchmark,
                result=answer,
                cpu_time=10.0,
                wallclock_time=20.0,
                memory_usage=10.0,
                nb_answers=nb_answers,
            )
        )
    return records


def mk_previous_years_results(config: defs.Config) -> list[tuple[int, Path, defs.Results]]:
    """Give every configured previous year the same single result for solver_best."""
    answers = [(file_sat, defs.Answer.Sat, 1)]
    out: list[tuple[int, Path, defs.Results]] = []
    for year, path in config.previous_results:
        yearly = defs.Results(results=mk_results(defs.Track.SingleQuery, solver_best, answers))
        out.append((year, path, yearly))
    return out


def mk_sq_results() -> defs.Results:
    """Single-query results for the three test solvers.

    solver_best answers all three benchmarks, solver_sound answers Unknown on
    file_unsat (misses it), and solver_error answers Sat on file_unsat (an error).
    """
    # Answers shared by every solver; only the file_unsat answer differs.
    common = [(file_sat, defs.Answer.Sat, 1), (file_unknown, defs.Answer.Unsat, 1)]
    all_records = (
        mk_results(defs.Track.SingleQuery, solver_best, common + [(file_unsat, defs.Answer.Unsat, 1)])
        + mk_results(defs.Track.SingleQuery, solver_sound, common + [(file_unsat, defs.Answer.Unknown, 1)])
        + mk_results(defs.Track.SingleQuery, solver_error, common + [(file_unsat, defs.Answer.Sat, 1)])
    )
    return defs.Results(results=all_records)


def mk_uc_results() -> defs.Results:
    """Unsat-core results for the three test solvers.

    solver_best answers both unsat benchmarks with 7 core answers,
    solver_sound answers Unknown on file_unsat (misses it), and
    solver_error answers Sat on file_unsat (an error).
    """
    results = [(file_unknown, defs.Answer.Unsat, 7), (file_unsat, defs.Answer.Unsat, 7)]

    # Fixed a copy-paste from mk_sq_results: these records populate the
    # UnsatCore results file, so tag them with Track.UnsatCore, not SingleQuery.
    return defs.Results(
        results=mk_results(defs.Track.UnsatCore, solver_best, results)
        + mk_results(defs.Track.UnsatCore, solver_sound, results[:-1] + [(file_unsat, defs.Answer.Unknown, 1)])
        + mk_results(defs.Track.UnsatCore, solver_error, results[:-1] + [(file_unsat, defs.Answer.Sat, 1)])
    )


def mk_inc_results() -> defs.Results:
    """Incremental-track results for the three test solvers.

    solver_best answers all 7 check-sats, solver_sound only 5, and
    solver_error produces an incremental error with 0 answers.
    """
    results = [(file_incremental, defs.Answer.Incremental, 7)]

    # Fixed a copy-paste from mk_sq_results: these records populate the
    # Incremental results file, so tag them with Track.Incremental.
    return defs.Results(
        results=mk_results(defs.Track.Incremental, solver_best, results)
        + mk_results(defs.Track.Incremental, solver_sound, [(file_incremental, defs.Answer.Incremental, 5)])
        + mk_results(defs.Track.Incremental, solver_error, [(file_incremental, defs.Answer.IncrementalError, 0)])
    )


def write_test_files(data: Path) -> None:
    """Populate *data* (and a sibling ``submissions`` directory) with all test fixtures.

    Writes: one JSON file per submission, the compressed benchmark list, one
    compressed results file per previous year, and one compressed current-results
    file per tested track.
    """
    config = defs.Config(data)

    submissions_dir = data / ".." / "submissions"
    submissions_dir.mkdir()
    for sub in mk_submissions():
        target = submissions_dir / f"{sub.name}.json"
        target.write_text(sub.model_dump_json(indent=2))

    write_cin(config.benchmarks, mk_benchmarks().model_dump_json())

    for _year, path, yearly_results in mk_previous_years_results(config):
        write_cin(path, yearly_results.model_dump_json(indent=2))

    generators = {
        defs.Track.SingleQuery: mk_sq_results,
        defs.Track.UnsatCore: mk_uc_results,
        defs.Track.Incremental: mk_inc_results,
    }
    for track, gen in generators.items():
        write_cin(config.current_results[track], gen().model_dump_json(indent=2))
89 changes: 89 additions & 0 deletions tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
from smtcomp.submission import read
from smtcomp.generate_benchmarks import generate_trivial_benchmarks, path_trivial_benchmark

import smtcomp.test_generation as gtests
import smtcomp.results, smtcomp.scoring, smtcomp.generate_website_page

runner = CliRunner()
good_cases = ["tests/test1.json", "submissions/template/template.json", "submissions/template/template_aws_tracks.json"]
bad_cases = ["test_bad.json"]
Expand Down Expand Up @@ -56,3 +59,89 @@ def test_generate_trivial(tmp_path: Path) -> None:
for logic in logics:
for status in statuses:
assert path_trivial_benchmark(tmp_path, track, logic, status).exists()


@pytest.fixture(scope="session")
def config(tmp_path_factory: pytest.TempPathFactory) -> defs.Config:
fn = tmp_path_factory.mktemp("tmp")
data = fn / "data"
data.mkdir()
gtests.write_test_files(data)
result = runner.invoke(app, ["create-cache", str(data.absolute())])
assert result.exit_code == 0
return defs.Config(data)


def test_results_sq_export(config: defs.Config) -> None:
    """Score the single-query results and check the exported podium page."""
    print(config.data)
    track = defs.Track.SingleQuery
    results, selection = smtcomp.results.helper_get_results(config, [], track)

    # Only the two solvers without wrong answers are sound.
    scores = smtcomp.scoring.add_disagreements_info(results, track)
    sound_solvers = scores.filter(sound_solver=True).select("solver").unique().collect()["solver"].sort().to_list()
    assert sound_solvers == [gtests.solver_best, gtests.solver_sound]

    smtcomp.generate_website_page.export_results(config, selection, results, track)
    page_suffix = smtcomp.generate_website_page.page_track_suffix(track)
    page = config.web_results / f"{gtests.logic.name.lower()}-{page_suffix}.md"
    podium = smtcomp.generate_website_page.PodiumDivision.model_validate_json(page.read_text())
    print(podium)
    assert podium.winner_seq == gtests.solver_best

    # (solver, errorScore, correctScore) in expected podium order.
    expected = [
        (gtests.solver_best, 0, 3),
        (gtests.solver_sound, 0, 2),
        (gtests.solver_error, 1, 2),
    ]
    for rank, (name, error_score, correct_score) in enumerate(expected):
        entry = podium.sequential[rank]
        assert entry.name == name
        assert entry.errorScore == error_score
        assert entry.correctScore == correct_score


def test_results_uc_export(config: defs.Config) -> None:
    """Score the unsat-core results and check the exported podium page."""
    print(config.data)
    track = defs.Track.UnsatCore
    results, selection = smtcomp.results.helper_get_results(config, [], track)

    # Only the two solvers without wrong answers are sound.
    scores = smtcomp.scoring.add_disagreements_info(results, track)
    sound_solvers = scores.filter(sound_solver=True).select("solver").unique().collect()["solver"].sort().to_list()
    assert sound_solvers == [gtests.solver_best, gtests.solver_sound]

    smtcomp.generate_website_page.export_results(config, selection, results, track)
    page_suffix = smtcomp.generate_website_page.page_track_suffix(track)
    page = config.web_results / f"{gtests.logic.name.lower()}-{page_suffix}.md"
    podium = smtcomp.generate_website_page.PodiumDivision.model_validate_json(page.read_text())
    print(podium)
    assert podium.winner_seq == gtests.solver_best

    # (solver, errorScore, correctScore) in expected podium order.
    expected = [
        (gtests.solver_best, 0, 6),
        (gtests.solver_sound, 0, 3),
        (gtests.solver_error, 1, 3),
    ]
    for rank, (name, error_score, correct_score) in enumerate(expected):
        entry = podium.sequential[rank]
        assert entry.name == name
        assert entry.errorScore == error_score
        assert entry.correctScore == correct_score


def test_results_inc_export(config: defs.Config) -> None:
    """Score the incremental results and check the exported podium page."""
    print(config.data)
    track = defs.Track.Incremental
    results, selection = smtcomp.results.helper_get_results(config, [], track)

    smtcomp.generate_website_page.export_results(config, selection, results, track)
    page_suffix = smtcomp.generate_website_page.page_track_suffix(track)
    page = config.web_results / f"{gtests.logic.name.lower()}-{page_suffix}.md"
    podium = smtcomp.generate_website_page.PodiumDivision.model_validate_json(page.read_text())
    print(podium)
    assert podium.winner_seq == gtests.solver_best

    # (solver, errorScore, correctScore) in expected podium order.
    expected = [
        (gtests.solver_best, 0, 7),
        (gtests.solver_sound, 0, 5),
        (gtests.solver_error, 1, 0),
    ]
    for rank, (name, error_score, correct_score) in enumerate(expected):
        entry = podium.sequential[rank]
        assert entry.name == name
        assert entry.errorScore == error_score
        assert entry.correctScore == correct_score

0 comments on commit 96162f8

Please sign in to comment.