diff --git a/.gitignore b/.gitignore
index e23048c8..63837ac0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -170,3 +170,4 @@ cython_debug/
 schema_doc.css
 schema_doc.min.js
 *.feather
+tmp/
diff --git a/smtcomp/archive.py b/smtcomp/archive.py
index d0d75f2c..8e6e35fb 100644
--- a/smtcomp/archive.py
+++ b/smtcomp/archive.py
@@ -38,6 +38,10 @@ def is_unpack_present(archive: defs.Archive, dst: Path) -> bool:
     return any(True for _ in d.iterdir())
 
 
+def command_path(command: defs.Command, archive: defs.Archive, dst: Path) -> Path:
+    return archive_unpack_dir(archive, dst).joinpath(command.binary)
+
+
 def find_command(command: defs.Command, archive: defs.Archive, dst: Path) -> Path:
     d = archive_unpack_dir(archive, dst)
     if not (d.exists()):
diff --git a/smtcomp/benchexec.py b/smtcomp/benchexec.py
index faac6361..2b55e375 100644
--- a/smtcomp/benchexec.py
+++ b/smtcomp/benchexec.py
@@ -6,13 +6,37 @@
 from yattag import Doc, indent
 
 from smtcomp import defs
-from smtcomp.archive import find_command, archive_unpack_dir
+from smtcomp.archive import find_command, archive_unpack_dir, command_path
 from pydantic import BaseModel
 
 import shlex
 from re import sub
 
 
+def get_suffix(track: defs.Track) -> str:
+    match track:
+        case defs.Track.Incremental:
+            return "_inc"
+        case defs.Track.ModelValidation:
+            return "_model"
+        case defs.Track.UnsatCore:
+            return "_unsatcore"
+        case defs.Track.SingleQuery:
+            return ""
+        case _:
+            raise ValueError("No Cloud or Parallel")
+
+
+def get_xml_name(s: defs.Submission, track: defs.Track) -> str:
+    suffix = get_suffix(track)
+    return sub(r"\W+", "", s.name.lower()) + suffix + ".xml"
+
+
+def tool_module_name(s: defs.Submission, incremental: bool) -> str:
+    suffix = "_inc" if incremental else ""
+    return sub(r"\W+", "", s.name.lower()) + suffix
+
+
 class CmdTask(BaseModel):
     name: str
     options: List[str]
@@ -36,35 +60,28 @@ def generate_benchmark_yml(benchmark: Path, expected_result: Optional[bool], ori
         f.write(f"  expected_verdict: {expected_str}\n")
 
 
-def tool_module_name(s: defs.Submission, track: defs.Track) -> str:
-    return sub(r"\W+", "", s.name.lower()) + get_suffix(track)
-
-
-def generate_tool_module(s: defs.Submission, cachedir: Path, track: defs.Track) -> None:
-    name = tool_module_name(s, track)
+def generate_tool_module(s: defs.Submission, cachedir: Path, incremental: bool) -> None:
+    name = tool_module_name(s, incremental)
     file = cachedir / "tools" / f"{name}.py"
 
     with file.open("w") as f:
-        match track:
-            case defs.Track.Incremental:
-                base_module = "incremental_tool"
-                base_class = "IncrementalSMTCompTool"
-            case defs.Track.SingleQuery:
-                base_module = "tool"
-                base_class = "SMTCompTool"
-            case _:
-                rich.print(
-                    f"[red]generate_tool_module command does not yet work for the competition track: {track}[/red]"
-                )
-                exit(1)
+        if incremental:
+            base_module = "incremental_tool"
+            base_class = "IncrementalSMTCompTool"
+        else:
+            base_module = "tool"
+            base_class = "SMTCompTool"
 
         f.write(f"from tools.{base_module} import {base_class}\n\n")
         f.write(f"class Tool({base_class}): # type: ignore\n")
         f.write(f"    NAME = '{s.name}'\n")
         if s.command is not None:
             assert s.archive is not None
-            executable_path = find_command(s.command, s.archive, cachedir)
-            executable = str(relpath(executable_path, start=str(cachedir)))
+            if s.command.compa_starexec:
+                executable_path = find_command(s.command, s.archive, cachedir)
+                executable = str(relpath(executable_path, start=str(cachedir)))
+            else:
+                executable = str(command_path(s.command, s.archive, Path()))
             f.write(f"    EXECUTABLE = '{executable}'\n")
 
         required_paths = []
@@ -80,8 +97,13 @@ def generate_tool_module(s: defs.Submission, cachedir: Path, track: defs.Track)
         f.write(f"    REQUIRED_PATHS = {required_paths}\n")
 
 
+def generate_tool_modules(s: defs.Submission, cachedir: Path) -> None:
+    generate_tool_module(s, cachedir, True)
+    generate_tool_module(s, cachedir, False)
+
+
 def generate_xml(
-    timelimit_s: int, memlimit_M: int, cpuCores: int, cmdtasks: List[CmdTask], cachedir: Path, tool_module_name: str
+    timelimit_s: int, memlimit_M: int, cpuCores: int, cmdtasks: List[CmdTask], file: Path, tool_module_name: str
 ) -> None:
     doc, tag, text = Doc().tagtext()
 
@@ -117,20 +139,9 @@ def generate_xml(
         with tag("propertyfile"):
             text("benchmarks/properties/SMT.prp")
 
-    file = cachedir.joinpath(f"{tool_module_name}.xml")
     file.write_text(indent(doc.getvalue()))
 
 
-def get_suffix(track: defs.Track) -> str:
-    match track:
-        case defs.Track.Incremental:
-            return "_inc"
-        case defs.Track.ModelValidation:
-            return "_model"
-        case _:
-            return ""
-
-
 def cmdtask_for_submission(s: defs.Submission, cachedir: Path, target_track: defs.Track) -> List[CmdTask]:
     res: List[CmdTask] = []
     i = -1
@@ -147,10 +158,10 @@ def cmdtask_for_submission(s: defs.Submission, cachedir: Path, target_track: def
             for _, logics in divisions.items():
                 tasks.extend([str(logic) + suffix for logic in logics])
             if tasks:
-                executable_path = find_command(command, archive, cachedir)
-                executable = str(relpath(executable_path, start=str(cachedir)))
                 if command.compa_starexec:
                     assert command.arguments == []
+                    executable_path = find_command(command, archive, cachedir)
+                    executable = str(relpath(executable_path, start=str(cachedir)))
                     dirname = str(relpath(executable_path.parent, start=str(cachedir)))
 
                     options = [
@@ -160,6 +171,7 @@ def cmdtask_for_submission(s: defs.Submission, cachedir: Path, target_track: def
                         "compa_starexec",
                     ]
                 else:
+                    executable = str(command_path(command, archive, Path()))
                     options = [executable] + command.arguments
                 cmdtask = CmdTask(
                     name=f"{s.name},{i},{track}",
diff --git a/smtcomp/defs.py b/smtcomp/defs.py
index f278b8c8..1474ae7b 100644
--- a/smtcomp/defs.py
+++ b/smtcomp/defs.py
@@ -1351,6 +1351,10 @@ class Config:
     cpuCores = 4
     min_used_benchmarks = 300
     ratio_of_used_benchmarks = 0.5
+    use_previous_results_for_status = True
+    """
+    Complete the status given in the benchmarks using previous results
+    """
     old_criteria = False
     """"Do we try to emulate <= 2023 criteria that does not really follow the rules"""
     invert_triviality = False
diff --git a/smtcomp/execution.py b/smtcomp/execution.py
index 85c00666..de31cefa 100644
--- a/smtcomp/execution.py
+++ b/smtcomp/execution.py
@@ -6,13 +6,11 @@
 
 
 def trace_executor_url() -> str:
-    return (
-        "https://github.com/SMT-COMP/trace-executor/releases/download/smtcomp2022/SMT-COMP-2022-trace-executor.tar.gz"
-    )
+    return "https://github.com/SMT-COMP/trace-executor/releases/download/smtcomp2024-rc1/SMT-COMP-2024-trace-executor.tar.gz"
 
 
 def trace_executor_filename() -> str:
-    return "SMT-COMP-2022-trace-executor.tar.gz"
+    return "SMT-COMP-2024-trace-executor.tar.gz"
 
 
 def download_trace_executor(dst: Path) -> None:
diff --git a/smtcomp/main.py b/smtcomp/main.py
index 5f7101ee..b0e8a0c0 100644
--- a/smtcomp/main.py
+++ b/smtcomp/main.py
@@ -157,22 +157,32 @@ def generate_benchexec(
 ) -> None:
     """
     Generate the benchexec file for the given submissions
+
+    (The cachedir directory need to contain unpacked archive only with compa_starexec command)
     """
 
+    (cachedir / "tools").mkdir(parents=True, exist_ok=True)
     for file in track(files):
         s = submission.read(str(file))
+        benchexec.generate_tool_modules(s, cachedir)
 
-        for target_track in [defs.Track.SingleQuery, defs.Track.Incremental]:
-            tool_module_name = benchexec.tool_module_name(s, target_track)
-            benchexec.generate_tool_module(s, cachedir, target_track)
+        for target_track in [
+            defs.Track.SingleQuery,
+            defs.Track.Incremental,
+            defs.Track.ModelValidation,
+            defs.Track.UnsatCore,
+        ]:
+            tool_module_name = benchexec.tool_module_name(s, target_track == defs.Track.Incremental)
             res = benchexec.cmdtask_for_submission(s, cachedir, target_track)
             if res:
+                basename = benchexec.get_xml_name(s, target_track)
+                file = cachedir / basename
                 benchexec.generate_xml(
                     timelimit_s=timelimit_s,
                     memlimit_M=memlimit_M,
                     cpuCores=cpuCores,
                     cmdtasks=res,
-                    cachedir=cachedir,
+                    file=file,
                     tool_module_name=tool_module_name,
                 )
 
@@ -372,6 +382,7 @@ def show_sq_selection_stats(
     min_use_benchmarks: int = defs.Config.min_used_benchmarks,
     ratio_of_used_benchmarks: float = defs.Config.ratio_of_used_benchmarks,
    invert_triviality: bool = False,
+    use_previous_results_for_status: bool = defs.Config.use_previous_results_for_status,
 ) -> None:
     """
     Show statistics on the benchmarks selected for single query track
@@ -385,6 +396,7 @@ def show_sq_selection_stats(
     config.ratio_of_used_benchmarks = ratio_of_used_benchmarks
     config.invert_triviality = invert_triviality
     config.old_criteria = old_criteria
+    config.use_previous_results_for_status = use_previous_results_for_status
     benchmarks_with_info = smtcomp.selection.helper_compute_sq(config)
     b3 = (
         benchmarks_with_info.group_by(["logic"])
@@ -394,6 +406,8 @@ def show_sq_selection_stats(
             old_never_ran=pl.col("file").filter(run=False, new=False).len(),
             new=pl.col("new").sum(),
             selected=pl.col("file").filter(selected=True).len(),
+            selected_sat=pl.col("file").filter(selected=True, status=int(defs.Status.Sat)).len(),
+            selected_unsat=pl.col("file").filter(selected=True, status=int(defs.Status.Unsat)).len(),
             selected_already_run=pl.col("file").filter(selected=True, run=True).len(),
         )
         .sort(by="logic")
@@ -407,6 +421,8 @@ def show_sq_selection_stats(
     table.add_column("never compet.", justify="right", style="magenta")
     table.add_column("new", justify="right", style="magenta1")
     table.add_column("selected", justify="right", style="green3")
+    table.add_column("selected sat", justify="right", style="green4")
+    table.add_column("selected unsat", justify="right", style="green4")
    table.add_column("selected already run", justify="right", style="green4")
 
     used_logics = defs.logic_used_for_track(defs.Track.SingleQuery)
@@ -424,6 +440,8 @@ def show_sq_selection_stats(
             str(d["old_never_ran"]),
             str(d["new"]),
             str(d["selected"]),
+            str(d["selected_sat"]),
+            str(d["selected_unsat"]),
             str(d["selected_already_run"]),
         )
 
@@ -435,6 +453,8 @@ def show_sq_selection_stats(
         str(b3["old_never_ran"].sum()),
         str(b3["new"].sum()),
         str(b3["selected"].sum()),
+        str(b3["selected_sat"].sum()),
+        str(b3["selected_unsat"].sum()),
         str(b3["selected_already_run"].sum()),
     )
 
@@ -587,13 +607,17 @@ def read_submission(file: Path) -> defs.Submission:
     for part in sub.complete_participations():
         for track, divisions in part.tracks.items():
             match track:
-                case defs.Track.Incremental:
-                    statuses = [defs.Status.Sat, defs.Status.Unsat]
                 case defs.Track.ModelValidation:
                     statuses = [defs.Status.Sat]
                 case defs.Track.SingleQuery:
                     statuses = [defs.Status.Sat, defs.Status.Unsat]
-                case defs.Track.UnsatCore | defs.Track.ProofExhibition | defs.Track.Cloud | defs.Track.Parallel:
+                case (
+                    defs.Track.Incremental
+                    | defs.Track.UnsatCore
+                    | defs.Track.ProofExhibition
+                    | defs.Track.Cloud
+                    | defs.Track.Parallel
+                ):
                     continue
             for _, logics in divisions.items():
                 for logic in logics:
diff --git a/smtcomp/results.py b/smtcomp/results.py
new file mode 100644
index 00000000..1adacf6d
--- /dev/null
+++ b/smtcomp/results.py
@@ -0,0 +1,193 @@
+from typing import Optional, Iterator, Any
+import functools
+import smtcomp.defs as defs
+import polars as pl
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from smtcomp.unpack import read_cin
+from pydantic import BaseModel
+from datetime import timedelta
+from zipfile import ZipFile
+
+
+class RunId(BaseModel):
+    solver: str
+    participation: int
+    track: defs.Track
+    includefile: str
+
+    @classmethod
+    def unmangle(cls, s: str) -> "RunId":
+        name_l = s.split(",")
+        return RunId(
+            solver=name_l[0],
+            participation=int(name_l[1]),
+            track=defs.Track(name_l[2]),
+            # The name "task" is currently added at the end, name of the task
+            includefile=name_l[3].split(".")[0],
+        )
+
+    def mangle(self) -> str:
+        return ",".join([self.solver, str(self.participation), str(self.track), self.includefile])
+
+
+class Run(BaseModel):
+    basename: str
+    cputime_s: float
+    """ example: 0.211880968s"""
+    memory_B: int
+    """ example: 21127168B"""
+    status: str
+    """ example: true"""
+    walltime_s: float
+    """ example: 0.21279739192686975s"""
+
+    # host: str
+    # """ example: pontos07"""
+    # blkio-read: str
+    # """ example: 0B """
+    # blkio-write: str
+    # """ example: 0B """
+    # category: str
+    # """ example: missing """
+    # cpuCores: str
+    # """ example: 0 """
+    # cputime-cpu0: str
+    # """ example: 0.211880968s """
+    # memoryNodes: str
+    # """ example: 0 """
+    # returnvalue: str
+    # """ example: 1 """
+    # starttime: str
+    # """ example: 2024-06-07T19:57:32.499898+02:00"""
+    # vcloud-additionalEnvironment: str
+    # """ example: """
+    # vcloud-cpuCoresDetails: str
+    # """ example: [Processor{0, core=0, socket=0} """
+    # vcloud-debug: str
+    # """ example: false """
+    # vcloud-generatedFilesCount: str
+    # """ example: 1 """
+    # vcloud-matchedResultFilesCount: str
+    # """ example: 1 """
+    # vcloud-maxLogFileSize: str
+    # """ example: 20 MB """
+    # vcloud-memoryNodesAllocation: str
+    # """ example: {0=2.0 GB} """
+    # vcloud-newEnvironment: str
+    # """ example: """
+    # vcloud-outerwalltime: str
+    # """ example: 0.554s """
+    # vcloud-runId: str
+    # """ example: 6a08ebbd-2af2-4c18-af44-429a0439fab """
+
+
+class Results(BaseModel):
+    runid: RunId
+    options: str
+    runs: list[Run]
+
+
+def parse_time(s: str) -> float:
+    assert s[-1] == "s"
+    return float(s[:-1])
+
+
+def parse_size(s: str) -> int:
+    assert s[-1] == "B"
+    return int(s[:-1])
+
+
+def convert_run(r: ET.Element) -> Run:
+    basename = Path(r.attrib["name"]).name
+    cputime_s: Optional[float] = None
+    memory_B: Optional[int] = None
+    status: Optional[str] = None
+    walltime_s: Optional[float] = None
+
+    for col in r.iterfind("column"):
+        value = col.attrib["value"]
+        match col.attrib["title"]:
+            case "cputime":
+                cputime_s = parse_time(value)
+            case "memory":
+                memory_B = parse_size(value)
+            case "status":
+                status = value
+            case "walltime":
+                walltime_s = parse_time(value)
+
+    if cputime_s is None or memory_B is None or status is None or walltime_s is None:
+        raise ValueError("xml of results doesn't contains some expected column")
+
+    return Run(basename=basename, cputime_s=cputime_s, memory_B=memory_B, status=status, walltime_s=walltime_s)
+
+
+def parse_xml(file: Path) -> Results:
+    result = ET.fromstring(read_cin(file))
+    runs = list(map(convert_run, result.iterfind("run")))
+    return Results(runid=RunId.unmangle(result.attrib["name"]), options=result.attrib["options"], runs=runs)
+
+
+def to_pl(r: Results) -> pl.LazyFrame:
+    lf = pl.LazyFrame(r.runs)
+    return lf.with_columns(solver=pl.lit(r.runid.solver), participation=r.runid.participation, track=int(r.runid.track))
+
+
+def parse_dir(dir: Path) -> pl.LazyFrame:
+    return pl.concat(map(lambda file: to_pl(parse_xml(file)), dir.glob("*.xml.bz2")))
+
+
+def log_filename(dir: Path) -> Path:
+    l = list(dir.glob("*.logfiles.zip"))
+    if len(l) != 1:
+        raise (ValueError(f"Directory {dir!r} doesn't contains *.logfiles.zip archive"))
+    return l[0]
+
+
+### Benchexec add this header
+# output_file.write(
+#     " ".join(map(util.escape_string_shell, args))
+#     + "\n\n\n"
+#     + "-" * 80
+#     + "\n\n\n"
+# )
+
+
+@functools.cache
+def benchexec_log_separator() -> str:
+    return "\n\n\n" + "-" * 80 + "\n\n\n"
+
+
+class LogFile:
+    def __init__(self: "LogFile", dir: Path) -> None:
+        filename = log_filename(dir)
+        self.logfiles = ZipFile(filename)
+        self.name = Path(filename.name.removesuffix(".zip"))
+
+    def __enter__(self: "LogFile") -> "LogFile":
+        return self
+
+    def __exit__(self: "LogFile", exc_type: Any, exc_value: Any, traceback: Any) -> None:
+        self.close()
+
+    def close(self) -> None:
+        self.logfiles.close()
+
+    def get_log(self: "LogFile", r: RunId, basename: str) -> str:
+        """
+        Return the output of the prover and the header with the commandline used
+        """
+        p = str(self.name.joinpath(".".join([r.mangle(), basename, "log"])))
+        return self.logfiles.read(p).decode()
+
+    def get_output(self: "LogFile", r: RunId, basename: str) -> str:
+        """
+        Return the output of the prover
+        """
+        s = self.get_log(r, basename)
+        index = s.find(benchexec_log_separator())
+        if index == -1:
+            raise ValueError(f"Log Header not found {r!r} {basename!r}")
+        index += len(benchexec_log_separator())
+        return s[index:]
diff --git a/smtcomp/scramble_benchmarks.py b/smtcomp/scramble_benchmarks.py
index ca625e29..d579cdd8 100644
--- a/smtcomp/scramble_benchmarks.py
+++ b/smtcomp/scramble_benchmarks.py
@@ -154,16 +154,10 @@ def select_and_scramble(
             selected = smtcomp.selection.helper_compute_incremental(config)
         case defs.Track.ModelValidation:
             selected = smtcomp.selection.helper_compute_sq(config)
-            rich.print(
-                f"[red]The scramble_benchmarks command does not yet work for the competition track: {competition_track}[/red]"
-            )
-            exit(1)
+            selected = selected.filter(status=int(defs.Status.Sat))
         case defs.Track.UnsatCore:
             selected = smtcomp.selection.helper_compute_sq(config)
-            rich.print(
-                f"[red]The scramble_benchmarks command does not yet work for the competition track: {competition_track}[/red]"
-            )
-            exit(1)
+            selected = selected.filter(status=int(defs.Status.Unsat))
         case defs.Track.ProofExhibition | defs.Track.Cloud | defs.Track.Parallel:
             selected = smtcomp.selection.helper_compute_sq(config)
             rich.print(
diff --git a/smtcomp/selection.py b/smtcomp/selection.py
index e7dd216a..ad8409f7 100644
--- a/smtcomp/selection.py
+++ b/smtcomp/selection.py
@@ -38,12 +38,23 @@ def find_trivial(results: pl.LazyFrame, config: defs.Config) -> pl.LazyFrame:
         .agg(
             trivial=
             # All the results are sat or unsat and the time is below 1s
-            ((c_result != int(defs.Status.Unknown)) & (c_cpu_time <= 1.0)).all()
+            ((c_result != int(defs.Status.Unknown)) & (c_cpu_time <= 1.0)).all(),
+            # Compute the full result by year
+            result=pl.when((c_result == int(defs.Status.Sat)).sum() >= 2)
+            .then(int(defs.Status.Sat))
+            .when((c_result == int(defs.Status.Unsat)).sum() >= 2)
+            .then(int(defs.Status.Unsat))
+            .otherwise(int(defs.Status.Unknown)),
         )
         .group_by("file")
         .agg(
             trivial=c_trivial.any() if config.old_criteria else c_trivial.all(),
             run=True,
+            result=pl.when((c_result == int(defs.Status.Sat)).any())
+            .then(int(defs.Status.Sat))
+            .when((c_result == int(defs.Status.Unsat)).any())
+            .then(int(defs.Status.Unsat))
+            .otherwise(int(defs.Status.Unknown)),
         )
     )
     return tally
@@ -56,10 +67,17 @@ def join_default_with_False(original: pl.LazyFrame, new: pl.LazyFrame, on: str)
 
 def add_trivial_run_info(benchmarks: pl.LazyFrame, previous_results: pl.LazyFrame, config: defs.Config) -> pl.LazyFrame:
     is_trivial = find_trivial(previous_results, config)
-    return join_default_with_False(benchmarks, is_trivial, on="file").with_columns(
+    with_info = join_default_with_False(benchmarks, is_trivial, on="file").with_columns(
         new=pl.col("family").str.starts_with(str(config.current_year))
     )
 
+    if config.use_previous_results_for_status:
+        with_info = with_info.with_columns(
+            status=pl.when(pl.col("status") != int(defs.Status.Unknown)).then(pl.col("status")).otherwise(c_result)
+        )
+
+    return with_info
+
 
 def track_selection(benchmarks_with_info: pl.LazyFrame, config: defs.Config, target_track: defs.Track) -> pl.LazyFrame:
     used_logics = defs.logic_used_for_track(target_track)
diff --git a/smtcomp/unpack.py b/smtcomp/unpack.py
index b2b4e9f1..a06285c6 100644
--- a/smtcomp/unpack.py
+++ b/smtcomp/unpack.py
@@ -5,7 +5,7 @@
 from zipfile import ZipFile
 import tarfile
 from stat import S_IXUSR
-import gzip
+import gzip, bz2
 import io
 from typing import AnyStr, cast, IO
 from subprocess import check_output, STDOUT
@@ -62,5 +62,8 @@ def read_cin(file: Path) -> str:
     if file.name.endswith(".gz"):
         with gzip.open(file, "rt") as f:
             return f.read()
+    elif file.name.endswith(".bz2"):
+        with bz2.open(file, "rt") as f:
+            return f.read()
     else:
         return file.read_text()