From 578dccf8bab9d910fa7922bc7e4cfb886545616e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Bobot?=
Date: Tue, 4 Jun 2024 12:04:21 +0200
Subject: [PATCH 01/15] Merge defs.Config and defs.DataFiles for simplicity

---
 README.md                      |  13 ++--
 smtcomp/defs.py                | 108 +++++++++++++++++++++------
 smtcomp/main.py                |  42 +++++++------
 smtcomp/scramble_benchmarks.py |  11 ++--
 smtcomp/selection.py           |   7 +--
 5 files changed, 105 insertions(+), 76 deletions(-)

diff --git a/README.md b/README.md
index c4a0a9fc..117a3d0d 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# smtcomp
+# SMT Competition
 
 [![Release](https://img.shields.io/github/v/release/smtcomp/smtcomp.github.io)](https://img.shields.io/github/v/release/smtcomp/smtcomp.github.io)
 [![Build status](https://img.shields.io/github/actions/workflow/status/smtcomp/smtcomp.github.io/main.yml?branch=main)](https://github.com/smtcomp/smtcomp.github.io/actions/workflows/main.yml?query=branch%3Amain)
@@ -38,7 +38,7 @@ make install
 
 ## For starting a new SMT-COMP year
 
-Edit the file `smtcomp/defs.py`, in particular `Config.current_year`, `Logic` for adding new logics and `tracks` for new divisions.
+Edit the file `smtcomp/defs.py`, in particular `Config.current_year`, `Logic` for adding new logics and `tracks` for new divisions. Reset `Config.nyse_seed` to `None`, and set the date on which the New York Stock Exchange index will be read in `Config.nyse_date`.
 
 Download the new benchmarks from zenodo, unpack them, unpack the .tar.zst, you should get something like:
 
@@ -74,10 +74,9 @@ smtcomp create-benchmarks-list $DIR/zenodo ./data/
 
 The directory `./data/` is the one present in this repository.
 
-## Using the smtcomp tool for selecting the benchmarks
+## Using the `smtcomp` tool for selecting the benchmarks
 
-The list of benchmarks and the previous results are in json which are human
-readable, but slow to parse (1min). So locally the tool use the feather format. The
+The list of benchmarks and the previous results are in `json`, which is human-readable but slow to parse (about 1 min). So locally the tool uses the feather format. The
 feather files are generated with:
 
 ```
@@ -87,7 +86,7 @@ smtcomp create-cache ./data/
 ```
 
 Working with the feather files with [polars](https://docs.pola.rs/) is very fast, so no more intermediate files are needed.
 
-However statistics can be shown, for example for the selection of single track:
+However, statistics can be shown, for example for the selection of the single query track:
 
 ```
 smtcomp show-sq-selection-stats ./data/ 0
@@ -110,7 +109,7 @@ Which outputs:
 ...
 ```
 
-## Using the smtcomp tool for generating benchexec
+## Using the `smtcomp` tool for generating `benchexec` configuration
 
 #### Generate submissions [Optional]
diff --git a/smtcomp/defs.py b/smtcomp/defs.py
index 0dc45dce..0988e919 100644
--- a/smtcomp/defs.py
+++ b/smtcomp/defs.py
@@ -9,46 +9,8 @@ from pydantic import BaseModel, Field, RootModel, model_validator, ConfigDict
 from pydantic.networks import HttpUrl, validate_email
-
-
-## Parameters that can change each year
-class Config:
-    current_year = 2024
-    oldest_previous_results = 2018
-    timelimit_s = 60
-    memlimit_M = 1024 * 20
-    cpuCores = 4
-    min_used_benchmarks = 300
-    ratio_of_used_benchmarks = 0.5
-    old_criteria = False
-    """"Do we try to emulate <= 2023 criteria that does not really follow the rules"""
-    invert_triviality = False
-    """Prioritize triviality as much as possible for testing purpose.
-    Look for simple problems instead of hard one"""
-
-    def __init__(self: Config, seed: int | None) -> None:
-        self.__seed = seed
-
-    def seed(self) -> int:
-        if self.__seed is None:
-            raise ValueError("The seed as not been set")
-        s = self.__seed
-        self.__seed = +1
-        return s
-
-
-class DataFiles:
-    def __init__(self, data: Path) -> None:
-        self.previous_results = [
-            (year, data.joinpath(f"results-sq-{year}.json.gz"))
-            for year in range(Config.oldest_previous_results, Config.current_year)
-        ]
-        self.benchmarks = data.joinpath(f"benchmarks-{Config.current_year}.json.gz")
-        self.cached_non_incremental_benchmarks = data.joinpath(
-            f"benchmarks-non-incremental-{Config.current_year}.feather"
-        )
-        self.cached_incremental_benchmarks = data.joinpath(f"benchmarks-incremental-{Config.current_year}.feather")
-        self.cached_previous_results = data.joinpath(f"previous-sq-results-{Config.current_year}.feather")
+from datetime import date
+from rich import print
 
 
 class EnumAutoInt(Enum):
     """
@@ -1368,3 +1330,69 @@ class Result(BaseModel):
 
 class Results(BaseModel):
     results: list[Result]
+
+
+## Parameters that can change each year
+class Config:
+    current_year = 2024
+    oldest_previous_results = 2018
+    timelimit_s = 60
+    memlimit_M = 1024 * 20
+    cpuCores = 4
+    min_used_benchmarks = 300
+    ratio_of_used_benchmarks = 0.5
+    old_criteria = False
+    """Do we try to emulate the <= 2023 criteria that do not really follow the rules"""
+    invert_triviality = False
+    """Prioritize triviality as much as possible for testing purposes.
+    Look for simple problems instead of hard ones"""
+
+    nyse_seed = None
+    """The integer part of one hundred times the opening value of the New York Stock Exchange Composite Index on the first day the exchange is open on or after the date specified in nyse_date"""
+    nyse_date = date(year=2024, month=6, day=10)
+
+    def __init__(self, data: Path) -> None:
+        if data.name != "data":
+            raise ValueError("Consistency check, data directory must be named 'data'")
+        self.previous_results = [
+            (year, data.joinpath(f"results-sq-{year}.json.gz"))
+            for year in range(Config.oldest_previous_results, Config.current_year)
+        ]
+        self.benchmarks = data.joinpath(f"benchmarks-{Config.current_year}.json.gz")
+        self.cached_non_incremental_benchmarks = data.joinpath(
+            f"benchmarks-non-incremental-{Config.current_year}.feather"
+        )
+        self.cached_incremental_benchmarks = data.joinpath(f"benchmarks-incremental-{Config.current_year}.feather")
+        self.cached_previous_results = data.joinpath(f"previous-sq-results-{Config.current_year}.feather")
+        self.data = data
+        self.__seed: int | None = None
+
+    @functools.cached_property
+    def submissions(self) -> list[Submission]:
+        return [
+            Submission.model_validate_json(Path(file).read_text()) for file in self.data.glob("../submissions/*.json")
+        ]
+
+    def init_seed(self) -> int:
+        unknown_seed = 0
+        seed = 0
+        for s in self.submissions:
+            if s.seed is None:
+                unknown_seed += 1
+            else:
+                seed += s.seed
+        seed = seed % (2**30)
+        if self.nyse_seed is None:
+            print(f"[red]Warning[/red] NYSE seed not set, and {unknown_seed} submissions are missing a seed")
+        else:
+            if unknown_seed > 0:
+                raise ValueError(f"{unknown_seed} submissions are missing a seed")
+            seed += self.nyse_seed
+        return seed
+
+    def seed(self) -> int:
+        if self.__seed is None:
+            self.__seed = self.init_seed()
+        else:
+            self.__seed += 1
+        return self.__seed
diff --git a/smtcomp/main.py b/smtcomp/main.py
index d10040b3..c16aa314 100644
--- a/smtcomp/main.py
+++ b/smtcomp/main.py
@@ -96,6 +96,12 @@ def get_contacts(files: list[Path] = typer.Argument(None)) -> None:
     print("\n".join(contacts))
 
 
+@app.command(rich_help_panel=submissions_panel)
+def get_seed(data: Path) -> None:
+    conf = defs.Config(data)
+    print(conf.seed())
+
+
 @app.command(rich_help_panel=submissions_panel)
 def merge_pull_requests_locally(C: str = ".") -> None:
     submission.merge_all_submissions(C)
@@ -197,11 +203,12 @@ def generate_trivial_benchmarks(dst: Path) -> None:
 
 
 @app.command()
-def generate_benchmarks(dst: Path, seed: int = 0) -> None:
+def generate_benchmarks(dst: Path, data: Path) -> None:
     """
     Generate benchmarks for smtcomp
     """
-    smtcomp.generate_benchmarks.generate_benchmarks(dst, seed)
+    config = defs.Config(data)
+    smtcomp.generate_benchmarks.generate_benchmarks(dst, config.seed())
 
 
 @app.command(rich_help_panel=benchmarks_panel)
@@ -240,9 +247,9 @@ def create_benchmarks_list(src: Path, data: Path, scrambler: Optional[Path] = No
         incremental=cast(List[defs.InfoIncremental], files_incremental),
         non_incremental=cast(List[defs.InfoNonIncremental], files_non_incremental),
     )
-    datafiles = defs.DataFiles(data)
+    config = defs.Config(data)
     print("Writing benchmarks file")
-    write_cin(datafiles.benchmarks, benchmarks.model_dump_json(indent=1))
+    write_cin(config.benchmarks, benchmarks.model_dump_json(indent=1))
 
 
 @app.command(rich_help_panel=benchmarks_panel)
@@ -305,11 +312,10 @@ def show_benchmarks_trivial_stats(data: Path, old_criteria: OLD_CRITERIA = False
     Never compet.: old benchmarks never run competitively (more than one prover)
 
     """
-    config = defs.Config(seed=None)
+    config = defs.Config(data)
     config.old_criteria = old_criteria
-    datafiles = defs.DataFiles(data)
-    benchmarks = pl.read_ipc(datafiles.cached_non_incremental_benchmarks)
-    results = pl.read_ipc(datafiles.cached_previous_results)
+    benchmarks = pl.read_ipc(config.cached_non_incremental_benchmarks)
+    results = pl.read_ipc(config.cached_previous_results)
     benchmarks_with_trivial_info = smtcomp.selection.add_trivial_run_info(benchmarks.lazy(), results.lazy(), config)
     b3 = (
         benchmarks_with_trivial_info.group_by(["logic"])
         .agg(
@@ -354,7 +360,6 @@ def show_benchmarks_trivial_stats(data: Path, old_criteria: OLD_CRITERIA = False
 @app.command(rich_help_panel=selection_panel)
 def show_sq_selection_stats(
     data: Path,
-    seed: int,
     old_criteria: OLD_CRITERIA = False,
     min_use_benchmarks: int = defs.Config.min_used_benchmarks,
     ratio_of_used_benchmarks: float = defs.Config.ratio_of_used_benchmarks,
@@ -367,12 +372,12 @@ def show_sq_selection_stats(
     Never compet.: old benchmarks never run competitively (more than one prover)
 
     """
-    config = defs.Config(seed=seed)
+    config = defs.Config(data)
     config.min_used_benchmarks = min_use_benchmarks
     config.ratio_of_used_benchmarks = ratio_of_used_benchmarks
     config.invert_triviality = invert_triviality
     config.old_criteria = old_criteria
-    benchmarks_with_info = smtcomp.selection.helper_compute_sq(data, config)
+    benchmarks_with_info = smtcomp.selection.helper_compute_sq(config)
     b3 = (
         benchmarks_with_info.group_by(["logic"])
         .agg(
@@ -439,9 +444,9 @@ def print_iterable(i: int, tree: Tree, a: Any) -> None:
 
 @app.command(rich_help_panel=data_panel)
 def create_cache(data: Path) -> None:
-    datafiles = defs.DataFiles(data)
+    config = defs.Config(data)
     print("Loading benchmarks")
-    bench = defs.Benchmarks.model_validate_json(read_cin(datafiles.benchmarks))
+    bench = defs.Benchmarks.model_validate_json(read_cin(config.benchmarks))
     bd: Dict[defs.Smt2File, int] = {}
     for i, smtfile in enumerate(
         itertools.chain(map(lambda x: x.file, bench.non_incremental), map(lambda x: x.file, bench.incremental))
@@ -464,7 +469,7 @@ def create_cache(data: Path) -> None:
     df = pl.DataFrame(bench_simplified)
     # df["family"] = df["family"].astype("string")
     # df["name"] = df["name"].astype("string")
-    df.write_ipc(datafiles.cached_non_incremental_benchmarks)
+    df.write_ipc(config.cached_non_incremental_benchmarks)
 
     print("Creating incremental benchmarks cache as feather file")
     bench_simplified = map(
@@ -480,7 +485,7 @@ def create_cache(data: Path) -> None:
     df = pl.DataFrame(bench_simplified)
     # df["family"] = df["family"].astype("string")
     # df["name"] = df["name"].astype("string")
-    df.write_ipc(datafiles.cached_incremental_benchmarks)
+    df.write_ipc(config.cached_incremental_benchmarks)
 
     def convert(x: defs.Result, year: int) -> dict[str, int | str | float] | None:
         if x.file not in bd:
@@ -497,14 +502,14 @@ def convert(x: defs.Result, year: int) -> dict[str, int | str | float] | None:
 
     results_filtered: list[Any] = []
-    for year, file in track(datafiles.previous_results, description="Loading json results"):
+    for year, file in track(config.previous_results, description="Loading json results"):
         results = defs.Results.model_validate_json(read_cin(file))
         results_filtered.extend(filter(lambda x: x is not None, map(lambda r: convert(r, year), results.results)))
 
     print("Creating old results cache as feather file")
     df = pl.DataFrame(results_filtered)
     # df["solver"] = df["solver"].astype("string")
-    df.write_ipc(datafiles.cached_previous_results)
+    df.write_ipc(config.cached_previous_results)
 
 
 # def conv(x:defs.Smt2FileOld) -> defs.Info:
@@ -524,7 +529,6 @@ def select_and_scramble(
     srcdir: Path,
     dstdir: Path,
     scrambler: Path,
-    seed: int,
     max_workers: int = 8,
 ) -> None:
     """
     Selects and scrambles the benchmarks and
     outlined in the data.
""" - config = defs.Config(seed=seed) + config = defs.Config(data) smtcomp.scramble_benchmarks.select_and_scramble( competition_track, data, config, srcdir, dstdir, scrambler, max_workers ) diff --git a/smtcomp/scramble_benchmarks.py b/smtcomp/scramble_benchmarks.py index b25a4835..212376b5 100644 --- a/smtcomp/scramble_benchmarks.py +++ b/smtcomp/scramble_benchmarks.py @@ -115,7 +115,6 @@ def test_select_and_scramble( def select_and_scramble( competition_track: defs.Track, - data: Path, config: defs.Config, srcdir: Path, dstdir: Path, @@ -126,27 +125,27 @@ def select_and_scramble( match competition_track: case defs.Track.SingleQuery: - selected = smtcomp.selection.helper_compute_sq(data, config) + selected = smtcomp.selection.helper_compute_sq(config) case defs.Track.Incremental: - selected = smtcomp.selection.helper_compute_sq(data, config) + selected = smtcomp.selection.helper_compute_sq(config) rich.print( f"[red]The scramble_benchmarks command does not yet work for the competition track: {competition_track}[/red]" ) exit(1) case defs.Track.ModelValidation: - selected = smtcomp.selection.helper_compute_sq(data, config) + selected = smtcomp.selection.helper_compute_sq(config) rich.print( f"[red]The scramble_benchmarks command does not yet work for the competition track: {competition_track}[/red]" ) exit(1) case defs.Track.UnsatCore: - selected = smtcomp.selection.helper_compute_sq(data, config) + selected = smtcomp.selection.helper_compute_sq(config) rich.print( f"[red]The scramble_benchmarks command does not yet work for the competition track: {competition_track}[/red]" ) exit(1) case defs.Track.ProofExhibition | defs.Track.Cloud | defs.Track.Parallel: - selected = smtcomp.selection.helper_compute_sq(data, config) + selected = smtcomp.selection.helper_compute_sq(config) rich.print( f"[red]The scramble_benchmarks command does not yet work for the competition track: {competition_track}[/red]" ) diff --git a/smtcomp/selection.py b/smtcomp/selection.py index 3dd1cc7e..736e74d8 100644 --- a/smtcomp/selection.py +++ b/smtcomp/selection.py @@ -115,13 +115,12 @@ def sq_selection(benchmarks_with_info: pl.LazyFrame, config: defs.Config) -> pl. ) -def helper_compute_sq(data: Path, config: defs.Config) -> pl.LazyFrame: +def helper_compute_sq(config: defs.Config) -> pl.LazyFrame: """ Returned columns: file (uniq id), logic, family,name, status, asserts nunmber, trivial, run (in previous year), new (benchmarks), selected """ - datafiles = defs.DataFiles(data) - benchmarks = pl.read_ipc(datafiles.cached_non_incremental_benchmarks) - results = pl.read_ipc(datafiles.cached_previous_results) + benchmarks = pl.read_ipc(config.cached_non_incremental_benchmarks) + results = pl.read_ipc(config.cached_previous_results) benchmarks_with_info = add_trivial_run_info(benchmarks.lazy(), results.lazy(), config) if config.invert_triviality: trivial_in_logic = pl.col("trivial").any().over(["logic"]) From 0683e1a94c1598ece733a9b50c7e2144dbe1870f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bobot?= Date: Wed, 5 Jun 2024 14:41:21 +0200 Subject: [PATCH 02/15] [Selection] Cloud, and parallel track a minute difference from last year, hard and unsolved are shuffled independantly. 
---
 smtcomp/defs.py      | 27 ++++++++++++---
 smtcomp/selection.py | 82 +++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 100 insertions(+), 9 deletions(-)

diff --git a/smtcomp/defs.py b/smtcomp/defs.py
index 0988e919..1d39c66d 100644
--- a/smtcomp/defs.py
+++ b/smtcomp/defs.py
@@ -5,13 +5,14 @@
 import re
 from enum import Enum
 from pathlib import Path, PurePath
-from typing import Any, Dict, cast, Optional
+from typing import Any, Dict, cast, Optional, Iterable, TypeVar
 
 from pydantic import BaseModel, Field, RootModel, model_validator, ConfigDict
 from pydantic.networks import HttpUrl, validate_email
 from datetime import date
 from rich import print
 
+U = TypeVar('U')
 
 class EnumAutoInt(Enum):
     """
@@ -1186,6 +1187,9 @@ def complete(self, archive: Archive | None, command: Command | None) -> Particip
 
 import itertools
 
+def union(s:Iterable[set[U]]) -> set[U]:
+    return functools.reduce(lambda x, y: x | y,s,set())
+
 class Participations(RootModel):
     root: list[Participation]
 
@@ -1193,14 +1197,18 @@ class Participations(RootModel):
     def get_divisions(self, l: list[Track] = list(Track)) -> set[Division]:
         """ " Return the divisions in which the solver participates"""
         tracks = self.get()
-        divs = [set(tracks[track].keys()) for track in l]
-        return functools.reduce(lambda x, y: x | y, divs)
+        return union(set(tracks[track].keys()) for track in l if track in tracks)
 
     def get_logics(self, l: list[Track] = list(Track)) -> set[Logic]:
         """ " Return the logics in which the solver participates"""
         tracks = self.get()
-        logics = itertools.chain.from_iterable([iter(tracks[track].values()) for track in l])
-        return functools.reduce(lambda x, y: x | y, logics)
+        return union(itertools.chain.from_iterable([tracks[track].values() for track in l if track in tracks]))
+
+    def get_logics_by_track(self) -> dict[Track,set[Logic]]:
+        """ Return the logics in which the solver participates"""
+        tracks = self.get()
+        return dict( (track, union(tracks[track].values())) for track in tracks)
+
 
     def get(self, d: None | dict[Track, dict[Division, set[Logic]]] = None) -> dict[Track, dict[Division, set[Logic]]]:
         if d is None:
@@ -1350,6 +1358,15 @@ class Config:
     nyse_seed = None
     """The integer part of one hundred times the opening value of the New York Stock Exchange Composite Index on the first day the exchange is open on or after the date specified in nyse_date"""
     nyse_date = date(year=2024, month=6, day=10)
+
+    aws_timelimit_hard=600
+    """
+    Time in seconds above which benchmarks are considered hard
+    """
+    aws_num_selected=400
+    """
+    Number of selected benchmarks
+    """
 
     def __init__(self, data: Path) -> None:
         if data.name != "data":
diff --git a/smtcomp/selection.py b/smtcomp/selection.py
index 736e74d8..b5cf1fc8 100644
--- a/smtcomp/selection.py
+++ b/smtcomp/selection.py
@@ -1,6 +1,6 @@
 # mypy: allow-any-unimported
 
-import functools
+import functools, itertools
 from typing import Set, Dict, Optional, cast, List, DefaultDict
 from pathlib import Path, PurePath
 from smtcomp import defs
@@ -48,13 +48,17 @@ def find_trivial(results: pl.LazyFrame, config: defs.Config) -> pl.LazyFrame:
     )
     return tally
 
+def join_default_with_False(original:pl.LazyFrame, new: pl.LazyFrame,
+                            on:str) -> pl.LazyFrame:
+    return (original.join(new, on="file", how="left", coalesce=True)
+            .fill_null(False))
+
 
 def add_trivial_run_info(benchmarks: pl.LazyFrame, previous_results: pl.LazyFrame, config: defs.Config) -> pl.LazyFrame:
     is_trivial = find_trivial(previous_results, config)
 
     return (
-        benchmarks.join(is_trivial, on="file", how="outer_coalesce")
-        .fill_null(False)
+        join_default_with_False(benchmarks,is_trivial, on="file")
         .with_columns(new=pl.col("family").str.starts_with(str(config.current_year)))
     )
 
@@ -75,7 +79,7 @@ def sq_selection(benchmarks_with_info: pl.LazyFrame, config: defs.Config) -> pl.
         new_benchmarks=c_file.filter(c_new == True),
         old_benchmarks=c_file.filter(c_new == False),
     )
-
+    
     # Expression computing the number of sample to take for each logic
     sample_size = pl.min_horizontal(
         c_all_len,
@@ -127,3 +131,73 @@ def helper_compute_sq(config: defs.Config) -> pl.LazyFrame:
     inverted_or_not_trivial = pl.when(trivial_in_logic).then(pl.col("trivial").not_()).otherwise(pl.col("trivial"))
     benchmarks_with_info = benchmarks_with_info.with_columns(trivial=inverted_or_not_trivial)
     return sq_selection(benchmarks_with_info, config)
+
+def competitive_logics(config: defs.Config) -> pl.LazyFrame:
+    """
+    returned columns track, logic, competitive:bool
+    """
+    l=(s.participations.get_logics_by_track() for s in config.submissions)
+    dd=list(itertools.chain.from_iterable(map((lambda x: ((int(k), list(map(int,s))) for k,s in x.items())),l)))
+    return (pl.DataFrame(dd,schema=["track","logic"])
+            .lazy()
+            .explode("logic")
+            .group_by("track","logic")
+            .agg(competitive=(pl.col("logic").len() > 1)))
+
+@functools.cache
+def tracks() -> pl.LazyFrame:
+    """
+    returned columns track, division, logic
+    """
+    l = ((int(track),int(division),int(logic)) for track, divisions in defs.tracks.items() for division,logics in divisions.items() for logic in logics )
+    return pl.DataFrame(l,schema=["track","division","logic"]).lazy()
+
+def aws_selection(benchmarks: pl.LazyFrame, previous_results: pl.LazyFrame, config: defs.Config) -> pl.LazyFrame:
+    aws_track=[defs.Track.Cloud,defs.Track.Parallel]
+
+    # Add division information to competitive logic
+    logics=competitive_logics(config).filter(pl.col("track").is_in(list(map(int,aws_track))),competitive=True)
+    logics = logics.join(tracks(),on=["track","logic"],how="inner")
+
+    #Keep only competitive logic from this track
+    b=benchmarks.join(logics,on="logic",how="inner")
+    previous_results=previous_results.join(b,on="file",how="semi")
+
+    #Keep only hard and unsolved benchmarks
+    solved=(c_result.is_in({int(defs.Status.Sat),int(defs.Status.Unsat)}))
+    solved_within_timelimit= (solved & (c_cpu_time <= config.aws_timelimit_hard))
+
+    hard_unsolved = (previous_results
+        # Remove bench solved within the timelimit by any solver
+        .filter(solved_within_timelimit.not_().all().over("file"))
+        .group_by("file")
+        .agg(
+            hard=solved.any(),
+            unsolved=solved.not_().all()
+        ))
+
+    b = b.join(hard_unsolved, how="inner", on="file")
+
+    def sample(lf:pl.LazyFrame) -> pl.LazyFrame:
+        n=pl.min_horizontal(pl.col("file").list.len(),config.aws_num_selected)
+        return lf.with_columns(file=pl.col("file").list.sample(n=n,seed=config.seed()))
+
+    #Sample at the logic level
+    b = sample(b
+               .group_by("track","division","logic")
+               .agg(file=c_file)
+               )
+
+    #Sample at the division level
+    b = sample(b
+               .group_by("track","division")
+               .agg(file=c_file.flatten()))
+
+    #Sample at the track level
+    b = sample(b
+               .group_by("track")
+               .agg(file=c_file.flatten())
+               )
+
+
+    return b.explode("file").join(benchmarks,on="file",how="inner")
\ No newline at end of file

From 3bacc8743df7e68e2236db99c569d9ee1636b90d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Bobot?=
Date: Fri, 7 Jun 2024 10:36:51 +0200
Subject: [PATCH 03/15] Seed stays the same for each request; sampling is less
 uniform, but it helps reproducibility

---
 smtcomp/defs.py                |  33 ++++-----
 smtcomp/main.py                |   8 +--
 smtcomp/scramble_benchmarks.py |  19 ++----
 smtcomp/selection.py           | 118 ++++++++++++++++-----------------
 4 files changed, 82 insertions(+), 96 deletions(-)

diff --git a/smtcomp/defs.py b/smtcomp/defs.py
index 1d39c66d..695e5384 100644
--- a/smtcomp/defs.py
+++ b/smtcomp/defs.py
@@ -12,7 +12,8 @@
 from datetime import date
 from rich import print
 
-U = TypeVar('U')
+U = TypeVar("U")
+
 
 class EnumAutoInt(Enum):
     """
@@ -1187,9 +1188,10 @@ def complete(self, archive: Archive | None, command: Command | None) -> Particip
 
 import itertools
 
-def union(s:Iterable[set[U]]) -> set[U]:
-    return functools.reduce(lambda x, y: x | y,s,set())
+
+def union(s: Iterable[set[U]]) -> set[U]:
+    return functools.reduce(lambda x, y: x | y, s, set())
+
 
 class Participations(RootModel):
     root: list[Participation]
@@ -1204,11 +1206,10 @@ class Participations(RootModel):
         tracks = self.get()
         return union(itertools.chain.from_iterable([tracks[track].values() for track in l if track in tracks]))
 
-    def get_logics_by_track(self) -> dict[Track,set[Logic]]:
-        """ Return the logics in which the solver participates"""
+    def get_logics_by_track(self) -> dict[Track, set[Logic]]:
+        """Return the logics in which the solver participates"""
         tracks = self.get()
-        return dict( (track, union(tracks[track].values())) for track in tracks)
-
+        return dict((track, union(tracks[track].values())) for track in tracks)
 
     def get(self, d: None | dict[Track, dict[Division, set[Logic]]] = None) -> dict[Track, dict[Division, set[Logic]]]:
         if d is None:
@@ -1358,12 +1359,12 @@ class Config:
     nyse_seed = None
     """The integer part of one hundred times the opening value of the New York Stock Exchange Composite Index on the first day the exchange is open on or after the date specified in nyse_date"""
     nyse_date = date(year=2024, month=6, day=10)
-
-    aws_timelimit_hard=600
+
+    aws_timelimit_hard = 600
     """
     Time in seconds above which benchmarks are considered hard
     """
-    aws_num_selected=400
+    aws_num_selected = 400
     """
     Number of selected benchmarks
     """
@@ -1390,7 +1391,8 @@ class Config:
             Submission.model_validate_json(Path(file).read_text()) for file in self.data.glob("../submissions/*.json")
         ]
 
-    def init_seed(self) -> int:
+    @functools.cached_property
+    def seed(self) -> int:
         unknown_seed = 0
         seed = 0
         for s in self.submissions:
@@ -1406,10 +1408,3 @@ class Config:
                 raise ValueError(f"{unknown_seed} submissions are missing a seed")
             seed += self.nyse_seed
         return seed
-
-    def seed(self) -> int:
-        if self.__seed is None:
-            self.__seed = self.init_seed()
-        else:
-            self.__seed += 1
-        return self.__seed
diff --git a/smtcomp/main.py b/smtcomp/main.py
index c16aa314..e36f38d4 100644
--- a/smtcomp/main.py
+++ b/smtcomp/main.py
@@ -99,7 +99,7 @@ def get_contacts(files: list[Path] = typer.Argument(None)) -> None:
 @app.command(rich_help_panel=submissions_panel)
 def get_seed(data: Path) -> None:
     conf = defs.Config(data)
-    print(conf.seed())
+    print(conf.seed)
 
 
 @app.command(rich_help_panel=submissions_panel)
@@ -208,7 +208,7 @@ def generate_benchmarks(dst: Path, data: Path) -> None:
     Generate benchmarks for smtcomp
     """
     config = defs.Config(data)
-    smtcomp.generate_benchmarks.generate_benchmarks(dst, config.seed())
+    smtcomp.generate_benchmarks.generate_benchmarks(dst, config.seed)
 
 
 @app.command(rich_help_panel=benchmarks_panel)
@@ -539,9 +539,7 @@ def select_and_scramble(
     """
 
     config = defs.Config(data)
-    smtcomp.scramble_benchmarks.select_and_scramble(
-        competition_track, data, config, srcdir, dstdir, scrambler, max_workers
-    )
+    smtcomp.scramble_benchmarks.select_and_scramble(competition_track, config, srcdir, dstdir, scrambler, max_workers)
 
 
 @app.command()
diff --git a/smtcomp/scramble_benchmarks.py b/smtcomp/scramble_benchmarks.py
index 212376b5..41f205a7 100644
--- a/smtcomp/scramble_benchmarks.py
+++ b/smtcomp/scramble_benchmarks.py
@@ -26,7 +26,7 @@ def scramble_file(fdict: dict, incremental: bool, srcdir: Path, dstdir: Path, ar
 
 def create_scramble_id(benchmarks: pl.LazyFrame, config: defs.Config) -> pl.LazyFrame:
-    files = benchmarks.sort("file").select(pl.col("file").shuffle(seed=config.seed()))
+    files = benchmarks.sort("file").select(pl.col("file").shuffle(seed=config.seed))
     files = files.with_row_index(name="scramble_id")
     return benchmarks.join(files, on="file")
 
@@ -43,7 +43,7 @@ def scramble_lazyframe(
     args = []
     files = benchmarks.select("scramble_id", "logic", "family", "name").collect().to_dicts()
     incremental = False
-    seed = config.seed()
+    seed = config.seed
 
     match competition_track:
         case defs.Track.SingleQuery:
@@ -73,38 +73,35 @@ def scramble_lazyframe(
 
 def test_select_and_scramble(
     competition_track: defs.Track,
-    data: Path,
     config: defs.Config,
     srcdir: Path,
     dstdir: Path,
     scrambler: Path,
     max_workers: int,
 ) -> None:
-    seed = config.seed()
-
     match competition_track:
         case defs.Track.SingleQuery:
-            selected = smtcomp.selection.helper_compute_sq(data, config)
+            selected = smtcomp.selection.helper_compute_sq(config)
         case defs.Track.Incremental:
-            selected = smtcomp.selection.helper_compute_sq(data, config)
+            selected = smtcomp.selection.helper_compute_sq(config)
             rich.print(
                 f"[red]The scramble_benchmarks command does not yet work for the competition track: {competition_track}[/red]"
             )
             exit(1)
         case defs.Track.ModelValidation:
-            selected = smtcomp.selection.helper_compute_sq(data, config)
+            selected = smtcomp.selection.helper_compute_sq(config)
             rich.print(
                 f"[red]The scramble_benchmarks command does not yet work for the competition track: {competition_track}[/red]"
            )
             exit(1)
         case defs.Track.UnsatCore:
-            selected = smtcomp.selection.helper_compute_sq(data, config)
+            selected = smtcomp.selection.helper_compute_sq(config)
             rich.print(
                 f"[red]The scramble_benchmarks command does not yet work for the competition track: {competition_track}[/red]"
             )
             exit(1)
         case defs.Track.ProofExhibition | defs.Track.Cloud | defs.Track.Parallel:
-            selected = smtcomp.selection.helper_compute_sq(data, config)
+            selected = smtcomp.selection.helper_compute_sq(config)
             rich.print(
                 f"[red]The scramble_benchmarks command does not yet work for the competition track: {competition_track}[/red]"
             )
@@ -121,8 +118,6 @@ def select_and_scramble(
     scrambler: Path,
     max_workers: int,
 ) -> None:
-    seed = config.seed()
-
     match competition_track:
         case defs.Track.SingleQuery:
             selected = smtcomp.selection.helper_compute_sq(config)
diff --git a/smtcomp/selection.py b/smtcomp/selection.py
index b5cf1fc8..8dc7fde9 100644
--- a/smtcomp/selection.py
+++ b/smtcomp/selection.py
@@ -48,18 +48,16 @@ def find_trivial(results: pl.LazyFrame, config: defs.Config) -> pl.LazyFrame:
     )
     return tally
 
-def join_default_with_False(original:pl.LazyFrame, new: pl.LazyFrame,
-                            on:str) -> pl.LazyFrame:
-    return (original.join(new, on="file", how="left", coalesce=True)
-            .fill_null(False))
+
+def join_default_with_False(original: pl.LazyFrame, new: pl.LazyFrame, on: str) -> pl.LazyFrame:
+    return original.join(new, on="file", how="left", coalesce=True).fill_null(False)
 
 
 def
 add_trivial_run_info(benchmarks: pl.LazyFrame, previous_results: pl.LazyFrame, config: defs.Config) -> pl.LazyFrame:
     is_trivial = find_trivial(previous_results, config)
 
-    return (
-        join_default_with_False(benchmarks,is_trivial, on="file")
-        .with_columns(new=pl.col("family").str.starts_with(str(config.current_year)))
+    return join_default_with_False(benchmarks, is_trivial, on="file").with_columns(
+        new=pl.col("family").str.starts_with(str(config.current_year))
     )
 
@@ -79,7 +77,7 @@ def sq_selection(benchmarks_with_info: pl.LazyFrame, config: defs.Config) -> pl.
         new_benchmarks=c_file.filter(c_new == True),
         old_benchmarks=c_file.filter(c_new == False),
     )
-    
+
     # Expression computing the number of sample to take for each logic
     sample_size = pl.min_horizontal(
         c_all_len,
@@ -97,14 +95,14 @@ def sq_selection(benchmarks_with_info: pl.LazyFrame, config: defs.Config) -> pl.
     new_benchmarks_sampled = (
         pl.col("new_benchmarks")
         .filter(new_sample_size > 0)
-        .list.sample(n=new_sample_size.filter(new_sample_size > 0), seed=config.seed())
+        .list.sample(n=new_sample_size.filter(new_sample_size > 0), seed=config.seed)
         .list.explode()
         .drop_nulls()
     )
     old_benchmarks_sampled = (
         pl.col("old_benchmarks")
         .filter(old_sample_size > 0)
-        .list.sample(n=old_sample_size.filter(old_sample_size > 0), seed=config.seed())
+        .list.sample(n=old_sample_size.filter(old_sample_size > 0), seed=config.seed)
         .list.explode()
         .drop_nulls()
     )
@@ -132,72 +130,72 @@ def helper_compute_sq(config: defs.Config) -> pl.LazyFrame:
     benchmarks_with_info = benchmarks_with_info.with_columns(trivial=inverted_or_not_trivial)
     return sq_selection(benchmarks_with_info, config)
 
+
 def competitive_logics(config: defs.Config) -> pl.LazyFrame:
     """
     returned columns track, logic, competitive:bool
     """
-    l=(s.participations.get_logics_by_track() for s in config.submissions)
-    dd=list(itertools.chain.from_iterable(map((lambda x: ((int(k), list(map(int,s))) for k,s in x.items())),l)))
-    return (pl.DataFrame(dd,schema=["track","logic"])
-            .lazy()
-            .explode("logic")
-            .group_by("track","logic")
-            .agg(competitive=(pl.col("logic").len() > 1)))
+    l = (s.participations.get_logics_by_track() for s in config.submissions)
+    dd = list(itertools.chain.from_iterable(map((lambda x: ((int(k), list(map(int, s))) for k, s in x.items())), l)))
+    return (
+        pl.DataFrame(dd, schema=["track", "logic"])
+        .lazy()
+        .explode("logic")
+        .group_by("track", "logic")
+        .agg(competitive=(pl.col("logic").len() > 1))
+    )
+
 
 @functools.cache
 def tracks() -> pl.LazyFrame:
     """
     returned columns track, division, logic
     """
-    l = ((int(track),int(division),int(logic)) for track, divisions in defs.tracks.items() for division,logics in divisions.items() for logic in logics )
-    return pl.DataFrame(l,schema=["track","division","logic"]).lazy()
+    l = (
+        (int(track), int(division), int(logic))
+        for track, divisions in defs.tracks.items()
+        for division, logics in divisions.items()
+        for logic in logics
+    )
+    return pl.DataFrame(l, schema=["track", "division", "logic"]).lazy()
+
 
 def aws_selection(benchmarks: pl.LazyFrame, previous_results: pl.LazyFrame, config: defs.Config) -> pl.LazyFrame:
-    aws_track=[defs.Track.Cloud,defs.Track.Parallel]
+    aws_track = [defs.Track.Cloud, defs.Track.Parallel]
 
     # Add division information to competitive logic
-    logics=competitive_logics(config).filter(pl.col("track").is_in(list(map(int,aws_track))),competitive=True)
-    logics = logics.join(tracks(),on=["track","logic"],how="inner")
-
-    #Keep only competitive logic from this track
-    b=benchmarks.join(logics,on="logic",how="inner")
-    previous_results=previous_results.join(b,on="file",how="semi")
-
-    #Keep only hard and unsolved benchmarks
-    solved=(c_result.is_in({int(defs.Status.Sat),int(defs.Status.Unsat)}))
-    solved_within_timelimit= (solved & (c_cpu_time <= config.aws_timelimit_hard))
-
-    hard_unsolved = (previous_results
+    logics = competitive_logics(config).filter(pl.col("track").is_in(list(map(int, aws_track))), competitive=True)
+    logics = logics.join(tracks(), on=["track", "logic"], how="inner")
+
+    # Keep only competitive logic from this track
+    b = benchmarks.join(logics, on="logic", how="inner")
+    previous_results = previous_results.join(b, on="file", how="semi")
+
+    # Keep only hard and unsolved benchmarks
+    solved = c_result.is_in({int(defs.Status.Sat), int(defs.Status.Unsat)})
+    solved_within_timelimit = solved & (c_cpu_time <= config.aws_timelimit_hard)
+
+    hard_unsolved = (
+        previous_results
         # Remove bench solved within the timelimit by any solver
         .filter(solved_within_timelimit.not_().all().over("file"))
         .group_by("file")
-        .agg(
-            hard=solved.any(),
-            unsolved=solved.not_().all()
-        ))
-
+        .agg(hard=solved.any(), unsolved=solved.not_().all())
+    )
+
     b = b.join(hard_unsolved, how="inner", on="file")
 
-    def sample(lf:pl.LazyFrame) -> pl.LazyFrame:
-        n=pl.min_horizontal(pl.col("file").list.len(),config.aws_num_selected)
-        return lf.with_columns(file=pl.col("file").list.sample(n=n,seed=config.seed()))
-
-    #Sample at the logic level
-    b = sample(b
-               .group_by("track","division","logic")
-               .agg(file=c_file)
-               )
-
-    #Sample at the division level
-    b = sample(b
-               .group_by("track","division")
-               .agg(file=c_file.flatten()))
-
-    #Sample at the track level
-    b = sample(b
-               .group_by("track")
-               .agg(file=c_file.flatten())
-               )
-
-
-    return b.explode("file").join(benchmarks,on="file",how="inner")
+    def sample(lf: pl.LazyFrame) -> pl.LazyFrame:
+        n = pl.min_horizontal(pl.col("file").list.len(), config.aws_num_selected)
+        return lf.with_columns(file=pl.col("file").list.sample(n=n, seed=config.seed))
+
+    # Sample at the logic level
+    b = sample(b.group_by("track", "division", "logic").agg(file=c_file))
+
+    # Sample at the division level
+    b = sample(b.group_by("track", "division").agg(file=c_file.flatten()))
+
+    # Sample at the track level
+    b = sample(b.group_by("track").agg(file=c_file.flatten()))
+
+    return b.explode("file").join(benchmarks, on="file", how="inner")

From 2b558a7dd232a93a50b7301aa179f44648735d9c Mon Sep 17 00:00:00 2001
From: mbromber <83694638+mbromber@users.noreply.github.com>
Date: Wed, 12 Jun 2024 01:05:36 +0200
Subject: [PATCH 04/15] 2024scrambler incr (#89)

* Scrambler now prepends the statuses of all check calls.

* Swapped select-and-scramble back from test mode

* Blackify.

* Fixed my typing error.
---
 smtcomp/scramble_benchmarks.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/smtcomp/scramble_benchmarks.py b/smtcomp/scramble_benchmarks.py
index 41f205a7..389a902f 100644
--- a/smtcomp/scramble_benchmarks.py
+++ b/smtcomp/scramble_benchmarks.py
@@ -22,7 +22,19 @@ def scramble_file(fdict: dict, incremental: bool, srcdir: Path, dstdir: Path, ar
     dstdir = dstdir.joinpath(str(defs.Logic.of_int(fdict["logic"])))
     fdst = dstdir.joinpath("scrambled" + str(fdict["scramble_id"]) + ".smt2")
     dstdir.mkdir(parents=True, exist_ok=True)
-    subprocess.run(args, stdin=fsrc.open("r"), stdout=fdst.open("w"))
+
+    if incremental:
+        subprocess.run(
+            ["grep", "-o", "(set-info :status \\(sat\\|unsat\\|unknown\\))"],
+            stdin=fsrc.open("r"),
+            stdout=fdst.open("w"),
+        )
+        subprocess.run(["sed", "-i", "s/(set-info :status \\(.*\\))/\\1/", str(fdst)])
+        with fdst.open("a") as dstfile:
+            dstfile.write("--- BENCHMARK BEGINS HERE ---\n")
+        subprocess.run(args, stdin=fsrc.open("r"), stdout=fdst.open("a"))
+    else:
+        subprocess.run(args, stdin=fsrc.open("r"), stdout=fdst.open("w"))
 
 
 def create_scramble_id(benchmarks: pl.LazyFrame, config: defs.Config) -> pl.LazyFrame:
@@ -95,11 +107,11 @@ def test_select_and_scramble(
         case defs.Track.SingleQuery:
             selected = smtcomp.selection.helper_compute_sq(config)
         case defs.Track.Incremental:
-            selected = smtcomp.selection.helper_compute_sq(config)
-            rich.print(
-                f"[red]The scramble_benchmarks command does not yet work for the competition track: {competition_track}[/red]"
-            )
-            exit(1)
+            selected = pl.read_ipc(config.cached_incremental_benchmarks).lazy()
+            # rich.print(
+            #     f"[red]The scramble_benchmarks command does not yet work for the competition track: {competition_track}[/red]"
+            # )
+            # exit(1)
         case defs.Track.ModelValidation:
             selected = smtcomp.selection.helper_compute_sq(config)
             rich.print(
@@ -118,7 +130,7 @@ def test_select_and_scramble(
                 f"[red]The scramble_benchmarks command does not yet work for the competition track: {competition_track}[/red]"
             )
             exit(1)
-    selected = create_scramble_id(selected, config).filter(selected=True).filter(pl.col("logic") < 3)
+    selected = create_scramble_id(selected, config).filter(pl.col("logic") == int(defs.Logic.BVFP))
     scramble_lazyframe(selected, competition_track, config, srcdir, dstdir, scrambler, max_workers)

From c176e602aeb98edff6332b4e8cf5758906039d04 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Jon=C3=A1=C5=A1?= <359542@mail.muni.cz>
Date: Wed, 12 Jun 2024 01:17:25 +0200
Subject: [PATCH 05/15] More infrastructure for cluster execution (#91)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Scrambler now prepends the statuses of all check calls.
* Swapped select-and-scramble back from test mode
* Blackify.
* Fixed my typing error.
* Unpack: recognize also ZIP archives without file extension.
* Update readme.
* Generate tool_info module and benchmark definitions for each tool.
* tool_info module: Make generic and support required files.
* Generate trivial incremental benchmarks.
* Generate benchexec definitions for incremental track.
* Mark error.log as benchexec result file.
* Do not generate empty benchmark defs. This happens for example if the tool
  does not participate in the incremental track.
* Correctly interpret incremental results in benchexec.
* Minor.
* Generate also benchmark definitions for BenchExec.
* Specify property file in the generated benchexec files.
* Generate benchmark info from scrambler.
* Always generate the property def.
* Add generation of test benchmarks.
* Use 1 seed for test scrambling.
* Blackify.
* Solver submissions webpage (#79)
* Add webpage with participants.
* Temporary fix for some merge requests that modify more files.
* Add basic support for generation of submission raw data as JSON.
* Revert ours merge strategy.
* Do not use ours merge strategy.
* Add preliminary participant list.
* Throw error for invalid submissions.
* Add iProver to participants json.
* Blackify.
* Fix mypy annotations.

---------

Co-authored-by: Martin Jonáš

* Selection and scrambling of incremental benchmarks.
* Fix mypy errors.
* Typos.
* Less strict type annotation due to calling internal benchexec function.
* Fix whitespace.

---------

Co-authored-by: mbromber
Co-authored-by: Martin Jonáš
---
 README.md                      |  12 +--
 smtcomp/benchexec.py           | 144 +++++++++++++++++++++++----------
 smtcomp/defs.py                |   1 +
 smtcomp/execution.py           |   6 +-
 smtcomp/generate_benchmarks.py |  72 ++++++++++++++---
 smtcomp/incremental_tool.py    |  86 ++++++++++++++++++++
 smtcomp/main.py                |  30 +++++--
 smtcomp/scramble_benchmarks.py |  23 ++++--
 smtcomp/selection.py           |  20 ++++-
 smtcomp/submission.py          |   2 +-
 smtcomp/tool.py                |  32 ++++++--
 smtcomp/unpack.py              |  10 ++-
 12 files changed, 354 insertions(+), 84 deletions(-)
 create mode 100644 smtcomp/incremental_tool.py

diff --git a/README.md b/README.md
index 117a3d0d..5d767c66 100644
--- a/README.md
+++ b/README.md
@@ -128,37 +128,37 @@ smtcomp show ../tmp/submissions/YicesQS.json
 The solver downloaded using:
 
 ```
-smtcomp download-archive ../tmp/submissions/*.json ../tmp/benchexec/execution
+smtcomp download-archive submissions/*.json ../tmp/execution
 ```
 
 Trivial tests benchmarks generated with:
 
 ```
-smtcomp generate-benchmarks ../tmp/benchexec/includes/
+smtcomp generate-trivial-benchmarks ../tmp/execution/benchmarks
 ```
 
 The benchexec tasks generated using:
 
 ```
-smtcomp generate-benchexec ../tmp/submissions/*.json ../tmp/benchexec/includes/all.xml ../tmp/benchexec/execution
+smtcomp generate-benchexec submissions/*.json ../tmp/execution
 ```
 
 The benchexec execution environment generated using:
 
 ```
-smtcomp prepare-execution ../tmp/benchexec/execution
+smtcomp prepare-execution ../tmp/execution
 ```
 
 Benchexec started using:
 
 ```
-(cd ../tmp/benchexec/execution; benchexec ../includes/all.xml --read-only-dir / --overlay-dir /home --full-access-dir .. --numOfThreads 8 -M 2GB -c 1)
+(cd ../tmp/execution; PYTHONPATH=$(pwd) benchexec SOLVERNAME.xml --read-only-dir / --overlay-dir /home --full-access-dir .. --numOfThreads 8 -M 2GB -c 1)
 ```
 
 Benchexec in verifier cloud started using:
 
 ```
-(cd ../tmp/benchexec/execution; PATH_TO_BENCHEXEC/contrib/vcloud-benchmark.py ../includes/all.xml --read-only-dir / --overlay-dir /home --full-access-dir .. --numOfThreads 8 -M 2GB -c 1 --vcloudMaster VCLOUD_MASTER --vcloudClientHeap 500)
+(cd ../tmp/execution; PYTHONPATH=$(pwd) PATH_TO_BENCHEXEC/contrib/vcloud-benchmark.py SOLVERNAME.xml --read-only-dir / --overlay-dir /home --full-access-dir .. --numOfThreads 8 -M 2GB -c 1 --vcloudMaster VCLOUD_MASTER --vcloudClientHeap 500)
 ```
 
 ---
diff --git a/smtcomp/benchexec.py b/smtcomp/benchexec.py
index 0c94f230..faac6361 100644
--- a/smtcomp/benchexec.py
+++ b/smtcomp/benchexec.py
@@ -1,14 +1,16 @@
 from pathlib import Path
+import rich
 from os.path import relpath
 from typing import List, cast, Dict, Optional
 from yattag import Doc, indent
 
 from smtcomp import defs
-from smtcomp.archive import find_command
+from smtcomp.archive import find_command, archive_unpack_dir
 from pydantic import BaseModel
 
 import shlex
+from re import sub
 
 
 class CmdTask(BaseModel):
@@ -17,7 +19,70 @@ class CmdTask(BaseModel):
     includesfiles: List[str]
 
 
-def generate_xml(timelimit_s: int, memlimit_M: int, cpuCores: int, cmdtasks: List[CmdTask], file: Path) -> None:
+def generate_benchmark_yml(benchmark: Path, expected_result: Optional[bool], orig_file: Optional[Path]) -> None:
+    ymlfile = benchmark.with_suffix(".yml")
+    with ymlfile.open("w") as f:
+        f.write("format_version: '2.0'\n\n")
+
+        f.write(f"input_files: '{str(benchmark.name)}'\n\n")
+
+        if orig_file is not None:
+            f.write(f"# original_files: '{str(orig_file)}'\n\n")
+
+        expected_str = "true" if expected_result else "false"
+        f.write("properties:\n")
+        f.write(f"  - property_file: '../../properties/SMT.prp'\n")
+        if expected_result is not None:
+            f.write(f"    expected_verdict: {expected_str}\n")
+
+
+def tool_module_name(s: defs.Submission, track: defs.Track) -> str:
+    return sub(r"\W+", "", s.name.lower()) + get_suffix(track)
+
+
+def generate_tool_module(s: defs.Submission, cachedir: Path, track: defs.Track) -> None:
+    name = tool_module_name(s, track)
+    file = cachedir / "tools" / f"{name}.py"
+
+    with file.open("w") as f:
+        match track:
+            case defs.Track.Incremental:
+                base_module = "incremental_tool"
+                base_class = "IncrementalSMTCompTool"
+            case defs.Track.SingleQuery:
+                base_module = "tool"
+                base_class = "SMTCompTool"
+            case _:
+                rich.print(
+                    f"[red]generate_tool_module command does not yet work for the competition track: {track}[/red]"
+                )
+                exit(1)
+        f.write(f"from tools.{base_module} import {base_class}\n\n")
+        f.write(f"class Tool({base_class}):  # type: ignore\n")
+
+        f.write(f"    NAME = '{s.name}'\n")
+        if s.command is not None:
+            assert s.archive is not None
+            executable_path = find_command(s.command, s.archive, cachedir)
+            executable = str(relpath(executable_path, start=str(cachedir)))
+            f.write(f"    EXECUTABLE = '{executable}'\n")
+
+        required_paths = []
+
+        if s.archive is not None:
+            archive_path = relpath(archive_unpack_dir(s.archive, cachedir), start=str(cachedir))
+            required_paths.append(str(archive_path))
+        for p in s.participations.root:
+            if p.archive is not None:
+                archive_path = relpath(archive_unpack_dir(p.archive, cachedir), start=str(cachedir))
+                required_paths.append(str(archive_path))
+        if required_paths:
+            f.write(f"    REQUIRED_PATHS = {required_paths}\n")
+
+
+def generate_xml(
+    timelimit_s: int, memlimit_M: int, cpuCores: int, cmdtasks: List[CmdTask], cachedir: Path, tool_module_name: str
+) -> None:
     doc, tag, text = Doc().tagtext()
 
     doc.asis('<?xml version="1.0"?>')
     with tag(
         "benchmark",
-        tool=f"smtcomp.tool",
+        tool=f"tools.{tool_module_name}",
         timelimit=f"{timelimit_s}s",
         hardlimit=f"{timelimit_s+30}s",
         memlimit=f"{memlimit_M} MB",
         cpuCores=f"{cpuCores}",
-        displayname="SC",
     ):
+        with tag("require", cpuModel="Intel Xeon E3-1230 v5 @ 3.40 GHz"):
+            text()
+
+        with tag("resultfiles"):
+            text("**/error.log")
+
         for cmdtask in cmdtasks:
             for includesfile in cmdtask.includesfiles:
                 with tag("rundefinition", name=f"{cmdtask.name},{includesfile}"):
@@ -42,37 +112,37 @@ def generate_xml(
                         text(option)
                     with tag("tasks", name="task"):
                         with tag("includesfile"):
-                            text(includesfile)
+                            text(f"benchmarks/{includesfile}")
+
+                    with tag("propertyfile"):
+                        text("benchmarks/properties/SMT.prp")
 
+    file = cachedir.joinpath(f"{tool_module_name}.xml")
     file.write_text(indent(doc.getvalue()))
 
 
-def cmdtask_for_submission(s: defs.Submission, cachedir: Path) -> List[CmdTask]:
+def get_suffix(track: defs.Track) -> str:
+    match track:
+        case defs.Track.Incremental:
+            return "_inc"
+        case defs.Track.ModelValidation:
+            return "_model"
+        case _:
+            return ""
+
+
+def cmdtask_for_submission(s: defs.Submission, cachedir: Path, target_track: defs.Track) -> List[CmdTask]:
     res: List[CmdTask] = []
     i = -1
     for p in s.participations.root:
         command = cast(defs.Command, p.command if p.command else s.command)
         archive = cast(defs.Archive, p.archive if p.archive else s.archive)
         for track, divisions in p.get().items():
+            if track != target_track:
+                continue
+
             i = i + 1
-            match track:
-                case defs.Track.Incremental:
-                    suffix = "_inc"
-                    mode = "trace"
-                case defs.Track.ModelValidation:
-                    suffix = "_model"
-                    mode = "direct"
-                case defs.Track.UnsatCore:
-                    suffix = ""
-                    mode = "direct"
-                case defs.Track.ProofExhibition:
-                    suffix = ""
-                    mode = "direct"
-                case defs.Track.SingleQuery:
-                    suffix = ""
-                    mode = "direct"
-                case defs.Track.Cloud | defs.Track.Parallel:
-                    continue
+            suffix = get_suffix(track)
             tasks: list[str] = []
             for _, logics in divisions.items():
                 tasks.extend([str(logic) + suffix for logic in logics])
@@ -83,26 +153,14 @@ def cmdtask_for_submission(s: defs.Submission, cachedir: Path) -> List[CmdTask]:
                 assert command.arguments == []
                 dirname = str(relpath(executable_path.parent, start=str(cachedir)))
 
-                if mode == "direct":
-                    options = [
-                        "bash",
-                        "-c",
-                        f'FILE=$(realpath $1); (cd {shlex.quote(dirname)}; exec ./{shlex.quote(executable_path.name)} "$FILE")',
-                        "compa_starexec",
-                    ]
-                else:
-                    assert mode == "trace"
-                    options = [
-                        "bash",
-                        "-c",
-                        f'ROOT=$(pwd); FILE=$(realpath $1); (cd {shlex.quote(dirname)}; exec $ROOT/smtlib2_trace_executor ./{shlex.quote(executable_path.name)} "$FILE")',
-                        "compa_starexec",
-                    ]
+                options = [
+                    "bash",
+                    "-c",
+                    f'FILE=$(realpath $1); (cd {shlex.quote(dirname)}; exec ./{shlex.quote(executable_path.name)} "$FILE")',
+                    "compa_starexec",
+                ]
             else:
-                if mode == "direct":
-                    options = [executable] + command.arguments
-                else:
-                    options = ["./smtlib2_trace_executor", executable] + command.arguments
+                options = [executable] + command.arguments
             cmdtask = CmdTask(
                 name=f"{s.name},{i},{track}",
                 options=options,
diff --git a/smtcomp/defs.py b/smtcomp/defs.py
index 695e5384..f278b8c8 100644
--- a/smtcomp/defs.py
+++ b/smtcomp/defs.py
@@ -138,6 +138,7 @@ class Status(EnumAutoInt):
     Unsat = "unsat"
     Sat = "sat"
     Unknown = "unknown"
+    Incremental = "incremental"
 
 
 class Track(EnumAutoInt):
diff --git a/smtcomp/execution.py b/smtcomp/execution.py
index 0f37cbb6..85c00666 100644
--- a/smtcomp/execution.py
+++ b/smtcomp/execution.py
@@ -33,4 +33,8 @@ def unpack_trace_executor(dst: Path) -> None:
 
 def copy_tool_module(dst: Path) -> None:
     script_path = Path(__file__).parent
-    subprocess.run(["cp", script_path / "tool.py", dst])
+    tools = dst / "tools"
+    tools.mkdir(parents=True, exist_ok=True)
+    subprocess.run(["cp", script_path / "tool.py", tools])
+    subprocess.run(["cp", script_path / "incremental_tool.py", tools])
+    subprocess.run(["touch", tools / "__init__.py"])
diff --git a/smtcomp/generate_benchmarks.py b/smtcomp/generate_benchmarks.py
index af819401..056fc81e 100644
--- a/smtcomp/generate_benchmarks.py
+++ b/smtcomp/generate_benchmarks.py
@@ -1,6 +1,7 @@
 from typing import Set, Dict
 from pathlib import Path
 from smtcomp import defs
+from smtcomp.benchexec import generate_benchmark_yml
 
 
 def path_trivial_benchmark(dst: Path, track: defs.Track, logic: defs.Logic, status: defs.Status) -> Path:
@@ -15,15 +16,22 @@ def path_trivial_benchmark(dst: Path, track: defs.Track, logic: defs.Logic, stat
         raise (ValueError("No trivial benchmarks yet for f{track}"))
     match status:
         case defs.Status.Sat:
-            return dst.joinpath("files", str(logic) + suffix + ".sat.smt2")
+            return dst.joinpath(str(logic) + suffix + ".sat.smt2")
         case defs.Status.Unsat:
-            return dst.joinpath("files", str(logic) + suffix + ".unsat.smt2")
+            return dst.joinpath(str(logic) + suffix + ".unsat.smt2")
+        case defs.Status.Incremental:
+            return dst.joinpath(str(logic) + suffix + ".incremental.smt2")
         case defs.Status.Unknown:
             raise (ValueError("No trivial benchmarks yet for unknown"))
 
 
 def generate_trivial_benchmarks(dst: Path) -> None:
+    prop_dir = dst.joinpath("properties")
+    prop_dir.mkdir(parents=True, exist_ok=True)
+    (prop_dir / "SingleQuery.prp").touch()
+
     dst.joinpath("files").mkdir(parents=True, exist_ok=True)
+    dst.joinpath("files_inc").mkdir(parents=True, exist_ok=True)
     for track, divisions in defs.tracks.items():
         match track:
             case defs.Track.Incremental:
@@ -36,15 +44,61 @@ def generate_trivial_benchmarks(dst: Path) -> None:
                 continue
         for _, theories in divisions.items():
             for theory in theories:
-                file = dst.joinpath(str(theory) + suffix)
-                file_sat = path_trivial_benchmark(dst, track, theory, defs.Status.Sat)
-                file_unsat = path_trivial_benchmark(dst, track, theory, defs.Status.Unsat)
+                theory_name = str(theory)
+                theory_dir = dst.joinpath(f"files{suffix}", theory_name)
+                theory_dir.mkdir(parents=True, exist_ok=True)
+                file = dst.joinpath(theory_name + suffix)
+
+                if track == defs.Track.Incremental:
+                    file_incremental = path_trivial_benchmark(theory_dir, track, theory, defs.Status.Incremental)
+
+                    file.write_text(f"files{suffix}/{theory_name}/*.smt2\n")
 
-                file.write_text("\n".join([str(file_sat.relative_to(dst)), str(file_unsat.relative_to(dst))]))
+                    benchmark = "\n".join([
+                        "sat",
+                        "sat",
+                        "unsat",
+                        "--- BENCHMARK BEGINS HERE ---",
+                        f"(set-logic {theory.value})",
+                        "(assert true)",
+                        "(check-sat)",
+                        "(assert true)",
+                        "(check-sat)",
+                        "(assert false)",
+                        "(check-sat)\n",
+                    ])
+                    file_incremental.write_text(benchmark)
+                else:
+                    file_sat = path_trivial_benchmark(theory_dir, track, theory, defs.Status.Sat)
+                    file_unsat = path_trivial_benchmark(theory_dir, track, theory, defs.Status.Unsat)
+                    file.write_text(f"files{suffix}/{theory_name}/*.yml\n")
 
-                file_sat.write_text(f"(set-logic {theory.value})(check-sat)")
-                file_unsat.write_text(f"(set-logic {theory.value})(assert false)(check-sat)")
+                    file_sat.write_text(f"(set-logic {theory.value})(check-sat)")
+                    file_unsat.write_text(f"(set-logic {theory.value})(assert false)(check-sat)")
+
+                    generate_benchmark_yml(file_sat, True, None)
+                    generate_benchmark_yml(file_unsat, False, None)
 
 
 def generate_benchmarks(dst: Path, seed: int) -> None:
-    return
+    prop_dir = dst.joinpath("properties")
+    prop_dir.mkdir(parents=True, exist_ok=True)
+    (prop_dir / "SMT.prp").touch()
+
+    dst.joinpath("files").mkdir(parents=True, exist_ok=True)
+    dst.joinpath("files_inc").mkdir(parents=True, exist_ok=True)
+    for track, divisions in defs.tracks.items():
+        match track:
+            case defs.Track.Incremental:
+                suffix = "_inc"
+            case defs.Track.ModelValidation:
+                suffix = "_model"
+            case defs.Track.SingleQuery:
+                suffix = ""
+            case defs.Track.UnsatCore | defs.Track.ProofExhibition | defs.Track.Cloud | defs.Track.Parallel:
+                continue
+        for _, theories in divisions.items():
+            for theory in theories:
+                theory_name = str(theory)
+                file = dst.joinpath(theory_name + suffix)
+                file.write_text(f"files{suffix}/{theory_name}/*.yml\n")
diff --git a/smtcomp/incremental_tool.py b/smtcomp/incremental_tool.py
new file mode 100644
index 00000000..40de13a5
--- /dev/null
+++ b/smtcomp/incremental_tool.py
@@ -0,0 +1,86 @@
+from typing import List, Optional, Any
+import benchexec.util as util
+import benchexec.result as result
+from benchexec.tools.template import BaseTool2
+import sys, re
+import os
+
+TRACE_EXECUTOR = "./smtlib2_trace_executor"
+
+
+class IncrementalSMTCompTool(BaseTool2):  # type: ignore
+    """
+    Generic incremental tool for smtcomp execution
+    """
+
+    REQUIRED_PATHS = ["."]
+    EXECUTABLE = "./false"
+    NAME = "SMT-COMP generic incremental tool"
+
+    def determine_result(self, run: BaseTool2.Run) -> Any:  # type: ignore
+        returncode: int = run.exit_code.value
+        returnsignal: int = run.exit_code.signal
+        output: List[str] = run.output
+        isTimeout: bool = run.was_timeout
+
+        correct = 0
+        status = None
+
+        for line in output:
+            line = line.strip()
+            if line in ("sat", "unsat"):
+                correct += 1
+            if line.startswith("WRONG"):
+                return "WRONG"
+
+        if returnsignal is None:
+            status = "DONE"
+        elif ((returnsignal == 9) or (returnsignal == 15)) and isTimeout:
+            status = "TIMEOUT"
+        elif returnsignal == 9:
+            status = "KILLED BY SIGNAL 9"
+        elif returnsignal == 6:
+            status = "ABORTED"
+        elif returnsignal == 15:
+            status = "KILLED"
+        else:
+            status = "ERROR"
+
+        return f"{status} ({correct} correct)"
+
+    def executable(self, _: Any) -> str | Any | None:
+        return util.find_executable(self.EXECUTABLE, exitOnError=True)
+
+    def version(self, executable: str) -> str:
+        return ""
+
+    def name(self) -> str:
+        return self.NAME
+
+    def cmdline(  # type: ignore
+        self,
+        executable: str,
+        options: List[str],
+        task: BaseTool2.Task,
+        rlimits: BaseTool2.ResourceLimits,
+    ) -> Any:
+        tasks = task.input_files
+        options = options + ([] if task.options is None else task.options)
+        assert len(tasks) <= 1, "only one inputfile supported"
+        if options:
+            # executable and options were overridden by the task definition
+            return [TRACE_EXECUTOR, *options, *tasks]
+        else:
+            # using default executable
+            return [TRACE_EXECUTOR, executable, *tasks]
+
+    def program_files(self, executable: str) -> Any:
+        files = [TRACE_EXECUTOR, executable] + self._program_files_from_executable(executable, self.REQUIRED_PATHS)
+        return files
+
+    @staticmethod
+    def _program_files_from_executable(executable: str, required_paths: list[str]) -> Any:
+        scriptdir = os.path.dirname(os.path.abspath(__file__))
+        basedir = os.path.join(scriptdir, os.path.pardir)
+
+        return util.flatten(util.expand_filename_pattern(path, basedir) for path in required_paths)
diff --git a/smtcomp/main.py b/smtcomp/main.py
index e36f38d4..5f7101ee 100644
--- a/smtcomp/main.py
+++ b/smtcomp/main.py
@@ -150,7 +150,6 @@ def prepare_execution(dst: Path) -> None:
 @app.command(rich_help_panel=benchexec_panel)
 def generate_benchexec(
     files: List[Path],
-    dst: Path,
     cachedir: Path,
     timelimit_s: int = defs.Config.timelimit_s,
     memlimit_M: int = defs.Config.memlimit_M,
@@ -159,14 +158,23 @@ def generate_benchexec(
     """
     Generate the benchexec file for the given submissions
     """
-    cmdtasks: List[benchexec.CmdTask] = []
     for file in track(files):
         s = submission.read(str(file))
-        res = benchexec.cmdtask_for_submission(s, cachedir)
-        cmdtasks.extend(res)
-    benchexec.generate_xml(
-        timelimit_s=timelimit_s, memlimit_M=memlimit_M, cpuCores=cpuCores, cmdtasks=cmdtasks, file=dst
-    )
+
+        for target_track in [defs.Track.SingleQuery, defs.Track.Incremental]:
+            tool_module_name = benchexec.tool_module_name(s, target_track)
+            benchexec.generate_tool_module(s, cachedir, target_track)
+
+            res = benchexec.cmdtask_for_submission(s, cachedir, target_track)
+            if res:
+                benchexec.generate_xml(
+                    timelimit_s=timelimit_s,
+                    memlimit_M=memlimit_M,
+                    cpuCores=cpuCores,
+                    cmdtasks=res,
+                    cachedir=cachedir,
+                    tool_module_name=tool_module_name,
+                )
 
 
 # Should be moved somewhere else
@@ -530,6 +538,7 @@ def select_and_scramble(
     dstdir: Path,
     scrambler: Path,
     max_workers: int = 8,
+    test: bool = False,
 ) -> None:
     """
     Selects and scrambles the benchmarks and
@@ -539,6 +548,13 @@ def select_and_scramble(
     """
 
     config = defs.Config(data)
+
+    if test:
+        config.min_used_benchmarks = 20
+        config.ratio_of_used_benchmarks = 0.0
+        config.invert_triviality = True
+        config.seed = 1
+
     smtcomp.scramble_benchmarks.select_and_scramble(competition_track, config, srcdir, dstdir, scrambler, max_workers)
 
 
diff --git a/smtcomp/scramble_benchmarks.py b/smtcomp/scramble_benchmarks.py
index 389a902f..ca625e29 100644
--- a/smtcomp/scramble_benchmarks.py
+++ b/smtcomp/scramble_benchmarks.py
@@ -4,8 +4,22 @@
 import subprocess
 import concurrent.futures
 import smtcomp.defs as defs
+from smtcomp.benchexec import generate_benchmark_yml
 import polars as pl
 import smtcomp.selection
+from typing import Optional
+import re
+
+status_pattern = re.compile(r"(set-info :status (sat|unsat|unknown))")
+
+
+def get_expected_result(benchmark: Path) -> Optional[bool]:
+    for line in open(benchmark).readlines():
+        m = status_pattern.search(line)
+        if m and m.group(2) != "unknown":
+            return m.group(2) == "sat"
+
+    return None
 
 
 def scramble_file(fdict: dict, incremental: bool, srcdir: Path, dstdir: Path, args: list) -> None:
@@ -36,6 +50,9 @@ def scramble_file(fdict: dict, incremental: bool, srcdir: Path, dstdir: Path, ar
     else:
         subprocess.run(args, stdin=fsrc.open("r"), stdout=fdst.open("w"))
 
+    expected = get_expected_result(fsrc) if not incremental else None
+    generate_benchmark_yml(fdst, expected, fsrc.relative_to(srcdir))
+
 
 def create_scramble_id(benchmarks: pl.LazyFrame, config: defs.Config) -> pl.LazyFrame:
     files = benchmarks.sort("file").select(pl.col("file").shuffle(seed=config.seed))
@@ -134,11 +151,7 @@ def select_and_scramble(
         case defs.Track.SingleQuery:
             selected = smtcomp.selection.helper_compute_sq(config)
         case defs.Track.Incremental:
-            selected = smtcomp.selection.helper_compute_sq(config)
-            rich.print(
-                f"[red]The scramble_benchmarks command does not yet work for the competition track: {competition_track}[/red]"
-            )
-            exit(1)
+            selected = smtcomp.selection.helper_compute_incremental(config)
         case defs.Track.ModelValidation:
             selected = smtcomp.selection.helper_compute_sq(config)
             rich.print(
diff --git a/smtcomp/selection.py b/smtcomp/selection.py
index 8dc7fde9..e7dd216a 100644
--- a/smtcomp/selection.py
+++ b/smtcomp/selection.py
@@ -61,8 +61,8 @@ def add_trivial_run_info(benchmarks: pl.LazyFrame, previous_results: pl.LazyFram
     )
 
-def sq_selection(benchmarks_with_info: pl.LazyFrame, config: defs.Config) -> pl.LazyFrame:
defs.Config) -> pl.LazyFrame: - used_logics = defs.logic_used_for_track(defs.Track.SingleQuery) +def track_selection(benchmarks_with_info: pl.LazyFrame, config: defs.Config, target_track: defs.Track) -> pl.LazyFrame: + used_logics = defs.logic_used_for_track(target_track) # Keep only logics used by single query b = benchmarks_with_info.filter(c_logic.is_in(set(map(int, used_logics)))) @@ -128,7 +128,21 @@ def helper_compute_sq(config: defs.Config) -> pl.LazyFrame: trivial_in_logic = pl.col("trivial").any().over(["logic"]) inverted_or_not_trivial = pl.when(trivial_in_logic).then(pl.col("trivial").not_()).otherwise(pl.col("trivial")) benchmarks_with_info = benchmarks_with_info.with_columns(trivial=inverted_or_not_trivial) - return sq_selection(benchmarks_with_info, config) + return track_selection(benchmarks_with_info, config, defs.Track.SingleQuery) + + +def helper_compute_incremental(config: defs.Config) -> pl.LazyFrame: + """ + Returned columns: file (uniq id), logic, family,name, status, asserts nunmber, trivial, run (in previous year), new (benchmarks), selected + """ + benchmarks = pl.read_ipc(config.cached_incremental_benchmarks) + results = pl.read_ipc(config.cached_previous_results) + benchmarks_with_info = add_trivial_run_info(benchmarks.lazy(), results.lazy(), config) + if config.invert_triviality: + trivial_in_logic = pl.col("trivial").any().over(["logic"]) + inverted_or_not_trivial = pl.when(trivial_in_logic).then(pl.col("trivial").not_()).otherwise(pl.col("trivial")) + benchmarks_with_info = benchmarks_with_info.with_columns(trivial=inverted_or_not_trivial) + return track_selection(benchmarks_with_info, config, defs.Track.Incremental) def competitive_logics(config: defs.Config) -> pl.LazyFrame: diff --git a/smtcomp/submission.py b/smtcomp/submission.py index b27db82d..35393559 100644 --- a/smtcomp/submission.py +++ b/smtcomp/submission.py @@ -130,4 +130,4 @@ def merge_all_submissions(local_repo_path: str) -> None: shas = [p.head.sha for p in fpulls] message = "merge submissions\n\n" + "\n".join(f"#{p.number}: {p.title}" for p in fpulls) print(shas) - subprocess.run(["git", "-C", local_repo_path, "merge", "-m", message, "-s", "ours"] + shas) + subprocess.run(["git", "-C", local_repo_path, "merge", "-m", message] + shas) diff --git a/smtcomp/tool.py b/smtcomp/tool.py index cd3d7278..6a7144a5 100644 --- a/smtcomp/tool.py +++ b/smtcomp/tool.py @@ -3,16 +3,17 @@ import benchexec.result as result from benchexec.tools.template import BaseTool2 import sys, re +import os -fallback_name = "./false" - -class Tool(BaseTool2): # type: ignore +class SMTCompTool(BaseTool2): # type: ignore """ Generic tool for smtcomp execution """ - REQUIRED_PATHS = ["unpack"] + REQUIRED_PATHS = ["."] + EXECUTABLE = "./false" + NAME = "SMT-COMP generic tool" def determine_result(self, run: BaseTool2.Run) -> Any: # type: ignore """Adaptation of Jochen Hoenicke process script @@ -62,13 +63,13 @@ def determine_result(self, run: BaseTool2.Run) -> Any: # type: ignore return status def executable(self, _: Any) -> str | Any | None: - return util.find_executable("smtlib2_trace_executor", fallback=fallback_name, exitOnError=False) + return util.find_executable(self.EXECUTABLE, exitOnError=True) def version(self, executable: str) -> str: return "" def name(self) -> str: - return "SC" + return self.NAME def cmdline( # type: ignore self, @@ -80,6 +81,21 @@ def cmdline( # type: ignore tasks = task.input_files options = options + ([] if task.options is None else task.options) assert len(tasks) <= 1, "only one inputfile 
supported" - assert len(options) >= 1, "options give the command to run" - return [executable, *options, *tasks] + if options: + # executable and options were overridden by the task definition + return [*options, *tasks] + else: + # using default executable + return [executable, *tasks] + + def program_files(self, executable: str) -> Any: + files = [executable] + self._program_files_from_executable(executable, self.REQUIRED_PATHS) + return files + + @staticmethod + def _program_files_from_executable(executable: str, required_paths: list[str]) -> Any: + scriptdir = os.path.dirname(os.path.abspath(__file__)) + basedir = os.path.join(scriptdir, os.path.pardir) + + return util.flatten(util.expand_filename_pattern(path, basedir) for path in required_paths) diff --git a/smtcomp/unpack.py b/smtcomp/unpack.py index 75b25ee3..b2b4e9f1 100644 --- a/smtcomp/unpack.py +++ b/smtcomp/unpack.py @@ -14,6 +14,14 @@ ZIP_UNIX_SYSTEM = 3 +def is_zip(file: Path) -> bool: + try: + with ZipFile(file, "r") as zf: + return zf.testzip() is None + except Exception as ex: + return False + + def zip_extract_all_with_executable_permission(file: Path, target_dir: Path) -> None: # extract by calling `unzip`, because ZipFile does not handle symlinks # https://stackoverflow.com/questions/19737570/how-do-i-preserve-symlinks-when-unzipping-an-archive-using-python @@ -35,7 +43,7 @@ def tar_extract_all_with_executable_permission(file: Path, target_dir: Path) -> def extract_all_with_executable_permission(file: Path, target_dir: Path) -> None: - if str(file).endswith(".zip"): + if is_zip(file): zip_extract_all_with_executable_permission(file, target_dir) else: tar_extract_all_with_executable_permission(file, target_dir) From 443d35dc3df9afa8abe3596a50ffbbb5f6868089 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bobot?= Date: Wed, 12 Jun 2024 11:33:32 +0200 Subject: [PATCH 06/15] Fix trivial benchmarks path --- smtcomp/generate_benchmarks.py | 41 ++++++++++++++++++++-------------- tests/test_validate.py | 18 +++++++++++++++ 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/smtcomp/generate_benchmarks.py b/smtcomp/generate_benchmarks.py index 056fc81e..c9cddaaf 100644 --- a/smtcomp/generate_benchmarks.py +++ b/smtcomp/generate_benchmarks.py @@ -5,22 +5,29 @@ def path_trivial_benchmark(dst: Path, track: defs.Track, logic: defs.Logic, status: defs.Status) -> Path: + """ + dst is the root of the generated benchmarks directory + """ match track: case defs.Track.Incremental: + assert status == defs.Status.Incremental suffix = "_inc" case defs.Track.ModelValidation: + assert status != defs.Status.Incremental suffix = "_model" case defs.Track.SingleQuery: + assert status != defs.Status.Incremental suffix = "" case defs.Track.UnsatCore | defs.Track.ProofExhibition | defs.Track.Cloud | defs.Track.Parallel: raise (ValueError("No trivial benchmarks yet for f{track}")) + logic_dir = dst.joinpath(f"files{suffix}", str(logic)) match status: case defs.Status.Sat: - return dst.joinpath(str(logic) + suffix + ".sat.smt2") + return logic_dir.joinpath(str(logic) + suffix + ".sat.smt2") case defs.Status.Unsat: - return dst.joinpath(str(logic) + suffix + ".unsat.smt2") + return logic_dir.joinpath(str(logic) + suffix + ".unsat.smt2") case defs.Status.Incremental: - return dst.joinpath(str(logic) + suffix + ".incremental.smt2") + return logic_dir.joinpath(str(logic) + suffix + ".incremental.smt2") case defs.Status.Unknown: raise (ValueError("No trivial benchmarks yet for unknown")) @@ -42,24 +49,24 @@ def 
generate_trivial_benchmarks(dst: Path) -> None: suffix = "" case defs.Track.UnsatCore | defs.Track.ProofExhibition | defs.Track.Cloud | defs.Track.Parallel: continue - for _, theories in divisions.items(): - for theory in theories: - theory_name = str(theory) - theory_dir = dst.joinpath(f"files{suffix}", theory_name) - theory_dir.mkdir(parents=True, exist_ok=True) - file = dst.joinpath(theory_name + suffix) + for _, logics in divisions.items(): + for logic in logics: + logic_name = str(logic) + logic_dir = dst.joinpath(f"files{suffix}", logic_name) + logic_dir.mkdir(parents=True, exist_ok=True) + file = dst.joinpath(logic_name + suffix) if track == defs.Track.Incremental: - file_incremental = path_trivial_benchmark(theory_dir, track, theory, defs.Status.Incremental) + file_incremental = path_trivial_benchmark(dst, track, logic, defs.Status.Incremental) - file.write_text(f"files{suffix}/{theory_name}/*.smt2\n") + file.write_text(f"files{suffix}/{logic_name}/*.smt2\n") benchmark = "\n".join([ "sat", "sat", "unsat", "--- BENCHMARK BEGINS HERE ---", - f"(set-logic {theory.value})", + f"(set-logic {logic.value})", "(assert true)", "(check-sat)", "(assert true)", @@ -69,12 +76,12 @@ def generate_trivial_benchmarks(dst: Path) -> None: ]) file_incremental.write_text(benchmark) else: - file_sat = path_trivial_benchmark(theory_dir, track, theory, defs.Status.Sat) - file_unsat = path_trivial_benchmark(theory_dir, track, theory, defs.Status.Unsat) - file.write_text(f"files{suffix}/{theory_name}/*.yml\n") + file_sat = path_trivial_benchmark(dst, track, logic, defs.Status.Sat) + file_unsat = path_trivial_benchmark(dst, track, logic, defs.Status.Unsat) + file.write_text(f"files{suffix}/{logic_name}/*.yml\n") - file_sat.write_text(f"(set-logic {theory.value})(check-sat)") - file_unsat.write_text(f"(set-logic {theory.value})(assert false)(check-sat)") + file_sat.write_text(f"(set-logic {logic.value})(check-sat)") + file_unsat.write_text(f"(set-logic {logic.value})(assert false)(check-sat)") generate_benchmark_yml(file_sat, True, None) generate_benchmark_yml(file_unsat, False, None) diff --git a/tests/test_validate.py b/tests/test_validate.py index c72ee3af..5fca6113 100644 --- a/tests/test_validate.py +++ b/tests/test_validate.py @@ -6,7 +6,9 @@ from smtcomp.convert_csv import convert_csv from smtcomp.main import app +import smtcomp.defs as defs from smtcomp.submission import read +from smtcomp.generate_benchmarks import generate_trivial_benchmarks, path_trivial_benchmark runner = CliRunner() good_cases = ["tests/test1.json", "submissions/template/template.json", "submissions/template/template_aws_tracks.json"] @@ -38,3 +40,19 @@ def test_show_json(name: str) -> None: @pytest.mark.parametrize("submission", submissions) def test_submission(submission: str) -> None: read(submission) + + +def test_generate_trivial(tmp_path: Path) -> None: + generate_trivial_benchmarks(tmp_path) + for track, divisions in defs.tracks.items(): + match track: + case defs.Track.Incremental: + statuses = [defs.Status.Incremental] + case defs.Track.ModelValidation | defs.Track.SingleQuery: + statuses = [defs.Status.Unsat, defs.Status.Sat] + case defs.Track.UnsatCore | defs.Track.ProofExhibition | defs.Track.Cloud | defs.Track.Parallel: + continue + for _, logics in divisions.items(): + for logic in logics: + for status in statuses: + assert path_trivial_benchmark(tmp_path, track, logic, status).exists() From f497c8341b6fb01ed1750f60180fc0f2e6c7aa78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bobot?= Date: Wed, 12 Jun 
2024 12:11:37 +0200 Subject: [PATCH 07/15] Parse results from benchexec --- .gitignore | 1 + smtcomp/results.py | 161 +++++++++++++++++++++++++++++++++++++++++++++ smtcomp/unpack.py | 5 +- 3 files changed, 166 insertions(+), 1 deletion(-) create mode 100644 smtcomp/results.py diff --git a/.gitignore b/.gitignore index e23048c8..63837ac0 100644 --- a/.gitignore +++ b/.gitignore @@ -170,3 +170,4 @@ cython_debug/ schema_doc.css schema_doc.min.js *.feather +tmp/ diff --git a/smtcomp/results.py b/smtcomp/results.py new file mode 100644 index 00000000..7972ca28 --- /dev/null +++ b/smtcomp/results.py @@ -0,0 +1,161 @@ +from typing import Optional, Iterator +import smtcomp.defs as defs +import polars as pl +import xml.etree.ElementTree as ET +from pathlib import Path +from smtcomp.unpack import read_cin +from pydantic import BaseModel +from datetime import timedelta +from zipfile import ZipFile + + +class RunId(BaseModel): + solver: str + participation: int + track: defs.Track + includefile: str + + @classmethod + def unmangle(cls, s: str) -> "RunId": + name_l = s.split(",") + return RunId( + solver=name_l[0], + participation=int(name_l[1]), + track=defs.Track(name_l[2]), + # The name "task" is currently added at the end, name of the task + includefile=name_l[3].split(".")[0], + ) + + def mangle(self) -> str: + return ",".join([self.solver, str(self.participation), str(self.track), self.includefile]) + + +class Run(BaseModel): + basename: str + cputime_s: float + """ example: 0.211880968s""" + memory_B: int + """ example: 21127168B""" + status: str + """ example: true""" + walltime_s: float + """ example: 0.21279739192686975s""" + + # host: str + # """ example: pontos07""" + # blkio-read: str + # """ example: 0B """ + # blkio-write: str + # """ example: 0B """ + # category: str + # """ example: missing """ + # cpuCores: str + # """ example: 0 """ + # cputime-cpu0: str + # """ example: 0.211880968s """ + # memoryNodes: str + # """ example: 0 """ + # returnvalue: str + # """ example: 1 """ + # starttime: str + # """ example: 2024-06-07T19:57:32.499898+02:00""" + # vcloud-additionalEnvironment: str + # """ example: """ + # vcloud-cpuCoresDetails: str + # """ example: [Processor{0, core=0, socket=0} """ + # vcloud-debug: str + # """ example: false """ + # vcloud-generatedFilesCount: str + # """ example: 1 """ + # vcloud-matchedResultFilesCount: str + # """ example: 1 """ + # vcloud-maxLogFileSize: str + # """ example: 20 MB """ + # vcloud-memoryNodesAllocation: str + # """ example: {0=2.0 GB} """ + # vcloud-newEnvironment: str + # """ example: """ + # vcloud-outerwalltime: str + # """ example: 0.554s """ + # vcloud-runId: str + # """ example: 6a08ebbd-2af2-4c18-af44-429a0439fab """ + + +class Results(BaseModel): + runid: RunId + options: str + runs: list[Run] + + +def parse_time(s: str) -> float: + assert s[-1] == "s" + return float(s[:-1]) + + +def parse_size(s: str) -> int: + assert s[-1] == "B" + return int(s[:-1]) + + +def convert_run(r: ET.Element) -> Run: + basename = Path(r.attrib["name"]).name + cputime_s: Optional[float] = None + memory_B: Optional[int] = None + status: Optional[str] = None + walltime_s: Optional[float] = None + + for col in r.iterfind("column"): + value = col.attrib["value"] + match col.attrib["title"]: + case "cputime": + cputime_s = parse_time(value) + case "memory": + memory_B = parse_size(value) + case "status": + status = value + case "walltime": + walltime_s = parse_time(value) + + if cputime_s is None or memory_B is None or status is None or walltime_s is None: + 
raise ValueError("xml of results doesn't contain some expected column")
+
+    return Run(basename=basename, cputime_s=cputime_s, memory_B=memory_B, status=status, walltime_s=walltime_s)
+
+
+def parse_xml(file: Path) -> Results:
+    result = ET.fromstring(read_cin(file))
+    runs = list(map(convert_run, result.iterfind("run")))
+    return Results(runid=RunId.unmangle(result.attrib["name"]), options=result.attrib["options"], runs=runs)
+
+
+def to_pl(r: Results) -> pl.LazyFrame:
+    lf = pl.LazyFrame(r.runs)
+    return lf.with_columns(solver=pl.lit(r.runid.solver), participation=r.runid.participation, track=int(r.runid.track))
+
+
+def parse_dir(dir: Path) -> pl.LazyFrame:
+    return pl.concat(map(lambda file: to_pl(parse_xml(file)), dir.glob("*.xml.bz2")))
+
+
+def log_filename(dir: Path) -> Path:
+    l = list(dir.glob("*.logfiles.zip"))
+    if len(l) != 1:
+        raise (ValueError(f"Directory {dir!r} doesn't contain a *.logfiles.zip archive"))
+    return l[0]
+
+
+class LogFile:
+    def __init__(self: "LogFile", dir: Path) -> None:
+        filename = log_filename(dir)
+        self.logfiles = ZipFile(filename)
+        self.name = Path(filename.name.removesuffix(".zip"))
+
+    def __enter__(self: "LogFile") -> "LogFile":
+        return self
+
+    def __exit__(self: "LogFile") -> None:
+        self.logfiles.close()
+
+    def get_log(self: "LogFile", r: RunId, basename: str) -> str:
+        p = str(self.name.joinpath(".".join([r.mangle(), basename, "log"])))
+        return self.logfiles.read(p).decode()
diff --git a/smtcomp/unpack.py b/smtcomp/unpack.py
index b2b4e9f1..a06285c6 100644
--- a/smtcomp/unpack.py
+++ b/smtcomp/unpack.py
@@ -5,7 +5,7 @@
 from zipfile import ZipFile
 import tarfile
 from stat import S_IXUSR
-import gzip
+import gzip, bz2
 import io
 from typing import AnyStr, cast, IO
 from subprocess import check_output, STDOUT
@@ -62,5 +62,8 @@ def read_cin(file: Path) -> str:
     if file.name.endswith(".gz"):
         with gzip.open(file, "rt") as f:
             return f.read()
+    elif file.name.endswith(".bz2"):
+        with bz2.open(file, "rt") as f:
+            return f.read()
     else:
         return file.read_text()
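
For reference, a minimal usage sketch of the parser added above; the
`tmp/results` directory, the solver name and the log basename are
illustrative assumptions, not values fixed by this patch:

    from pathlib import Path

    import polars as pl

    import smtcomp.defs as defs
    import smtcomp.results as results

    # A benchexec output directory: *.xml.bz2 result files plus one
    # *.logfiles.zip archive with the raw solver outputs.
    results_dir = Path("tmp/results")

    # Aggregate every run into a single frame and count statuses per
    # solver (pl.len() needs a recent polars).
    df = results.parse_dir(results_dir).collect()
    print(df.group_by("solver", "status").agg(pl.len()))

    # Fetch the raw log of one run; LogFile is closed manually here
    # because __exit__ only becomes usable in the next patch.
    rid = results.RunId(
        solver="solver-a",  # hypothetical solver name
        participation=0,
        track=defs.Track.SingleQuery,
        includefile="QF_BV",
    )
    logs = results.LogFile(results_dir)
    print(logs.get_log(rid, "scrambled0001.yml"))  # illustrative basename
    logs.logfiles.close()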
From 4a65c8119b2812f9a72d5e0f4820b991e750344c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Bobot?=
Date: Wed, 12 Jun 2024 16:02:02 +0200
Subject: [PATCH 08/15] Remove benchexec header from solver output

---
 smtcomp/results.py | 36 ++++++++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/smtcomp/results.py b/smtcomp/results.py
index 7972ca28..1adacf6d 100644
--- a/smtcomp/results.py
+++ b/smtcomp/results.py
@@ -1,4 +1,5 @@
-from typing import Optional, Iterator
+from typing import Optional, Iterator, Any
+import functools
 import smtcomp.defs as defs
 import polars as pl
 import xml.etree.ElementTree as ET
@@ -144,6 +145,20 @@ def log_filename(dir: Path) -> Path:
     return l[0]


+### Benchexec adds this header
+# output_file.write(
+#     " ".join(map(util.escape_string_shell, args))
+#     + "\n\n\n"
+#     + "-" * 80
+#     + "\n\n\n"
+# )
+
+
+@functools.cache
+def benchexec_log_separator() -> str:
+    return "\n\n\n" + "-" * 80 + "\n\n\n"
+
+
 class LogFile:
     def __init__(self: "LogFile", dir: Path) -> None:
         filename = log_filename(dir)
@@ -153,9 +168,26 @@ def __init__(self: "LogFile", dir: Path) -> None:
     def __enter__(self: "LogFile") -> "LogFile":
         return self

-    def __exit__(self: "LogFile") -> None:
+    def __exit__(self: "LogFile", exc_type: Any, exc_value: Any, traceback: Any) -> None:
+        self.close()
+
+    def close(self) -> None:
         self.logfiles.close()

     def get_log(self: "LogFile", r: RunId, basename: str) -> str:
+        """
+        Return the output of the prover and the header with the command line used
+        """
         p = str(self.name.joinpath(".".join([r.mangle(), basename, "log"])))
         return self.logfiles.read(p).decode()
+
+    def get_output(self: "LogFile", r: RunId, basename: str) -> str:
+        """
+        Return the output of the prover
+        """
+        s = self.get_log(r, basename)
+        index = s.find(benchexec_log_separator())
+        if index == -1:
+            raise ValueError(f"Log Header not found {r!r} {basename!r}")
+        index += len(benchexec_log_separator())
+        return s[index:]

From accb49b677cf7b049d8bc22ce06245dcd333d4a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3%A7ois=20Bobot?=
Date: Wed, 12 Jun 2024 16:02:43 +0200
Subject: [PATCH 09/15] Update trace-executor

This is a version that accepts options for the solver
---
 smtcomp/execution.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/smtcomp/execution.py b/smtcomp/execution.py
index 85c00666..de31cefa 100644
--- a/smtcomp/execution.py
+++ b/smtcomp/execution.py
@@ -6,13 +6,11 @@


 def trace_executor_url() -> str:
-    return (
-        "https://github.com/SMT-COMP/trace-executor/releases/download/smtcomp2022/SMT-COMP-2022-trace-executor.tar.gz"
-    )
+    return "https://github.com/SMT-COMP/trace-executor/releases/download/smtcomp2024-rc1/SMT-COMP-2024-trace-executor.tar.gz"


 def trace_executor_filename() -> str:
-    return "SMT-COMP-2022-trace-executor.tar.gz"
+    return "SMT-COMP-2024-trace-executor.tar.gz"


 def download_trace_executor(dst: Path) -> None:

From 374e2fce60d71dd6037dff2a02205506884eeb0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3%A7ois=20Bobot?=
Date: Wed, 12 Jun 2024 16:58:27 +0200
Subject: [PATCH 10/15] Use the path given for the command directly

Except when starexec compatibility is used
---
 smtcomp/archive.py   |  4 ++++
 smtcomp/benchexec.py | 14 +++++++++-----
 smtcomp/main.py      |  3 +++
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/smtcomp/archive.py b/smtcomp/archive.py
index d0d75f2c..8e6e35fb 100644
--- a/smtcomp/archive.py
+++ b/smtcomp/archive.py
@@ -38,6 +38,10 @@ def is_unpack_present(archive: defs.Archive, dst: Path) -> bool:
     return any(True for _ in d.iterdir())


+def command_path(command: defs.Command, archive: defs.Archive, dst: Path) -> Path:
+    return archive_unpack_dir(archive, dst).joinpath(command.binary)
+
+
 def find_command(command: defs.Command, archive: defs.Archive, dst: Path) -> Path:
     d = archive_unpack_dir(archive, dst)
     if not (d.exists()):

diff --git a/smtcomp/benchexec.py b/smtcomp/benchexec.py
index faac6361..9c0d4147 100644
--- a/smtcomp/benchexec.py
+++ b/smtcomp/benchexec.py
@@ -6,7 +6,7 @@
 from yattag import Doc, indent

 from smtcomp import defs
-from smtcomp.archive import find_command, archive_unpack_dir
+from smtcomp.archive import find_command, archive_unpack_dir, command_path
 from pydantic import BaseModel
 import shlex

@@ -63,8 +63,11 @@ def generate_tool_module(s: defs.Submission, cachedir: Path, track: defs.Track)
             f.write(f"    NAME = '{s.name}'\n")
         if s.command is not None:
             assert s.archive is not None
-            executable_path = find_command(s.command, s.archive, cachedir)
-            executable = str(relpath(executable_path, start=str(cachedir)))
+            if s.command.compa_starexec:
+                executable_path = find_command(s.command, s.archive, cachedir)
+                executable = str(relpath(executable_path, start=str(cachedir)))
+            else:
+                executable = str(command_path(s.command, s.archive, Path()))
             f.write(f"    EXECUTABLE = '{executable}'\n")

         required_paths = []
@@ -147,10 +150,10 @@ def cmdtask_for_submission(s: defs.Submission, cachedir: Path, target_track: def
tasks.extend([str(logic) + suffix for logic in logics]) if tasks: - executable_path = find_command(command, archive, cachedir) - executable = str(relpath(executable_path, start=str(cachedir))) if command.compa_starexec: assert command.arguments == [] + executable_path = find_command(command, archive, cachedir) + executable = str(relpath(executable_path, start=str(cachedir))) dirname = str(relpath(executable_path.parent, start=str(cachedir))) options = [ @@ -160,6 +163,7 @@ def cmdtask_for_submission(s: defs.Submission, cachedir: Path, target_track: def "compa_starexec", ] else: + executable = str(command_path(command, archive, Path())) options = [executable] + command.arguments cmdtask = CmdTask( name=f"{s.name},{i},{track}", diff --git a/smtcomp/main.py b/smtcomp/main.py index 5f7101ee..2ff414ac 100644 --- a/smtcomp/main.py +++ b/smtcomp/main.py @@ -157,7 +157,10 @@ def generate_benchexec( ) -> None: """ Generate the benchexec file for the given submissions + + (The cachedir directory need to contain unpacked archive only with compa_starexec command) """ + (cachedir / "tools").mkdir(parents=True, exist_ok=True) for file in track(files): s = submission.read(str(file)) From be0ef77411dec843626c98fff9fd9ce52e6b902d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bobot?= Date: Wed, 12 Jun 2024 17:23:46 +0200 Subject: [PATCH 11/15] Allows to generate xml for model and unsat core --- smtcomp/benchexec.py | 68 +++++++++++++++++++++++++------------------- smtcomp/main.py | 15 +++++++--- 2 files changed, 49 insertions(+), 34 deletions(-) diff --git a/smtcomp/benchexec.py b/smtcomp/benchexec.py index 9c0d4147..2b55e375 100644 --- a/smtcomp/benchexec.py +++ b/smtcomp/benchexec.py @@ -13,6 +13,30 @@ from re import sub +def get_suffix(track: defs.Track) -> str: + match track: + case defs.Track.Incremental: + return "_inc" + case defs.Track.ModelValidation: + return "_model" + case defs.Track.UnsatCore: + return "_unsatcore" + case defs.Track.SingleQuery: + return "" + case _: + raise ValueError("No Cloud or Parallel") + + +def get_xml_name(s: defs.Submission, track: defs.Track) -> str: + suffix = get_suffix(track) + return sub(r"\W+", "", s.name.lower()) + suffix + ".xml" + + +def tool_module_name(s: defs.Submission, incremental: bool) -> str: + suffix = "_inc" if incremental else "" + return sub(r"\W+", "", s.name.lower()) + suffix + + class CmdTask(BaseModel): name: str options: List[str] @@ -36,27 +60,17 @@ def generate_benchmark_yml(benchmark: Path, expected_result: Optional[bool], ori f.write(f" expected_verdict: {expected_str}\n") -def tool_module_name(s: defs.Submission, track: defs.Track) -> str: - return sub(r"\W+", "", s.name.lower()) + get_suffix(track) - - -def generate_tool_module(s: defs.Submission, cachedir: Path, track: defs.Track) -> None: - name = tool_module_name(s, track) +def generate_tool_module(s: defs.Submission, cachedir: Path, incremental: bool) -> None: + name = tool_module_name(s, incremental) file = cachedir / "tools" / f"{name}.py" with file.open("w") as f: - match track: - case defs.Track.Incremental: - base_module = "incremental_tool" - base_class = "IncrementalSMTCompTool" - case defs.Track.SingleQuery: - base_module = "tool" - base_class = "SMTCompTool" - case _: - rich.print( - f"[red]generate_tool_module command does not yet work for the competition track: {track}[/red]" - ) - exit(1) + if incremental: + base_module = "incremental_tool" + base_class = "IncrementalSMTCompTool" + else: + base_module = "tool" + base_class = "SMTCompTool" f.write(f"from 
tools.{base_module} import {base_class}\n\n") f.write(f"class Tool({base_class}): # type: ignore\n") @@ -83,8 +97,13 @@ def generate_tool_module(s: defs.Submission, cachedir: Path, track: defs.Track) f.write(f" REQUIRED_PATHS = {required_paths}\n") +def generate_tool_modules(s: defs.Submission, cachedir: Path) -> None: + generate_tool_module(s, cachedir, True) + generate_tool_module(s, cachedir, False) + + def generate_xml( - timelimit_s: int, memlimit_M: int, cpuCores: int, cmdtasks: List[CmdTask], cachedir: Path, tool_module_name: str + timelimit_s: int, memlimit_M: int, cpuCores: int, cmdtasks: List[CmdTask], file: Path, tool_module_name: str ) -> None: doc, tag, text = Doc().tagtext() @@ -120,20 +139,9 @@ def generate_xml( with tag("propertyfile"): text("benchmarks/properties/SMT.prp") - file = cachedir.joinpath(f"{tool_module_name}.xml") file.write_text(indent(doc.getvalue())) -def get_suffix(track: defs.Track) -> str: - match track: - case defs.Track.Incremental: - return "_inc" - case defs.Track.ModelValidation: - return "_model" - case _: - return "" - - def cmdtask_for_submission(s: defs.Submission, cachedir: Path, target_track: defs.Track) -> List[CmdTask]: res: List[CmdTask] = [] i = -1 diff --git a/smtcomp/main.py b/smtcomp/main.py index 2ff414ac..f38e344d 100644 --- a/smtcomp/main.py +++ b/smtcomp/main.py @@ -163,19 +163,26 @@ def generate_benchexec( (cachedir / "tools").mkdir(parents=True, exist_ok=True) for file in track(files): s = submission.read(str(file)) + benchexec.generate_tool_modules(s, cachedir) - for target_track in [defs.Track.SingleQuery, defs.Track.Incremental]: - tool_module_name = benchexec.tool_module_name(s, target_track) - benchexec.generate_tool_module(s, cachedir, target_track) + for target_track in [ + defs.Track.SingleQuery, + defs.Track.Incremental, + defs.Track.ModelValidation, + defs.Track.UnsatCore, + ]: + tool_module_name = benchexec.tool_module_name(s, target_track == defs.Track.Incremental) res = benchexec.cmdtask_for_submission(s, cachedir, target_track) if res: + basename = benchexec.get_xml_name(s, target_track) + file = cachedir / basename benchexec.generate_xml( timelimit_s=timelimit_s, memlimit_M=memlimit_M, cpuCores=cpuCores, cmdtasks=res, - cachedir=cachedir, + file=file, tool_module_name=tool_module_name, ) From 7fdf7551d83d5a5d9f44b8cc4489816a0028e4ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bobot?= Date: Wed, 12 Jun 2024 21:18:01 +0200 Subject: [PATCH 12/15] Select Sat for ModelValidation and Unsat for Unsat-core --- smtcomp/main.py | 8 ++++++++ smtcomp/scramble_benchmarks.py | 10 ++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/smtcomp/main.py b/smtcomp/main.py index f38e344d..1cefcce8 100644 --- a/smtcomp/main.py +++ b/smtcomp/main.py @@ -404,6 +404,8 @@ def show_sq_selection_stats( old_never_ran=pl.col("file").filter(run=False, new=False).len(), new=pl.col("new").sum(), selected=pl.col("file").filter(selected=True).len(), + selected_sat=pl.col("file").filter(selected=True, status=int(defs.Status.Sat)).len(), + selected_unsat=pl.col("file").filter(selected=True, status=int(defs.Status.Unsat)).len(), selected_already_run=pl.col("file").filter(selected=True, run=True).len(), ) .sort(by="logic") @@ -417,6 +419,8 @@ def show_sq_selection_stats( table.add_column("never compet.", justify="right", style="magenta") table.add_column("new", justify="right", style="magenta1") table.add_column("selected", justify="right", style="green3") + table.add_column("selected sat", justify="right", style="green4") 
+ table.add_column("selected unsat", justify="right", style="green4") table.add_column("selected already run", justify="right", style="green4") used_logics = defs.logic_used_for_track(defs.Track.SingleQuery) @@ -434,6 +438,8 @@ def show_sq_selection_stats( str(d["old_never_ran"]), str(d["new"]), str(d["selected"]), + str(d["selected_sat"]), + str(d["selected_unsat"]), str(d["selected_already_run"]), ) @@ -445,6 +451,8 @@ def show_sq_selection_stats( str(b3["old_never_ran"].sum()), str(b3["new"].sum()), str(b3["selected"].sum()), + str(b3["selected_sat"].sum()), + str(b3["selected_unsat"].sum()), str(b3["selected_already_run"].sum()), ) diff --git a/smtcomp/scramble_benchmarks.py b/smtcomp/scramble_benchmarks.py index ca625e29..d579cdd8 100644 --- a/smtcomp/scramble_benchmarks.py +++ b/smtcomp/scramble_benchmarks.py @@ -154,16 +154,10 @@ def select_and_scramble( selected = smtcomp.selection.helper_compute_incremental(config) case defs.Track.ModelValidation: selected = smtcomp.selection.helper_compute_sq(config) - rich.print( - f"[red]The scramble_benchmarks command does not yet work for the competition track: {competition_track}[/red]" - ) - exit(1) + selected = selected.filter(status=int(defs.Status.Sat)) case defs.Track.UnsatCore: selected = smtcomp.selection.helper_compute_sq(config) - rich.print( - f"[red]The scramble_benchmarks command does not yet work for the competition track: {competition_track}[/red]" - ) - exit(1) + selected = selected.filter(status=int(defs.Status.Unsat)) case defs.Track.ProofExhibition | defs.Track.Cloud | defs.Track.Parallel: selected = smtcomp.selection.helper_compute_sq(config) rich.print( From 2397c9fbd1fa7c78f2d4f53a4229896d3efa3198 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Bobot?= Date: Wed, 12 Jun 2024 21:41:22 +0200 Subject: [PATCH 13/15] Use previous results in order to complete status --- smtcomp/defs.py | 4 ++++ smtcomp/main.py | 2 ++ smtcomp/selection.py | 22 ++++++++++++++++++++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/smtcomp/defs.py b/smtcomp/defs.py index f278b8c8..1474ae7b 100644 --- a/smtcomp/defs.py +++ b/smtcomp/defs.py @@ -1351,6 +1351,10 @@ class Config: cpuCores = 4 min_used_benchmarks = 300 ratio_of_used_benchmarks = 0.5 + use_previous_results_for_status = True + """ + Complete the status given in the benchmarks using previous results + """ old_criteria = False """"Do we try to emulate <= 2023 criteria that does not really follow the rules""" invert_triviality = False diff --git a/smtcomp/main.py b/smtcomp/main.py index 1cefcce8..2d4add10 100644 --- a/smtcomp/main.py +++ b/smtcomp/main.py @@ -382,6 +382,7 @@ def show_sq_selection_stats( min_use_benchmarks: int = defs.Config.min_used_benchmarks, ratio_of_used_benchmarks: float = defs.Config.ratio_of_used_benchmarks, invert_triviality: bool = False, + use_previous_results_for_status: bool = defs.Config.use_previous_results_for_status, ) -> None: """ Show statistics on the benchmarks selected for single query track @@ -395,6 +396,7 @@ def show_sq_selection_stats( config.ratio_of_used_benchmarks = ratio_of_used_benchmarks config.invert_triviality = invert_triviality config.old_criteria = old_criteria + config.use_previous_results_for_status = use_previous_results_for_status benchmarks_with_info = smtcomp.selection.helper_compute_sq(config) b3 = ( benchmarks_with_info.group_by(["logic"]) diff --git a/smtcomp/selection.py b/smtcomp/selection.py index e7dd216a..ad8409f7 100644 --- a/smtcomp/selection.py +++ b/smtcomp/selection.py @@ -38,12 +38,23 @@ def 
find_trivial(results: pl.LazyFrame, config: defs.Config) -> pl.LazyFrame: .agg( trivial= # All the results are sat or unsat and the time is below 1s - ((c_result != int(defs.Status.Unknown)) & (c_cpu_time <= 1.0)).all() + ((c_result != int(defs.Status.Unknown)) & (c_cpu_time <= 1.0)).all(), + # Compute the full result by year + result=pl.when((c_result == int(defs.Status.Sat)).sum() >= 2) + .then(int(defs.Status.Sat)) + .when((c_result == int(defs.Status.Unsat)).sum() >= 2) + .then(int(defs.Status.Unsat)) + .otherwise(int(defs.Status.Unknown)), ) .group_by("file") .agg( trivial=c_trivial.any() if config.old_criteria else c_trivial.all(), run=True, + result=pl.when((c_result == int(defs.Status.Sat)).any()) + .then(int(defs.Status.Sat)) + .when((c_result == int(defs.Status.Unsat)).any()) + .then(int(defs.Status.Unsat)) + .otherwise(int(defs.Status.Unknown)), ) ) return tally @@ -56,10 +67,17 @@ def join_default_with_False(original: pl.LazyFrame, new: pl.LazyFrame, on: str) def add_trivial_run_info(benchmarks: pl.LazyFrame, previous_results: pl.LazyFrame, config: defs.Config) -> pl.LazyFrame: is_trivial = find_trivial(previous_results, config) - return join_default_with_False(benchmarks, is_trivial, on="file").with_columns( + with_info = join_default_with_False(benchmarks, is_trivial, on="file").with_columns( new=pl.col("family").str.starts_with(str(config.current_year)) ) + if config.use_previous_results_for_status: + with_info = with_info.with_columns( + status=pl.when(pl.col("status") != int(defs.Status.Unknown)).then(pl.col("status")).otherwise(c_result) + ) + + return with_info + def track_selection(benchmarks_with_info: pl.LazyFrame, config: defs.Config, target_track: defs.Track) -> pl.LazyFrame: used_logics = defs.logic_used_for_track(target_track) From dc399c0fe55f74e8414a630838bdc95a37496211 Mon Sep 17 00:00:00 2001 From: Martin Bromberger Date: Wed, 12 Jun 2024 13:26:48 +0200 Subject: [PATCH 14/15] Test solver no longer tests incremental benchmarks. --- smtcomp/main.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/smtcomp/main.py b/smtcomp/main.py index 2d4add10..c7f58a80 100644 --- a/smtcomp/main.py +++ b/smtcomp/main.py @@ -607,13 +607,11 @@ def read_submission(file: Path) -> defs.Submission: for part in sub.complete_participations(): for track, divisions in part.tracks.items(): match track: - case defs.Track.Incremental: - statuses = [defs.Status.Sat, defs.Status.Unsat] case defs.Track.ModelValidation: statuses = [defs.Status.Sat] case defs.Track.SingleQuery: statuses = [defs.Status.Sat, defs.Status.Unsat] - case defs.Track.UnsatCore | defs.Track.ProofExhibition | defs.Track.Cloud | defs.Track.Parallel: + case defs.Track.Incremental | defs.Track.UnsatCore | defs.Track.ProofExhibition | defs.Track.Cloud | defs.Track.Parallel: continue for _, logics in divisions.items(): for logic in logics: From 3da485d574adf267daad40445f72e8e55da32b85 Mon Sep 17 00:00:00 2001 From: Martin Bromberger Date: Wed, 12 Jun 2024 13:30:33 +0200 Subject: [PATCH 15/15] Blackify. 
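
This is a formatting-only change: running `black` splits the long
`case` or-pattern added in the previous commit. A minimal sketch of the
layout it produces (hypothetical `Track` enum, not this repository's
`defs.Track`):

    from enum import Enum, auto

    class Track(Enum):
        SingleQuery = auto()
        Incremental = auto()
        UnsatCore = auto()
        ProofExhibition = auto()
        Cloud = auto()
        Parallel = auto()

    def is_unsupported(track: Track) -> bool:
        match track:
            # Black parenthesizes the alternatives, one per line, once
            # the combined pattern exceeds the line-length limit.
            case (
                Track.Incremental
                | Track.UnsatCore
                | Track.ProofExhibition
                | Track.Cloud
                | Track.Parallel
            ):
                return True
            case _:
                return False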
--- smtcomp/main.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/smtcomp/main.py b/smtcomp/main.py index c7f58a80..b0e8a0c0 100644 --- a/smtcomp/main.py +++ b/smtcomp/main.py @@ -611,7 +611,13 @@ def read_submission(file: Path) -> defs.Submission: statuses = [defs.Status.Sat] case defs.Track.SingleQuery: statuses = [defs.Status.Sat, defs.Status.Unsat] - case defs.Track.Incremental | defs.Track.UnsatCore | defs.Track.ProofExhibition | defs.Track.Cloud | defs.Track.Parallel: + case ( + defs.Track.Incremental + | defs.Track.UnsatCore + | defs.Track.ProofExhibition + | defs.Track.Cloud + | defs.Track.Parallel + ): continue for _, logics in divisions.items(): for logic in logics:
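
As a closing illustration of the status completion introduced in PATCH
13/15, a self-contained polars sketch; the integer codes are toy
stand-ins for defs.Status, not the real values:

    import polars as pl

    UNKNOWN, SAT, UNSAT = 0, 1, 2  # toy status codes

    df = pl.DataFrame(
        {
            # status recorded in the benchmark file
            "status": [UNKNOWN, SAT, UNKNOWN],
            # status recovered from previous years' results
            "result": [UNSAT, UNSAT, UNKNOWN],
        }
    )

    # Same shape as the expression in add_trivial_run_info: keep a known
    # status, otherwise fall back to the previous results.
    completed = df.with_columns(
        status=pl.when(pl.col("status") != UNKNOWN)
        .then(pl.col("status"))
        .otherwise(pl.col("result"))
    )
    print(completed)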