From ad81749d777c30205c5b5d00ddc448711603d731 Mon Sep 17 00:00:00 2001 From: grunweg Date: Thu, 31 Oct 2024 00:24:46 +0000 Subject: [PATCH] chore(scripts): require all scripts to be documented in the README (#14539) This is valuable so - contributors can discover existing useful tools, - existing knowledge is documented and not lost - it (hopefully) inspires to re-consider if all each script is useful/why we have it. Having a [bounded number of entry-points](https://matklad.github.io/2023/12/31/O(1)-build-file.html) to CI is useful to have. This is implemented as a new check, piggy-backing the `lint_style` executable. To make CI pass, this PR adds a README file to `scripts`, containing stub descriptions for the current files. Future PRs should fill this out. Co-authored-by: Kim Morrison Co-authored-by: Michael Rothgang Co-authored-by: Damiano Testa --- scripts/README.md | 95 +++++++++++++++++++++++++++++++++++++++++ scripts/fix_unused.py | 3 ++ scripts/lint-style.lean | 31 ++++++++++++-- 3 files changed, 126 insertions(+), 3 deletions(-) create mode 100644 scripts/README.md diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000000000..33a295aa934b5 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,95 @@ +# Miscellaneous scripts for working on mathlib + +This directory contains miscellaneous scripts that are useful for working on or with mathlib. +When adding a new script, please make sure to document it here, so other readers have a chance +to learn about it as well! + + +## Current scripts and their purpose + +**Installation scripts** +- `install_debian.sh`, `install_macos.sh` + Installation scripts referenced from the leanprover community install pages. + https://leanprover-community.github.io/install/debian.html + https://leanprover-community.github.io/install/macos.html + If these web pages are deprecated or removed, we should remove these scripts. + +**Tool for manual maintenance** +- `fix_unused.py` + Bulk processing of unused variable warnings, replacing them with `_`. + +**Analyzing Mathlib's import structure** +- `unused_in_pole.sh` (followed by an optional ``, defaulting to `Mathlib`) + calls `lake exe pole --loc --to ` to compute the longest + pole to a given target module, and then feeds this into + `lake exe unused` to analyze transitively unused imports. + Generates `unused.md` containing a markdown table showing the unused imports, + and suggests `lake exe graph` commands to visualize the largest "rectangles" of unused imports. + +**CI workflow** +- `mk_all.lean` + run via `lake exe mk_all`, regenerates the import-only files + `Mathlib.lean`, `Mathlib/Tactic.lean`, `Archive.lean` and `Counterexamples.lean` +- `lint-style.lean`, `lint-style.py`, `print-style-errors.sh` + style linters, written in Python and Lean. Run via `lake exe lint-style`. + Medium-term, the latter two scripts should be rewritten and incorporated in `lint-style.lean`. +- `lint-bib.sh` + normalize the BibTeX file `docs/references.bib` using `bibtool`. +- `yaml_check.py`, `check-yaml.lean` + Sanity checks for `undergrad.yaml`, `overview.yaml`, and `100.yaml`. +- `lean-pr-testing-comments.sh` + Generate comments and labels on a Lean or Batteries PR after CI has finished on a + `*-pr-testing-NNNN` branch. +- `update_nolints_CI.sh` + Update the `nolints.json` file to remove unneeded entries. Automatically run once a week. +- `bench_summary.lean` + Convert data retrieved from the speed center into a shorter, more accessible format, + and post a comment with this summary on github. +- `declarations_diff.sh` + Attempts to find which declarations have been removed and which have been added in the current PR + with respect to `master`, and posts a comment on github with the result. +- `autolabel.lean` is the Lean script in charge of automatically adding a `t-`label on eligible PRs. + Autolabelling is inferred by which directories the current PR modifies. + +**Managing nightly-testing and bump branches** +- `create-adaptation-pr.sh` implements some of the steps in the workflow described at + https://leanprover-community.github.io/contribute/tags_and_branches.html#mathlib-nightly-and-bump-branches + Specifically, it will: + - merge `master` into `bump/v4.x.y` + - create a new branch from `bump/v4.x.y`, called `bump/nightly-YYYY-MM-DD` + - merge `nightly-testing` into the new branch + - open a PR to merge the new branch back into `bump/v4.x.y` + - announce the PR on zulip + - finally, merge the new branch back into `nightly-testing`, if conflict resolution was required. + + If there are merge conflicts, it pauses and asks for help from the human driver. + +**Managing and tracking technical debt** +- `technical-debt-metrics.sh` + Prints information on certain kind of technical debt in Mathlib. + This output is automatically posted to zulip once a week. +- `init_creation.sh` + makes sure that every file in Mathlib transitively imports `Mathlib.init` + This may be removed soon, and replaced by a different mechanism. + +**Mathlib tactics** +- `polyrith_sage.py`, `polyrith_sage_helper.py` are required for `polyrith` + to communication with the Sage server. + +**Data files with linter exceptions** +- `nolints.json` contains exceptions for all `env_linter`s in mathlib. + For permanent and deliberate exceptions, add a `@[nolint lintername]` in the .lean file instead. +- `no_lints_prime_decls.txt` + contains temporary exceptions for the `docPrime` linter + +Both of these files should tend to zero over time; +please do not add new entries to these files. PRs removing (the need for) entries are welcome. + +**API surrounding CI** +- `update_PR_comment.sh` is a script that edits an existing message (or creates a new one). + It is used by the `PR_summary` workflow to maintain an up-to-date report with a searchable history. +- `get_tlabel.sh` extracts the `t-`label that a PR has (assuming that there is exactly one). + It is used by the `maintainer_merge` family of workflows to dispatch `maintainer merge` requests + to the appropriate topic on zulip. +- `count-trans-deps.py`, `import-graph-report.py` and `import_trans_difference.sh` produce various + summaries of changes in transitive imports that the `PR_summary` message incorporates. diff --git a/scripts/fix_unused.py b/scripts/fix_unused.py index 9bc5679bfdf57..9475d07bf9a89 100755 --- a/scripts/fix_unused.py +++ b/scripts/fix_unused.py @@ -2,6 +2,9 @@ # https://chatgpt.com/share/66f4e420-66bc-8001-8349-ce3cfb4f23c3 # Usage: lake build | scripts/fix_unused.py + +# Bulk processing of unused variable warnings, replacing them with `_`. + import re import sys diff --git a/scripts/lint-style.lean b/scripts/lint-style.lean index a7eb1c953caa1..953800e9826f3 100644 --- a/scripts/lint-style.lean +++ b/scripts/lint-style.lean @@ -12,10 +12,34 @@ import Cli.Basic This files defines the `lint-style` executable which runs all text-based style linters. The linters themselves are defined in `Mathlib.Tactic.Linter.TextBased`. + +In addition, this checks that every file in `scripts` is documented in its top-level README. -/ open Cli Mathlib.Linter.TextBased +open System.FilePath + +/-- Verifies that every file in the `scripts` directory is documented in `scripts/README.md`. +Return `True` if there are undocumented scripts, otherwise `False`. -/ +def allScriptsDocumented : IO Bool := do + -- Retrieve all scripts (except for the `bench` directory). + let allScripts ← (walkDir "scripts" fun p ↦ pure (p.components.getD 1 "" != "bench")) + let allScripts := allScripts.erase ("scripts" / "bench")|>.erase ("scripts" / "README.md") + -- Check if the README text contains each file enclosed in backticks. + let readme : String ← IO.FS.readFile ("scripts" / "README.md") + -- These are data files for linter exceptions: don't complain about these *for now*. + let dataFiles := #["noshake.json", "nolints-style.txt"] + -- For now, there are no scripts in sub-directories that should be documented. + let fileNames := allScripts.map (·.fileName.get!) + let undocumented := fileNames.filter fun script ↦ + !readme.containsSubstr s!"`{script}`" && !dataFiles.contains script + if undocumented.size > 0 then + IO.println s!"error: found {undocumented.size} undocumented script(s): \ + please describe the script(s) in 'scripts/README.md'\n \ + {String.intercalate "," undocumented.toList}" + return undocumented.size == 0 + /-- Implementation of the `lint-style` command line program. -/ def lintStyleCli (args : Cli.Parsed) : IO UInt32 := do let style : ErrorFormat := match args.hasFlag "github" with @@ -26,15 +50,16 @@ def lintStyleCli (args : Cli.Parsed) : IO UInt32 := do let mut allModules := #[] for s in ["Archive.lean", "Counterexamples.lean", "Mathlib.lean"] do allModules := allModules.append ((← IO.FS.lines s).map (·.stripPrefix "import ")) - -- note: since we manually add "Batteries" to "Mathlib.lean", we remove it here manually + -- Note: since we manually add "Batteries" to "Mathlib.lean", we remove it here manually. allModules := allModules.erase "Batteries" - let numberErrorFiles ← lintModules allModules style fix + let mut numberErrors ← lintModules allModules style fix + if !(← allScriptsDocumented) then numberErrors := numberErrors + 1 -- If run with the `--fix` argument, return a zero exit code. -- Otherwise, make sure to return an exit code of at most 125, -- so this return value can be used further in shell scripts. if args.hasFlag "fix" then return 0 - else return min numberErrorFiles 125 + else return min numberErrors 125 /-- Setting up command line options and help text for `lake exe lint-style`. -/ -- so far, no help options or so: perhaps that is fine?