Skip to content

Update format & lean, handle mdata #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 109 additions & 34 deletions Export.lean
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,18 @@ import Lean

open Lean

instance : Hashable RecursorRule where
hash r := hash (r.ctor, r.nfields, r.rhs)

structure Context where
env : Environment

structure State where
visitedNames : HashMap Name Nat := .insert {} .anonymous 0
visitedLevels : HashMap Level Nat := .insert {} .zero 0
visitedExprs : HashMap Expr Nat := {}
visitedRecRules : HashMap RecursorRule Nat := {}
visitedConstants : NameHashSet := {}
visitedQuot : Bool := false

abbrev M := ReaderT Context <| StateT State IO

Expand Down Expand Up @@ -59,65 +62,137 @@ def uint8ToHex (c : UInt8) : String :=
let d1 := c % 16
(hexDigitRepr d2.toNat ++ hexDigitRepr d1.toNat).toUpper

partial def dumpExpr (e : Expr) : M Nat := do
if let .mdata _ e := e then
return (← dumpExpr e)
getIdx e (·.visitedExprs) ({ · with visitedExprs := · }) do
match e with
| .bvar i => return s!"#EV {i}"
| .sort l => return s!"#ES {← dumpLevel l}"
| .const n us =>
return s!"#EC {← dumpName n} {← seq <$> us.mapM dumpLevel}"
| .app f e => return s!"#EA {← dumpExpr f} {← dumpExpr e}"
| .lam n d b bi => return s!"#EL {dumpInfo bi} {← dumpName n} {← dumpExpr d} {← dumpExpr b}"
| .letE n d v b _ => return s!"#EZ {← dumpName n} {← dumpExpr d} {← dumpExpr v} {← dumpExpr b}"
| .forallE n d b bi => return s!"#EP {dumpInfo bi} {← dumpName n} {← dumpExpr d} {← dumpExpr b}"
| .mdata .. | .fvar .. | .mvar .. => unreachable!
-- extensions compared to Lean 3
| .proj s i e => return s!"#EJ {← dumpName s} {i} {← dumpExpr e}"
| .lit (.natVal i) => return s!"#ELN {i}"
| .lit (.strVal s) => return s!"#ELS {s.toUTF8.toList.map uint8ToHex |> seq}"
/-
Eliminate `mdata` nodes, while dumping an expression.

When an `mdata` node is encountered, skip it. For everything else, update `e`
IFF a sub-expression contained `mdata`.

returns `(had mdata, the expression with any mdata removed, e's export index)`
-/
@[inline]
partial def dumpExprAux (e : Expr) : M (Bool × Expr × Nat) := do
/- If this is an mdata node, dump the inner item and return the expr without mdata -/
if let .mdata _ e' := e then
let (_, e'', idx) ← dumpExprAux e'
return (true, e'', idx)
/- If we've already seen this expression, use the cached data -/
if let some idx := (← get).visitedExprs.find? e then
return (false, e, idx)
else
let (ck, e, s) ←
match e with
| .mdata .. | .fvar .. | .mvar .. => unreachable!
| .bvar i => pure (false, e, (return s!"#EV {i}"))
| .sort l => pure (false, e, (return s!"#ES {← dumpLevel l}"))
| .const n us => pure (false, e, (return s!"#EC {← dumpName n} {← seq <$> us.mapM dumpLevel}"))
| .lit (.natVal i) => pure (false, e, (return s!"#ELN {i}"))
| .lit (.strVal s) => pure (false, e, (return s!"#ELS {s.toUTF8.toList.map uint8ToHex |> seq}"))
| .app f a =>
let (f_rebuilt, f, f_idx) ← dumpExprAux f
let (a_rebuilt, a, a_idx) ← dumpExprAux a
let ck := f_rebuilt || a_rebuilt
pure (ck, if ck then e.updateApp! f a else e, (return s!"#EA {f_idx} {a_idx}"))
| .lam n d b bi =>
let (d_rebuilt, d, d_idx) ← dumpExprAux d
let (b_rebuilt, b, b_idx) ← dumpExprAux b
let ck := (d_rebuilt || b_rebuilt)
pure (ck, if ck then e.updateLambdaE! d b else e, (return s!"#EL {dumpInfo bi} {← dumpName n} {d_idx} {b_idx}"))
| .letE n d v b _ =>
let (d_rebuilt, d, d_idx) ← dumpExprAux d
let (v_rebuilt, v, v_idx) ← dumpExprAux v
let (b_rebuilt, b, b_idx) ← dumpExprAux b
let ck := (d_rebuilt || v_rebuilt || b_rebuilt)
pure (ck, if ck then e.updateLet! d v b else e, (return s!"#EZ {← dumpName n} {d_idx} {v_idx} {b_idx}"))
| .forallE n d b bi =>
let (d_rebuilt, d, d_idx) ← dumpExprAux d
let (b_rebuilt, b, b_idx) ← dumpExprAux b
let ck := (d_rebuilt || b_rebuilt)
pure (ck, if ck then e.updateForallE! d b else e, (return s!"#EP {dumpInfo bi} {← dumpName n} {d_idx} {b_idx}"))
| .proj s i e2 =>
let (e_rebuilt, ex, e'_idx) ← dumpExprAux e2
let ck := e_rebuilt
pure (ck, if ck then (e.updateProj! ex) else e, (return s!"#EJ {← dumpName s} {i} {e'_idx}"))
tryPrintE ck e s
where
tryPrintE (hadMData: Bool) (e : Expr) (s : M String) : M (Bool × Expr × Nat) := do
/- If the expression had `mdata` and was rebuilt, check the rebuilt expression
against the cache once more -/
if hadMData then
if let some idx := (← get).visitedExprs.find? e then
return (hadMData, e, idx)
let idx := (← get).visitedExprs.size
modify (fun st => { st with visitedExprs := st.visitedExprs.insert e idx })
IO.println s!"{idx} {← s}"
return (hadMData, e, idx)

@[inline]
def dumpExpr (e : Expr) : M Nat := do
let (_, _, n) ← dumpExprAux e
return n

def dumpHints : ReducibilityHints → String
| .opaque => "O"
| .abbrev => "A"
| .regular n => s!"R {n}"

partial def dumpConstant (c : Name) : M Unit := do
if (← get).visitedConstants.contains c then
return
modify fun st => { st with visitedConstants := st.visitedConstants.insert c }
match (← read).env.find? c |>.get! with
| .axiomInfo val => do
if val.isUnsafe then
return
dumpDeps val.type
IO.println s!"#AX {← dumpName c} {← dumpExpr val.type} {← seq <$> val.levelParams.mapM dumpName}"
| .defnInfo val => do
if val.safety != .safe then
return
dumpDeps val.type
dumpDeps val.value
IO.println s!"#DEF {← dumpName c} {← dumpExpr val.type} {← dumpExpr val.value} {← seq <$> val.levelParams.mapM dumpName}"
IO.println s!"#DEF {← dumpName c} {← dumpExpr val.type} {← dumpExpr val.value} {dumpHints val.hints} {← seq <$> val.levelParams.mapM dumpName}"
| .thmInfo val => do
dumpDeps val.type
dumpDeps val.value
IO.println s!"#DEF {← dumpName c} {← dumpExpr val.type} {← dumpExpr val.value} {← seq <$> val.levelParams.mapM dumpName}"
| .opaqueInfo val =>
IO.println s!"#THM {← dumpName c} {← dumpExpr val.type} {← dumpExpr val.value} {← seq <$> val.levelParams.mapM dumpName}"
| .opaqueInfo val => do
if val.isUnsafe then
return
dumpDeps val.type
dumpDeps val.value
IO.println s!"#DEF {← dumpName c} {← dumpExpr val.type} {← dumpExpr val.value} {← seq <$> val.levelParams.mapM dumpName}"
| .quotInfo _ =>
-- Lean 4 uses 4 separate `Quot` declarations in the environment, but Lean 3 uses a single special declarations
if (← get).visitedQuot then
return
modify ({ · with visitedQuot := true })
-- the only dependency of the quot module
dumpConstant `Eq
IO.println s!"#QUOT"
IO.println s!"#OPAQ {← dumpName c} {← dumpExpr val.type} {← dumpExpr val.value} {← seq <$> val.levelParams.mapM dumpName}"
| .quotInfo val =>
dumpDeps val.type
IO.println s!"#QUOT {← dumpName c} {← dumpExpr val.type} {← seq <$> val.levelParams.mapM dumpName}"
| .inductInfo val => do
if val.isUnsafe then
return
dumpDeps val.type
for ctor in val.ctors do
dumpDeps ((← read).env.find? ctor |>.get!.type)
let ctors ← (·.join) <$> val.ctors.mapM fun ctor => return [← dumpName ctor, ← dumpExpr ((← read).env.find? ctor |>.get!.type)]
IO.println s!"#IND {val.numParams} {← dumpName c} {← dumpExpr val.type} {val.numCtors} {seq ctors} {← seq <$> val.levelParams.mapM dumpName}"
| .ctorInfo _ | .recInfo _ => return
let indNameIdxs ← val.all.mapM dumpName
let ctorNameIdxs ← val.ctors.mapM (fun ctor => dumpName ctor)
let isRec := if val.isRec then 1 else 0
let isNested := if val.isNested then 1 else 0
IO.println s!"#IND {← dumpName c} {← dumpExpr val.type} {isRec} {isNested} {val.numParams} {val.numIndices} {indNameIdxs.length} {seq indNameIdxs} {val.numCtors} {seq ctorNameIdxs} {← seq <$> val.levelParams.mapM dumpName}"
| .ctorInfo val =>
if val.isUnsafe then
return
dumpDeps val.type
IO.println s!"#CTOR {← dumpName c} {← dumpExpr val.type} {← dumpName val.induct} {val.cidx} {val.numParams} {val.numFields} {← seq <$> val.levelParams.mapM dumpName}"
| .recInfo val =>
if val.isUnsafe then
return
dumpDeps val.type
let indNameIdxs ← val.all.mapM dumpName
let k := if val.k then 1 else 0
let ruleIdxs ← val.rules.mapM (fun rule => dumpRecRule rule)
IO.println s!"#REC {← dumpName c} {← dumpExpr val.type} {indNameIdxs.length} {seq indNameIdxs} {val.numParams} {val.numIndices} {val.numMotives} {val.numMinors} {ruleIdxs.length} {seq ruleIdxs} {k} {← seq <$> val.levelParams.mapM dumpName}"
where
dumpDeps e := do
for c in e.getUsedConstants do
dumpConstant c
dumpRecRule (rule : RecursorRule) : M Nat := getIdx rule (·.visitedRecRules) ({ · with visitedRecRules := · }) do
dumpDeps (rule.rhs)
return s!"#RR {← dumpName rule.ctor} {rule.nfields} {← dumpExpr rule.rhs}"
3 changes: 3 additions & 0 deletions Main.lean
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import Export
open Lean

def semver := "0.1.2"

def main (args : List String) : IO Unit := do
initSearchPath (← findSysroot)
let (imports, constants) := args.span (· != "--")
Expand All @@ -10,5 +12,6 @@ def main (args : List String) : IO Unit := do
| some cs => cs.map fun c => Syntax.decodeNameLit ("`" ++ c) |>.get!
| none => env.constants.toList.map Prod.fst |>.filter (!·.isInternal)
M.run env do
IO.println semver
for c in constants do
let _ ← dumpConstant c
119 changes: 118 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,26 @@
A simple declaration exporter for Lean 4 using the [Lean 3 export format](https://github.com/leanprover/lean/blob/master/doc/export_format.md)
A simple declaration exporter for Lean 4.

An exporter is a program which emits Lean declarations using Lean's kernel language, for consumption by external type checkers. Producing an export file is a complete exit from the Lean toolchain, and the data in the file can be checked with entirely external software.

The file format is described later in this document, and examples of the exporter's output can be found in the `examples` folder of this repository.

## How to Run

After building the program, users can invoke the exporter from the command line.

```sh
$ lake exe lean4export <mods> [-- <decls>]
```
This exports the contents of the given Lean modules, looked up in the core library or `LEAN_PATH` (as e.g. initialized by an outer `lake env`) and their transitive dependencies.

A specific list of declarations to be exported from these modules can be given after a separating `--`.

If you're getting an error about not finding your project, you can run the exporter against your current directory without modifying the `LEAN_PATH`:

```sh
$ lake env <path to lean4export bin> <mods> [-- <decls>]
```

## Format Extensions

The following commands have been added to represent new features of the Lean 4 type system.
Expand Down Expand Up @@ -49,3 +62,107 @@ Example: the encoding of `let x : Nat := Nat.zero; x` is
2 #EV 0
3 #EZ 1 0 1 2
```

## Export file format (ver 0.1.2)

For clarity, some of the compound items are decorated here with a name, for example `(name : T)`, but they appear in the export file as just an element of `T`.

The export scheme for mutual and nested inductives is as follows:
+ `Inductive.inductiveNames` contains the names of all types in the `mutual .. end` block. The names of any other inductive types used in a nested (but not mutual) construction will not be included.
+ `Inductive.constructorNames` contains the names of all constructors for THAT inductive type, and no others (no constructors of the other types in a mutual block, and no constructors from any nested construction).

**NOTE:** readers writing their own parsers and/or checkers should initialize names[0] as the anonymous name, and levels[0] as universe zero, as they are not emitted by the exporter, but are expected to occupy the name and level indices for 0.

```
File ::= ExportFormatVersion Item*

ExportFormatVersion ::= nat '.' nat '.' nat

Item ::= Name | Universe | Expr | RecRule | Declaration

Declaration ::=
| Axiom
| Quotient
| Definition
| Theorem
| Inductive
| Constructor
| Recursor

nidx, uidx, eidx, ridx ::= nat

Name ::=
| nidx "#NS" nidx string
| nidx "#NI" nidx nat

Universe ::=
| uidx "#US" uidx
| uidx "#UM" uidx uidx
| uidx "#UIM" uidx uidx
| uidx "#UP" nidx

Expr ::=
| eidx "#EV" nat
| eidx "#ES" uidx
| eidx "#EC" nidx uidx*
| eidx "#EA" eidx eidx
| eidx "#EL" Info nidx eidx
| eidx "#EP" Info nidx eidx eidx
| eidx "#EZ" Info nidx eidx eidx eidx
| eidx "#EJ" nidx nat eidx
| eidx "#ELN" nat
| eidx "#ELS" (hexhex)*

Info ::= "#BD" | "#BI" | "#BS" | "#BC"

Hint ::= "O" | "A" | "R" nat

RecRule ::= ridx "#RR" (ctorName : nidx) (nFields : nat) (val : eidx)

Axiom ::= "#AX" (name : nidx) (type : eidx) (uparams : uidx*)

Def ::= "#DEF" (name : nidx) (type : eidx) (value : eidx) (hint : Hint) (uparams : uidx*)

Theorem ::= "#THM" (name : nidx) (type : eidx) (value : eidx) (uparams: uidx*)

Quotient ::= "#QUOT" (name : nidx) (type : eidx) (uparams : uidx*)

Inductive ::=
"#IND"
(name : nidx)
(type : eidx)
(isRecursive: 0 | 1)
(isNested : 0 | 1)
(numParams: nat)
(numIndices: nat)
(numInductives: nat)
(inductiveNames: nidx {numInductives})
(numConstructors : nat)
(constructorNames : nidx {numConstructors})
(uparams: uidx*)

Constructor ::=
"#CTOR"
(name : nidx)
(type : eidx)
(parentInductive : nidx)
(constructorIndex : nat)
(numParams : nat)
(numFields : nat)
(uparams: uidx*)

Recursor ::=
"#REC"
(name : nidx)
(type : eidx)
(numInductives : nat)
(inductiveNames: nidx {numInductives})
(numParams : nat)
(numIndices : nat)
(numMotives : nat)
(numMinors : nat)
(numRules : nat)
(recRules : ridx {numRules})
(k : 1 | 0)
(uparams : uidx*)
```
Loading