Skip to content

Commit

Permalink
Merge pull request #4 from butterunderflow/dev-type-checker
Browse files Browse the repository at this point in the history
 type checker implementation
  • Loading branch information
butterunderflow authored Jun 18, 2024
2 parents e63640a + ffce151 commit 1470816
Show file tree
Hide file tree
Showing 24 changed files with 3,640 additions and 394 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
uses: ocaml/[email protected]
with:
# The OCaml compiler packages to initialise.
ocaml-compiler: 4.14.0
ocaml-compiler: 4.13.1
# The name and URL pair of the repository to fetch the packages from.

- name: Install menhir
Expand Down
2 changes: 1 addition & 1 deletion lib/common/dune
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
(inline_tests)
(wrapped false)
(preprocess
(pps ppx_sexp_conv ppx_inline_test ppx_jane)))
(pps ppx_sexp_conv ppx_deriving.eq ppx_deriving.show ppx_inline_test ppx_jane)))

(env
(dev
Expand Down
22 changes: 19 additions & 3 deletions lib/common/ident.ml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,23 @@ let compare_string = String.compare

let compare_int = Int.compare

type t = int * string [@@deriving compare]
type ident = int * string [@@deriving sexp, compare, show]

let sexp_of_t t = S.Atom (Printf.sprintf "%s/%d" (snd t) (fst t))
type t = ident

let t_of_sexp s =
let compare (index0, name0) (index1, name1) =
let index_cmp = compare_int index0 index1 in
if index_cmp <> 0 then index_cmp else compare_string name0 name1

let sexp_of_ident id = S.Atom (Printf.sprintf "%s/%d" (snd id) (fst id))

let name_of_ident (_index, name) = name

let index_of_ident (index, _name) = index

let mk_ident index name = (index, name)

let ident_of_sexp s =
match s with
| S.Atom s ->
let splited = String.split_on_char '/' s in
Expand All @@ -29,3 +41,7 @@ let create ~(hint : string) =
let rename (_, name) = create ~hint:name

let to_string x = Printf.sprintf "%s/%d" (snd x) (fst x)

let show_ident = to_string

let refresh () = index := 0
26 changes: 20 additions & 6 deletions lib/common/ident.mli
Original file line number Diff line number Diff line change
@@ -1,11 +1,25 @@
type t [@@deriving sexp, compare]
val compare_string : String.t -> String.t -> int

val same : t -> t -> bool
val compare_int : int -> int -> int

val from : string -> t
type ident [@@deriving sexp, show]

val create : hint:string -> t
type t = ident [@@deriving compare]

val rename : t -> t
val name_of_ident : ident -> string

val to_string : t -> string
val index_of_ident : ident -> int

val mk_ident : int -> string -> ident

val same : ident -> ident -> bool

val from : string -> ident

val create : hint:string -> ident

val rename : ident -> ident

val to_string : ident -> string

val refresh : unit -> unit
4 changes: 2 additions & 2 deletions lib/syntax/dune
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
(library
(name syntax)
(libraries sexplib menhirLib)
(libraries sexplib menhirLib common)
(inline_tests)
(preprocess
(pps ppx_sexp_conv ppx_inline_test ppx_jane visitors.ppx)))
(pps ppx_deriving.show ppx_sexp_conv ppx_inline_test ppx_jane visitors.ppx)))

(ocamllex lexer)

Expand Down
9 changes: 7 additions & 2 deletions lib/syntax/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ let type_var = '\'' (upper_case | underline | lower_case) alphanumerical*
let digits = digit +
let integers = (sign?) digits

let boolean = "True" | "False"
let boolean = "true" | "false"

let strings = '\"' _* '\"'

Expand All @@ -40,8 +40,13 @@ rule token = parse
| [' ' '\t' '\n'] { token lexbuf }
| eof { EOF }
| "let" { LET }
| "if" { IF }
| "then" { THEN }
| "else" { ELSE }
| "in" { IN }
| "rec" { REC }
| "match" { MATCH }
| "with" { WITH }
| "type" { TYPE }
| "end" { END }
| "and" { AND }
Expand All @@ -66,10 +71,10 @@ rule token = parse
| ':' { COLON }
| ';' { SEMI }
| mod_ident as m { MIDENT m}
| boolean as b { BOOL (bool_of_string b)}
| ident as i { IDENT i }
| type_var as t { TYPEVAR t }
| integers as n { INT (int_of_string n) }
| boolean as b { BOOL (bool_of_string b)}
| "." { DOT}
| strings as s { STRING s}

Expand Down
136 changes: 84 additions & 52 deletions lib/syntax/parser.mly
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,28 @@ let mk_type_ref fon t_args =
%token <bool> BOOL
%token <string> STRING


%token TYPE
%token EOF
%token LET
%token MODULE

%nonassoc LET TYPE
%nonassoc over_TOP

%token IF
%token THEN
%token ELSE
%token REC
%token END
%token MODULE
%token SIG
%token STRUCT
%token VAL
%token FUNCTOR
%token FUN
%token ARROW
%token MATCH
%token WITH
%token <string> IDENT
%token <string> MIDENT
%token <string> TYPEVAR
Expand All @@ -48,20 +58,23 @@ let mk_type_ref fon t_args =
%left COMMA /* (e , e , e) */
%left STAR /* (e * e * e) */

%left below_APP
%nonassoc LPAREN


%type <Parsetree.program> program
%type <Parsetree.top_level list> top_levels
%type <Parsetree.constant> constant
%type <Parsetree.variant> variant
%type <Parsetree.evariant> variant

%type <Parsetree.path> path_dbg
%type <Parsetree.type_expr> type_expr_dbg
%type <Parsetree.ety> type_expr_dbg
%type <Parsetree.mod_expr> mod_expr_dbg
%type <Parsetree.mod_type> mod_type_dbg
%type <Parsetree.emod_ty> mod_type_dbg
%type <Parsetree.expr> expr_dbg
%type <Parsetree.pattern> pattern_dbg

/* Start symbols */
%start program path_dbg type_expr_dbg mod_expr_dbg mod_type_dbg expr_dbg
%start program type_expr_dbg mod_expr_dbg mod_type_dbg expr_dbg pattern_dbg
%%


Expand All @@ -72,35 +85,47 @@ top_levels:
| (* empty *) { [] }
| td=type_def rest=top_levels
{ TopTypeDef td :: rest }
| LET p=pattern EQ e=expr rest=top_levels { TopLet (p, e) :: rest }
| LET x=IDENT EQ e=expr rest=top_levels
{ TopLet (x, e) :: rest }
| LET REC funcs=separated_list(AND, function_bind) rest=top_levels
{ TopLetRec funcs :: rest }
| MODULE m_name=MIDENT
{ TopLetRec funcs :: rest }
| MODULE m_name=MIDENT
EQ m_body=mod_expr rest=top_levels
{ TopMod (m_name, m_body) :: rest }
| MODULE REC functors=separated_list(AND, functor_bind) rest=top_levels { TopModRec functors :: rest } ;
| MODULE TYPE m_name=MIDENT EQ mt=mod_type rest=top_levels
{ TopModSig (m_name, mt) :: rest }
;

mod_expr :
mod_expr:
| LPAREN me=mod_expr RPAREN { me }
| m_name=MIDENT { MEName m_name }
| STRUCT m_body=top_levels END { MEStruct m_body }
| FUNCTOR LPAREN mp=mod_para RPAREN ARROW f_body=mod_expr { MEFunctor (mp, f_body) }

| m = mod_expr DOT n = MIDENT { MEField (m, n) }
| m1 = mod_expr LPAREN m2 = mod_expr RPAREN { MEApply (m1, m2) }
| m1 = mod_expr COLON mt1 = mod_type { MERestrict (m1, mt1) }
;

mod_para :
mod_para :
| m_name=MIDENT COLON m_type=mod_type { (m_name, m_type) }

functor_bind:
| m_name=MIDENT LPAREN mp=mod_para RPAREN ARROW m_body=mod_expr
functor_bind:
| m_name=MIDENT LPAREN mp=mod_para RPAREN ARROW m_body=mod_expr
{ (m_name, (mp, m_body)) }


type_def:
| TYPE LPAREN tvs=separated_list(COMMA, TYPEVAR) RPAREN n=IDENT
EQ vs=separated_list(OR, variant) END { TDAdt (n, tvs, vs) }
| TYPE n=IDENT EQ te=type_expr { TDAlias(n, te) } ;

pattern:
| n=IDENT { PVar n }
type_def:
| TYPE LPAREN tvs=separated_list(COMMA, TYPEVAR) RPAREN n=IDENT
EQ OR? vs=separated_list(OR, variant) %prec over_TOP
{ TDAdt (n, (List.map Ident.from tvs), vs) }

pattern:
| n=IDENT { PVar n } (* variable pattern *)
| c=constant { PVal c }
| c=MIDENT pat=pattern? { PCons (c, pat) }
| m=mod_expr DOT n=MIDENT pat=pattern? { PFieldCons (m, n, pat) }
| LPAREN pats=separated_nontrivial_llist(COMMA, pattern) RPAREN
{ PTuple (pats) }
;

parameter:
| n=IDENT { PBare n }
Expand All @@ -110,80 +135,87 @@ function_bind:
| name=IDENT EQ FUN para=parameter ARROW b=expr
{ (name, (para, b)) }

variant:
| c=MIDENT OF payload=type_expr { (c, Some payload) }
variant:
| c=MIDENT OF payload=type_expr { (c, Some payload) }
| c=MIDENT { (c, None) };

field_def:
field_def:
| n=IDENT COLON t=type_expr { (n, t) }

field_or_name:
| p=path DOT n=IDENT { (Some p, n) }
field_or_name:
| p=mod_expr DOT n=IDENT { (Some p, n) }
| n=IDENT { (None, n) }

type_expr:
type_expr:
| LPAREN t_args = separated_list(COMMA, type_expr) RPAREN fon=field_or_name
{ mk_type_ref fon t_args }
| LPAREN te=type_expr RPAREN { te }
| ts=separated_nontrivial_llist(STAR, type_expr) { TTuple ts }
| t_arg = type_expr fon=field_or_name { mk_type_ref fon [t_arg] }
| n=IDENT { TCons (n, []) }
| tv=TYPEVAR { TVar tv }
| tv=TYPEVAR { TVar (Ident.from tv) }
| arg=type_expr ARROW ret=type_expr { TArrow (arg, ret) }
| LBRACE fields=separated_nontrivial_llist(SEMI, field_def) RBRACE { TRecord fields }

path:
| m_name=MIDENT { MEName m_name }
| m = path DOT n = MIDENT { MEField (m, n) }

expr:
| c=constant { EConst c }
| func=expr arg=expr { EApp (func, arg) } %prec below_APP
| LPAREN e=expr RPAREN { e }
| c=MIDENT { ECons c }
| p=path DOT v=IDENT { EField (p, v) }
| p=path DOT v=MIDENT { EFieldCons (p, v) }
| v=IDENT { EVar v }
| c=MIDENT { ECons c }
| LET p=pattern EQ e1=expr IN e2=expr { ELet (p, e1, e2) }
| v=IDENT { EVar v }
| LET x=IDENT EQ e1=expr IN e2=expr { ELet (x, e1, e2) }
| LET REC binds=separated_nonempty_list(AND, function_bind) IN body=expr { ELetrec (binds, body) }
| IF e0=expr THEN e1=expr ELSE e2=expr { EIf (e0, e1, e2) }
| tu=tuple_expr { tu }
| func=expr arg=expr { EApp (func, arg) }
| LPAREN e=expr RPAREN { e }
| FUN para=parameter ARROW body=expr { ELam (para, body) }
| MATCH e=expr WITH OR? branches=separated_nonempty_list(OR, branch)
{ ECase (e, branches) }
;

tuple_expr:
branch: p=pattern ARROW e=expr { ( p, e ) }

tuple_expr:
| es = separated_nontrivial_llist(COMMA, expr) %prec below_COMMA
{ ETuple es }

path:
| n = MIDENT { PName n }
| p = path DOT n = MIDENT { PMem (p, n) }
| p1 = path LPAREN p2 = path RPAREN { PApply (p1, p2) } ;


mod_type:
mod_type:
| m=MIDENT { MTName m }
| p=path DOT m=MIDENT { MTField (p, m) }
| p=mod_expr DOT m=MIDENT { MTField (p, m) }
| SIG comps=list(sig_comp) END { MTSig comps }
| FUNCTOR
| FUNCTOR
LPAREN p=MIDENT COLON p_ty=mod_type RPAREN
ARROW body=mod_type { MTFunctor (p, p_ty, body) }


sig_comp:
| VAL v=IDENT COLON ty=type_expr { TValueSpec (v, ty) }
| TYPE t=IDENT { TAbstTySpec t }
| TYPE LPAREN tvs=separated_list(COMMA, TYPEVAR) RPAREN t=IDENT
{ TAbstTySpec (t, (List.map Ident.from tvs)) }
| def=type_def { TManiTySpec def }
| MODULE m_name=MIDENT EQ mt=mod_type { TModSpec (m_name, mt) }
;

constant:
constant:
| i = INT { CInt i }
| b = BOOL { CBool b }
| s = STRING { CString s } ;

(* debug rules: which are normal rules append with an eof *)
path_dbg:
| p=path EOF { p }

type_expr_dbg:
type_expr_dbg:
| te=type_expr EOF { te }

mod_expr_dbg:
| me=mod_expr EOF { me }

pattern_dbg:
| p=pattern EOF { p }

mod_type_dbg:
| me=mod_type EOF { me }

Expand Down
Loading

0 comments on commit 1470816

Please sign in to comment.