Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Menhir parser #1295

Draft
wants to merge 151 commits into
base: master
Choose a base branch
from
Draft
Changes from 1 commit
Commits
Show all changes
151 commits
Select commit Hold shift + click to select a range
6958fd5
convert lexer to use menhir tokens
FayCarsons Aug 23, 2024
6654f63
lexer uses new tokens
FayCarsons Aug 26, 2024
333e446
unwrap tokens received from lexer, notes
FayCarsons Aug 26, 2024
1ba3a63
remove modules directive from parser/test/dune
FayCarsons Aug 28, 2024
21135d8
Add media tokens, null parser will compile, separate types and utils
FayCarsons Aug 28, 2024
a42097c
trivial/whitespace tests passing
FayCarsons Aug 29, 2024
a2a9083
Add inline element and list rules
FayCarsons Sep 2, 2024
36befac
Fix naming in list rule, add dummy 'nestable block element' rule
FayCarsons Sep 2, 2024
54fef41
Refactor 'tag' rule to support variants w/ children
FayCarsons Sep 2, 2024
68cb3b0
Rewrite 'located' parser util to use new menhir syntax; formatting
FayCarsons Sep 2, 2024
0c4f9c1
move tag parser helpers to Menhir header
FayCarsons Sep 2, 2024
1408d3f
Add heavy list and nestable element rules
FayCarsons Sep 3, 2024
d255bde
Use `located` modifier, formatting
FayCarsons Sep 3, 2024
6fa50f2
Add rules to `main`
FayCarsons Sep 3, 2024
146e4cc
Pass current filename to Menhir parser so that it is used in AST loca…
FayCarsons Sep 4, 2024
7f32a00
refactor exception handling + debug exns
FayCarsons Sep 4, 2024
bcb99df
Add printer for Debug exceptions
FayCarsons Sep 4, 2024
b5287d5
Add rule for headers
FayCarsons Sep 5, 2024
ee2bc70
Refactor table tokens to follow Menhir all-caps convention
FayCarsons Sep 5, 2024
aa5d1f4
Add table rules
FayCarsons Sep 5, 2024
44e5cb6
Simplify and add comment for `unwrapped_token` in `Odoc_parser.parse_…
FayCarsons Sep 6, 2024
6934b99
Clean up `nestable_block_element` rule producers, list rule
FayCarsons Sep 6, 2024
a953140
Add section delimiting comments, clean up `ref` producers
FayCarsons Sep 6, 2024
4c5fefd
Add braces and whitespace to producers
FayCarsons Sep 6, 2024
76a6c64
Refactor toplevels, add delimiters where missing, format
FayCarsons Sep 6, 2024
1377328
Formatting, fix `list_heavy` rule, swap empty + nonempty lists where …
FayCarsons Sep 6, 2024
bf52c6c
remove unused `COMMENT` token
FayCarsons Sep 6, 2024
31e7b96
Remove reference to `COMMENT` token in `parser_utils.ml`
FayCarsons Sep 6, 2024
bf33d8e
Add (previously forgotten) end-of-input handling :3
FayCarsons Sep 10, 2024
5ab9a68
Add toplevel whitespace handling
FayCarsons Sep 11, 2024
9d672be
Incomplete rules for light table syntax
FayCarsons Sep 12, 2024
39fc8d0
Light table syntax - NOT COMPILING
FayCarsons Sep 13, 2024
ba2c3f3
Fix light table type conflict, reduce error remains
FayCarsons Sep 16, 2024
e59d416
Add recovery from invalid align rows, clean up, better naming
FayCarsons Sep 16, 2024
1bc0add
Refactor light table syntax
FayCarsons Sep 17, 2024
1718cca
Remove inline opens in token declaration, add comment explaining alig…
FayCarsons Sep 18, 2024
b07a20a
Cleanup, add TODO comments
FayCarsons Sep 18, 2024
981609e
Further cleanup, add empty table producer
FayCarsons Sep 18, 2024
d53cdf2
Add simple media rule
FayCarsons Sep 18, 2024
ca9ccc7
fix debug error handling
FayCarsons Sep 18, 2024
4b33ea0
Add new token descriptions to parser utils
FayCarsons Sep 19, 2024
7a512f2
fix simple media producer
FayCarsons Sep 19, 2024
d242dda
fix heavy list rule
FayCarsons Sep 19, 2024
2525416
Add whitespace to toplevel, closing brace to `Styled`
FayCarsons Sep 19, 2024
a905a03
Add whitespace handling to heavy list rule
FayCarsons Sep 19, 2024
f122c16
Cleanup, use $sloc vs $loc in `parser.mly`
FayCarsons Sep 20, 2024
fdc2ee6
Rename parser utils and token printer modules
FayCarsons Sep 23, 2024
117c78b
Remove unused `Error` module
FayCarsons Sep 23, 2024
366267d
Fix `Ref` and `Simple_ref` rules
FayCarsons Sep 23, 2024
1e2907e
Fix modules rule
FayCarsons Sep 24, 2024
d4698a1
Fix heavy table rule
FayCarsons Sep 24, 2024
85167f5
fix code block rule
FayCarsons Sep 24, 2024
eb0b0f4
Fix light table parsing
FayCarsons Sep 26, 2024
54ea138
Fixed tagging and consolidation of elements in row processing
FayCarsons Sep 30, 2024
f11faef
Fix row processing
FayCarsons Sep 30, 2024
68eff9e
Cleanup, remove unused, remove comments
FayCarsons Oct 15, 2024
f3704c0
Enforce invariants in parser
FayCarsons Oct 17, 2024
f148816
Refactor tag parsing
FayCarsons Oct 21, 2024
54de5e8
intermediate tree and unpack fn
FayCarsons Nov 15, 2024
ab1dc0c
Tokens working, warning being created in parser
FayCarsons Nov 15, 2024
6595f2c
Squash merge tester-original into intermediate
FayCarsons Nov 25, 2024
c04e42c
Add heavy list item warnings
FayCarsons Nov 25, 2024
b1b3e5d
Handle EOI in heavy list item parsing
FayCarsons Nov 25, 2024
69dba7f
Add all currently handled error cases to test code, fix `sequence`
FayCarsons Nov 25, 2024
cedeb7d
Remove `any` rule
FayCarsons Nov 25, 2024
6a04998
Describe Ast nodes like tokens for error recovery
FayCarsons Nov 25, 2024
fd6d260
remove empty_code_block from ast.ml
FayCarsons Nov 27, 2024
90176a5
Add empty tag warnings
FayCarsons Nov 27, 2024
8858029
add non-empty warnings for nestable block elements
FayCarsons Nov 27, 2024
4f1379c
Squash merge modules into intermediate
FayCarsons Nov 27, 2024
9c83616
break nestable_block and inline_element up into smaller rules
FayCarsons Nov 27, 2024
eb62ed3
Explain and pretty print failures in tester
FayCarsons Dec 2, 2024
e0e0ca6
Annotate tokens in parser
FayCarsons Dec 2, 2024
e28d6dd
Tester includes lexer warnings
FayCarsons Dec 2, 2024
2572423
Error recovery for unexpected EOI working
FayCarsons Dec 2, 2024
e7851a6
Tester with working locations
FayCarsons Dec 3, 2024
7fda51c
Trim start of paragraphs, handle EOF in `{!modules: ...}`
FayCarsons Dec 4, 2024
cbec3b2
Tests fixed
FayCarsons Dec 4, 2024
82e456b
Make light tables simpler and more robust
FayCarsons Dec 4, 2024
8f4482a
Light tables broken for unclear reasons
FayCarsons Dec 6, 2024
b86ffb7
Light tables fixed(?), removed unused `SPACE` token
FayCarsons Dec 9, 2024
8f69af9
Handle more unclosed table cases
FayCarsons Dec 9, 2024
01eb3df
Add error handling for paragraph alignment
FayCarsons Dec 9, 2024
8acacbf
Add location printing fn
FayCarsons Dec 10, 2024
5298af2
Refactor lexer to return unwrapped tokens
FayCarsons Dec 10, 2024
89d7366
improve location handling
FayCarsons Dec 10, 2024
418446e
handle newlines in inline elements
FayCarsons Dec 10, 2024
75a0903
Allow tags w/o children
FayCarsons Dec 10, 2024
f0d5020
Handle stray closing delimiters at toplevel
FayCarsons Dec 11, 2024
f942ad0
Move Parser_aux contents to Loc
FayCarsons Dec 12, 2024
f6b6b80
Remove filename from Writer.warning, function takes input string instead
FayCarsons Dec 12, 2024
1525efd
Add general illegal syntax error for undecidable cases
FayCarsons Dec 13, 2024
f286e81
fix heavy tables, remove unnecessary whitespace preceding RIGHT_BRACE
FayCarsons Dec 17, 2024
182a85f
Location tracking improved
FayCarsons Dec 18, 2024
c2f28cf
Fix toplevel stray brace warning
FayCarsons Dec 18, 2024
27716be
convert polymorphic token types to nominal
FayCarsons Dec 18, 2024
5502b16
remove TODO.md
FayCarsons Dec 18, 2024
c2c27f9
Fix @see kind
FayCarsons Dec 18, 2024
9b8e18f
Trim tag bodies, fix section heading
FayCarsons Dec 18, 2024
d7b5c44
update `Parse_error.illegal` to take description of parent element
FayCarsons Dec 19, 2024
e45ca40
Add EOF handling to References with replacement text
FayCarsons Dec 19, 2024
6507011
TODO
FayCarsons Dec 20, 2024
acbbe4a
Fix leading and trailing whitespace in tags @version, @canonical, @si…
FayCarsons Dec 23, 2024
aed2720
remove unnecessary string trim in `@before` lexing
FayCarsons Dec 23, 2024
3f9884e
Clean up syntax in lexer
FayCarsons Dec 23, 2024
166bb19
Return tokens directly in lexbuf
FayCarsons Dec 23, 2024
59c6ea4
inline_element+nestable_block_element return elt w/ location
FayCarsons Dec 27, 2024
d2737eb
Fix Tag and Math block/span locations
FayCarsons Dec 30, 2024
b598a5c
All block elements return spanning location
FayCarsons Dec 31, 2024
e75caa2
fix light lists
FayCarsons Dec 31, 2024
a91f196
fix '@see' tag
FayCarsons Jan 1, 2025
7908e36
format, add stray tag handling
FayCarsons Jan 2, 2025
66bb9ac
Split `Code_block` into regular and `Code_block_with_output`
FayCarsons Jan 6, 2025
80a7bd3
cover remaining error cases
FayCarsons Jan 6, 2025
77be167
fix leading whitespace error
FayCarsons Jan 6, 2025
3d7d8b7
update tester
FayCarsons Jan 6, 2025
ae835ad
Handle stray dashes, '+', and '-'
FayCarsons Jan 6, 2025
78ec1a4
improve `Paragraph_style` location
FayCarsons Jan 7, 2025
6f75b2f
Fix paragraph splitting on newlines
FayCarsons Jan 7, 2025
441285c
finish location refactor
FayCarsons Jan 9, 2025
5166ec7
cover style errors
FayCarsons Jan 9, 2025
3bf452d
improve light table whitespace handling
FayCarsons Jan 9, 2025
16ad51e
make `light_table` more robust
FayCarsons Jan 9, 2025
34b01cc
Cleanup, break helper functions out into `parser_aux.ml`
FayCarsons Jan 9, 2025
31572d0
Fix `Paragraph_style` location
FayCarsons Jan 10, 2025
603da72
Cleanup, formatting, consistency
FayCarsons Jan 10, 2025
6d51e8e
Improve light list parsing
FayCarsons Jan 11, 2025
108077f
Merge Menhir parser into main
FayCarsons Jan 11, 2025
ebe4c0f
Add Menhir tokens for new tags
FayCarsons Jan 11, 2025
a9daa8c
Remove old files, add parsing rules for new tags
FayCarsons Jan 13, 2025
9424178
refactor tags
FayCarsons Jan 13, 2025
5fc1d05
Make paragraph parsing more robust, lex horizontal space following new…
FayCarsons Jan 13, 2025
bd16e11
Fix whitespace in lexer, consume all leading whitespace in comment
FayCarsons Jan 13, 2025
9a1d5a6
Fix heavy list ambiguity
FayCarsons Jan 14, 2025
116987b
Refactor references/media to pass location
FayCarsons Jan 14, 2025
0fff83f
Fix paragraph splitting
FayCarsons Jan 14, 2025
cf9f849
Parameterize nestable block elements over legal paragraphs
FayCarsons Jan 14, 2025
9543168
Add blank line delimiter for tags with block element content
FayCarsons Jan 15, 2025
46c4d0a
Fix heavy list whitespace handling
FayCarsons Jan 15, 2025
2f64b98
fix light list locations
FayCarsons Jan 15, 2025
3e9188e
Actually(!) fix light list item locations
FayCarsons Jan 15, 2025
cd67627
Fix top-level line-break handling
FayCarsons Jan 16, 2025
82f0126
improve light list error handling
FayCarsons Jan 16, 2025
167895b
Clean and add comments
FayCarsons Jan 16, 2025
d1dcf17
Add comments, TODO file
FayCarsons Jan 16, 2025
e7c41b9
fix unclosed links and refs
FayCarsons Jan 16, 2025
2e89908
update lexbuf line from `Code_block` content
FayCarsons Jan 16, 2025
10f9113
Warn empty code_block
FayCarsons Jan 16, 2025
9a11534
Note on location problems
FayCarsons Jan 16, 2025
3f1cb49
Notes in `TODO.md`
FayCarsons Jan 16, 2025
405b59e
Note about emulating context
FayCarsons Jan 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Use located modifier, formatting
  • Loading branch information
FayCarsons committed Sep 3, 2024
commit d255bdeac46950493ce34a732c73ac09ec17ff9f
40 changes: 21 additions & 19 deletions src/parser/parser.mly
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,9 @@ let located(rule) == value = rule; { wrap_location $loc value }

let main :=
| _ = whitespace; { [] }
| t = tag; { [ wrap_location $sloc t ]}
| t = located(tag); { [ t ]}
| END; { [] }
| _ = error; { raise @@ exn_location ~only_for_debugging:( $loc ) }
| _ = error; { raise @@ exn_location ~only_for_debugging:$loc }

let whitespace :=
| SPACE; { `Space " " }
Expand All @@ -125,40 +125,42 @@ let inline_element :=
| ~ = Space; <`Space>
| ~ = Word; <`Word>
| ~ = Code_span; <`Code_span>
| s = Raw_markup; { `Raw_markup ( None, s ) }
| style = Style; inner = inline_element; { `Styled (style, wrap_location $loc inner) }
| ~ = Raw_markup; <`Raw_markup>
| style = Style; inner = located( inline_element ); { `Styled (style, [ inner ]) }
| ~ = Math_span; <`Math_span>
| ~ = ref; <>
| ~ = link; <>

(* TODO: Determine how we want to handle recursive elements like refs and some of the tags that have nestable_block inners
Currently, this is broken *)
let ref :=
| ref_body = Simple_ref; children = inline_element; { `Reference (`Simple, ref_body, [ wrap_location $loc children ]) }
| ref_body = Ref_with_replacement; children = inline_element; { `Reference (`With_text, ref_body, [ wrap_location $loc children ]) }
| ref_body = located(Simple_ref ); children = located( inline_element ); { `Reference (`Simple, ref_body, [ children ]) }
| ref_body = located(Ref_with_replacement); children = located( inline_element ); { `Reference (`With_text, ref_body, [ children ]) }

(* TODO : Fix the `with_replacement` producers in the following two rules, if they're broken. Ask what `with_replacement` refers to *)
let link :=
| link_body = Simple_link; children = inline_element; { `Link ( link_body, [ wrap_location $loc children ] ) }
| link_body = Link_with_replacement; children = inline_element; { `Link ( link_body, [ wrap_location $loc children ]
)}
| link_body = Simple_link; children = located(inline_element); { `Link (link_body, [ children ]) }
| link_body = Link_with_replacement; children = located(inline_element); { `Link (link_body, [ children ]) }

let list_light :=
| MINUS; unordered_items = separated_list(NEWLINE; MINUS, nestable_block_element); { `List (`Unordered, `Light, unordered_items) }
| PLUS; ordered_items = separated_list(NEWLINE; PLUS, nestable_block_element); { `List (`Ordered, `Light, unordered_items) }
| MINUS; unordered_items = separated_list(NEWLINE; MINUS, located(nestable_block_element)); { `List (`Unordered, `Light, [ unordered_items ]) }
| PLUS; ordered_items = separated_list(NEWLINE; PLUS, located(nestable_block_element)); { `List (`Ordered, `Light, [ ordered_items ]) }

let list_heavy :=
| list_type = List;
| list_kind = List;
items = separated_list(
NEWLINE; _ = List_item; SPACE?; RIGHT_BRACE,
located(nestable_block_element)
); { `List (list_kind, `Heavy, items) }

(* NOTE: (@faycarsons) For some reason the inline_element rule isn't type-checking despite having(??) all of the variants in Ast.inline_element *)
); { `List (list_kind, `Heavy, [ items ]) }

let table := error; { raise @@ exn_location ~only_for_debugging:$loc }

let nestable_block_element :=
| code = Verbatim; { `Verbatim code }
| element = inline_element; { `Paragraph [ ( wrap_location $loc element : Ast.inline_element Loc.with_location ) ] }
| element = located( inline_element ); { `Paragraph [ element ] }
| code_block = Code_block; <`Code_block>
| modules = Modules; { `Modules [ wrap_location $loc modules ]}
| modules = located(Modules); { `Modules [ modules ] }
| _ = table; { raise @@ exn_location ~only_for_debugging:$loc }
| _ = Media; { raise @@ exn_location ~only_for_debugging:$loc }
| ~ = Math_block; <`Math_block>
| ~ = list_light; <>
| ~ = list_heavy; <>

Expand Down