Initial definition of lexer, parser, language, file type

monogon-dev · Feb 15, 2021 · 12fba0f · 12fba0f
1 parent ae52a6a
commit 12fba0f
Show file tree

Hide file tree

Showing 201 changed files with 8,591 additions and 27 deletions.
diff --git a/.editorconfig b/.editorconfig
diff --git a/.run/Run Lexer Tests.run.xml b/.run/Run Lexer Tests.run.xml
@@ -0,0 +1,24 @@
+<component name="ProjectRunConfigurationManager">
+  <configuration default="false" name="Run Lexer Tests" type="GradleRunConfiguration" factoryName="Gradle">
+    <ExternalSystemSettings>
+      <option name="executionName" />
+      <option name="externalProjectPath" value="$PROJECT_DIR$" />
+      <option name="externalSystemIdString" value="GRADLE" />
+      <option name="scriptParameters" value="--tests &quot;dev.monogon.cue.lang.lexer.*&quot;" />
+      <option name="taskDescriptions">
+        <list />
+      </option>
+      <option name="taskNames">
+        <list>
+          <option value=":generateLexer" />
+          <option value=":test" />
+        </list>
+      </option>
+      <option name="vmOptions" value="" />
+    </ExternalSystemSettings>
+    <ExternalSystemDebugServerProcess>false</ExternalSystemDebugServerProcess>
+    <ExternalSystemReattachDebugProcess>true</ExternalSystemReattachDebugProcess>
+    <DebugAllEnabled>false</DebugAllEnabled>
+    <method v="2" />
+  </configuration>
+</component>
diff --git a/README.md b/README.md
@@ -2,6 +2,30 @@
 
 **CUE Language** support for the IntelliJ platform.
 
+## Development
+### IDE
+Development is best in IntelliJ IDEA.
+
+The following plugins are required for development:
+
+- [GrammarKit 2020.3.1](https://plugins.jetbrains.com/plugin/6606-grammar-kit)
+- Gradle
+- Kotlin, for Gradle build file support
+
+### Lexer
+The lexer is generated by JFlex. The definition is at `src/grammar/cue.flex`.
+
+The following command regenerates the lexer:
+```bash
+./gradlew generateLexer
+```
+### Parser
+The parser is generated with JetBrains' GrammarKit. GrammarKit is a plugin for IntelliJ IDEA.
+The definition is at `src/grammar/cue.bnf`.
+
+To update the parser and all related classes, open the `cue.bnf` file in your IDE and choose `Generate Parser` in the context menu of the editor.
+
 ## Useful Link
+
 - [CUE Website](https://cuelang.org/)
 - [The CUE Language Specification](https://cuelang.org/docs/references/spec/)
diff --git a/build.gradle.kts b/build.gradle.kts
@@ -1,19 +1,15 @@
 import org.jetbrains.changelog.closure
 import org.jetbrains.changelog.markdownToHTML
 
-plugins {
-    // Java support
-    id("java")
-    // gradle-intellij-plugin - read more: https://github.com/JetBrains/gradle-intellij-plugin
-    id("org.jetbrains.intellij") version "0.6.5"
-    // gradle-changelog-plugin - read more: https://github.com/JetBrains/gradle-changelog-plugin
+plugins { // Java support
+    id("java") // gradle-intellij-plugin - read more: https://github.com/JetBrains/gradle-intellij-plugin
+    id("org.jetbrains.intellij") version "0.6.5" // gradle-changelog-plugin - read more: https://github.com/JetBrains/gradle-changelog-plugin
     id("org.jetbrains.changelog") version "1.1.1"
+    id("org.jetbrains.grammarkit") version "2020.3.2"
 }
 
 // Import variables from gradle.properties file
-val pluginGroup: String by project
-// `pluginName_` variable ends with `_` because of the collision with Kotlin magic getter in the `intellij` closure.
-// Read more about the issue: https://github.com/JetBrains/intellij-platform-plugin-template/issues/29
+val pluginGroup: String by project // `pluginName_` variable ends with `_` because of the collision with Kotlin magic getter in the `intellij` closure. // Read more about the issue: https://github.com/JetBrains/intellij-platform-plugin-template/issues/29
 val pluginName_: String by project
 val pluginVersion: String by project
 val pluginSinceBuild: String by project
@@ -34,6 +30,11 @@ repositories {
     jcenter()
 }
 
+// setup additional source folders
+sourceSets.main {
+    java.srcDir("src/main/java-gen")
+}
+
 // Configure gradle-intellij-plugin plugin.
 // Read more: https://github.com/JetBrains/gradle-intellij-plugin
 intellij {
@@ -53,8 +54,7 @@ changelog {
     version = pluginVersion
 }
 
-tasks {
-    // disable building searchable options to speed up build, we currently don't settings UI
+tasks { // disable building searchable options to speed up build, we currently don't have a settings UI
     buildSearchableOptions {
         enabled = false
     }
@@ -71,18 +71,14 @@ tasks {
         untilBuild(pluginUntilBuild)
 
         // Extract the <!-- Plugin description --> section from README.md and provide for the plugin's manifest
-        pluginDescription(
-            closure {
-                File("./plugin-description.md").readText().run { markdownToHTML(this) }
-            }
-        )
+        pluginDescription(closure {
+            File("./plugin-description.md").readText().run { markdownToHTML(this) }
+        })
 
         // Get the latest available change notes from the changelog file
-        changeNotes(
-            closure {
-                changelog.getLatest().toHTML()
-            }
-        )
+        changeNotes(closure {
+            changelog.getLatest().toHTML()
+        })
     }
 
     runPluginVerifier {
@@ -91,10 +87,16 @@ tasks {
 
     publishPlugin {
         dependsOn("patchChangelog")
-        token(System.getenv("PUBLISH_TOKEN"))
-        // pluginVersion is based on the SemVer (https://semver.org) and supports pre-release labels, like 2.1.7-alpha.3
+        token(System.getenv("PUBLISH_TOKEN")) // pluginVersion is based on the SemVer (https://semver.org) and supports pre-release labels, like 2.1.7-alpha.3
         // Specify pre-release label to publish the plugin in a custom Release Channel automatically. Read more:
         // https://plugins.jetbrains.com/docs/intellij/deployment.html#specifying-a-release-channel
         channels(pluginVersion.split('-').getOrElse(1) { "default" }.split('.').first())
     }
 }
+
+// Registers the `generateLexer` task: it reads the JFlex definition at `src/grammar/cue.flex` and
+// emits the lexer class `_CueLexerGen` under `src/main/java-gen`, the generated-sources folder
+// that is added to the main source set. Run it with `./gradlew generateLexer`.
+tasks.register<org.jetbrains.grammarkit.tasks.GenerateLexer>("generateLexer") {
+    source = "src/grammar/cue.flex"
+    targetDir = "src/main/java-gen/dev/monogon/cue/lang/lexer"
+    targetClass = "_CueLexerGen"
+    purgeOldFiles = true
+}
diff --git a/src/grammar/cue.bnf b/src/grammar/cue.bnf
@@ -0,0 +1,169 @@
+/*
+GrammarKit BNF grammar for CUE.
+It tries to remain as close as possible to the original grammar.
+Please note the following important differences:
+- CUE has ":" to start a rule, GrammarKit has "::="
+- in CUE's grammar {a} means 0 or more "a", but in GrammarKit {} is just a grouping element.
+  GrammarKit's closest equivalent: {}*
+- GrammarKit doesn't support left-recursive rules
+ */
+
+{
+    generate=[java="8"]
+    consumeTokenMethod(".*")="consumeTokenFast"
+    generateTokenAccessors=false
+
+    parserClass="dev.monogon.cue.lang.parser.CueParser"
+    parserUtilClass="dev.monogon.cue.lang.parser.CueParserUtil"
+
+    tokenTypeClass="dev.monogon.cue.lang.CueTokenType"
+    elementTypeHolderClass="dev.monogon.cue.lang.CueTypes"
+    elementTypeClass="dev.monogon.cue.lang.psi.CueCompositeElementType"
+
+    implements="dev.monogon.cue.lang.psi.CueCompositeElement"
+    extends="dev.monogon.cue.lang.psi.CueCompositeElementImpl"
+
+    psiClassPrefix="Cue"
+    psiImplClassSuffix="Impl"
+    psiPackage="dev.monogon.cue.lang.psi"
+    psiImplPackage="dev.monogon.cue.lang.psi.impl"
+
+    tokens=[
+        NEWLINE    = '\n'
+
+        COMMA       = ','
+        IDENTIFIER  = 'IDENTIFIER'
+        INT_LIT     = 'INT_LIT'
+        FLOAT_LIT   = 'FLOAT_LIT'
+        NULL_LIT    = 'NULL_LIT'
+        BOOL_LIT    = 'BOOL_LIT'
+        KEYWORD     = "KEYWORD"
+
+        REL_OP      = "REL_OP"
+        ADD_OP      = "ADD_OP"
+        MUL_OP      = "MUL_OP"
+        PIPE        = "|"
+        AMP         = "&"
+        PIPE_PIPE   = "||"
+        AMP_AMP     = "&&"
+        EQ_EQ       = "=="
+        OPERATOR    = "OPERATOR"
+
+        // we're using _END tokens for string terminating tokens,
+        // because CueCommaInsertingLexer needs to know where string literals end
+        SINGLE_QUOTE = "SINGLE_QUOTE"
+        SINGLE_QUOTE_END = "SINGLE_QUOTE_END"
+        DOUBLE_QUOTE = "DOUBLE_QUOTE"
+        DOUBLE_QUOTE_END = "DOUBLE_QUOTE_END"
+        UNICODE_VALUE = "UNICODE_VALUE"
+        BYTE_VALUE = "BYTE_VALUE"
+        MULTILINE_STRING_START = "\"\"\""
+        MULTILINE_STRING_END = "\"\"\""
+        MULTILINE_BYTES_START = "'''"
+        MULTILINE_BYTES_END = "'''"
+
+        // NOTE(review): INTERPOLATION_END declares the same text ")" as RIGHT_PAREN further down,
+        // while several rules write ")" as a literal. Confirm which token GrammarKit resolves that
+        // literal to, so parenthesized expressions are not matched against the interpolation token.
+        INTERPOLATION_START = "\\("
+        INTERPOLATION_END = ")"
+
+        LEFT_CURLY="{"
+        RIGHT_CURLY="}"
+        COLON=":"
+        ELLIPSIS_TOKEN="..."
+        EQ="="
+        QMARK="?"
+        LEFT_BRACKET="["
+        RIGHT_BRACKET="]"
+        AT="@"
+        LEFT_PAREN="("
+        RIGHT_PAREN=")"
+    ]
+}
+
+// https://cuelang.org/docs/references/spec/#source-file-organization
+private file ::= [ PackageClause "," ]  { ImportDecl "," }* { Declaration "," }*
+PackageClause  ::= "package" PackageName
+private PackageName ::= IDENTIFIER
+
+ImportDecl       ::= "import" ( ImportSpec | "(" { ImportSpec "," }* ")" )
+ImportSpec       ::= [ PackageName ] ImportPath
+ImportLocation   ::= { UNICODE_VALUE }*
+ImportPath       ::= "\"" ImportLocation [ ":" IDENTIFIER ] "\""
+
+simple_string_lit ::= DOUBLE_QUOTE { UNICODE_VALUE | interpolation }* DOUBLE_QUOTE_END {extends=Literal}
+simple_bytes_lit ::=  SINGLE_QUOTE { UNICODE_VALUE | interpolation }* SINGLE_QUOTE_END {extends=Literal}
+// fixme it might be good to keep escaped chars as tokens, not just UNICODE_VALUE, for highlighting and error reporting
+multiline_string_lit ::= MULTILINE_STRING_START NEWLINE { UNICODE_VALUE | interpolation | NEWLINE }* NEWLINE* MULTILINE_STRING_END {extends=Literal}
+// fixme it might be good to keep escaped chars as tokens, not just UNICODE_VALUE, for highlighting and error reporting
+multiline_bytes_lit ::= MULTILINE_BYTES_START NEWLINE { UNICODE_VALUE | BYTE_VALUE | interpolation | NEWLINE }* NEWLINE* MULTILINE_BYTES_END {extends=Literal}
+
+interpolation ::= INTERPOLATION_START Expression INTERPOLATION_END
+
+private string_lit ::= simple_string_lit
+                   | multiline_string_lit
+                   | simple_bytes_lit
+                   | multiline_bytes_lit
+                   | "#" string_lit "#"
+
+// https://cuelang.org/docs/references/spec/#structs
+StructLit       ::= "{" { Declaration "," }* "}" {extends=Literal}
+Declaration     ::= Field | Ellipsis | Embedding | LetClause | attribute
+Ellipsis        ::= "..." [ Expression ] {extends=Declaration}
+Embedding       ::= Comprehension | AliasExpr {extends=Declaration}
+Field           ::= Label ":" { Label ":" }* Expression { attribute }* {extends=Declaration}
+Label           ::= [ IDENTIFIER "=" ] LabelExpr
+LabelExpr       ::= LabelName [ "?" ] | "[" AliasExpr "]"
+private LabelName       ::= IDENTIFIER | simple_string_lit
+
+attribute       ::= "@" IDENTIFIER "(" attr_tokens ")"
+attr_tokens     ::= { <<attr_token>> // fixme psi element for attr_token?
+                      | "(" attr_tokens ")"
+                      | "[" attr_tokens "]"
+                      | "{" attr_tokens "}"
+                    }*
+
+// https://cuelang.org/docs/references/spec/#attributes
+// An expression with an optional `identifier =` alias prefix.
+// GrammarKit alternatives are ordered: the alias prefix has to be attempted before the bare
+// Expression, otherwise Expression consumes the identifier and the aliased form is never matched.
+// Mirrors the spec production: AliasExpr = [ identifier "=" ] Expression .
+AliasExpr  ::= [ IDENTIFIER "=" ] Expression { extends=Expression }
+
+// fixme added closing ] at the end, bug in grammar
+ListLit       ::= "[" [ ElementList [ "," [ Ellipsis ] ] [ "," ] ] "]" {extends=Literal}
+ElementList   ::= Embedding { "," Embedding }*
+
+// https://cuelang.org/docs/references/spec/#expressions
+Operand     ::= Literal | OperandName | "(" Expression ")" {extends=PrimaryExpr}
+Literal     ::= BasicLit | ListLit | StructLit {extends=Operand}
+BasicLit    ::= INT_LIT | FLOAT_LIT | string_lit | NULL_LIT | BOOL_LIT | BOTTOM_LIT | TOP_LIT {extends=Literal}
+OperandName ::= IDENTIFIER | QualifiedIdent {extends=Operand}
+
+QualifiedIdent ::= PackageName "." IDENTIFIER {extends=Operand}
+
+// https://cuelang.org/docs/references/spec/#primary-expressions
+//PrimaryExpr ::= Operand | PrimaryExpr Selector | PrimaryExpr Index | PrimaryExpr Slice | PrimaryExpr Arguments
+// fixme this is a simple rewrite as non-left-recursive for now
+PrimaryExpr ::= Operand {Selector | Index | Slice | Arguments}* {extends=Expression}
+
+Selector       ::= "." (IDENTIFIER | simple_string_lit) {extends=PrimaryExpr}
+Index          ::= "[" Expression "]" {extends=PrimaryExpr}
+Argument       ::= Expression {extends=PrimaryExpr}
+Arguments      ::= "(" [ ( Argument { "," Argument }* ) [ "," ] ] ")" {extends=PrimaryExpr}
+// fixme Slice is missing
+
+// https://cuelang.org/docs/references/spec/#operators
+Expression ::= UnaryExpr | BinaryExpr // fixme extra root?
+UnaryExpr  ::= PrimaryExpr | unary_op UnaryExpr { extends=Expression }
+BinaryExpr ::= Expression binary_op Expression { extends=Expression }
+
+private binary_op  ::= PIPE | AMP | PIPE_PIPE | AMP_AMP | EQ_EQ | rel_op | add_op | mul_op //"|" | "&" | "||" | "&&" | "==" | rel_op | add_op | mul_op
+private rel_op     ::= REL_OP //"!=" | "<" | "<=" | ">" | ">=" | "=~" | "!~"
+private add_op     ::= ADD_OP //"+" | "-"
+private mul_op     ::= MUL_OP //"*" | "/" | "div" | "mod" | "quo" | "rem"
+private unary_op   ::= ADD_OP | "!" | "*" | rel_op // "+" | "-" | "!" | "*" | rel_op
+
+// https://cuelang.org/docs/references/spec/#comprehensions
+Comprehension       ::= Clauses StructLit
+
+Clauses             ::= StartClause { [ "," ] Clause }*
+StartClause         ::= ForClause | GuardClause
+Clause              ::= StartClause | LetClause
+ForClause           ::= "for" IDENTIFIER [ "," IDENTIFIER ] "in" Expression
+GuardClause         ::= "if" Expression
+LetClause           ::= "let" IDENTIFIER "=" Expression