Skip to content

Commit

Permalink
Initial definition of lexer, parser, language, file type
Browse files Browse the repository at this point in the history
  • Loading branch information
jansorg committed Feb 15, 2021
1 parent ae52a6a commit 12fba0f
Show file tree
Hide file tree
Showing 201 changed files with 8,591 additions and 27 deletions.
378 changes: 378 additions & 0 deletions .editorconfig

Large diffs are not rendered by default.

24 changes: 24 additions & 0 deletions .run/Run Lexer Tests.run.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<!-- IntelliJ run configuration "Run Lexer Tests": executes the Gradle tasks :generateLexer and :test and passes a tests filter for dev.monogon.cue.lang.lexer.* as script parameter. -->
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="Run Lexer Tests" type="GradleRunConfiguration" factoryName="Gradle">
<ExternalSystemSettings>
<option name="executionName" />
<option name="externalProjectPath" value="$PROJECT_DIR$" />
<option name="externalSystemIdString" value="GRADLE" />
<option name="scriptParameters" value="--tests &quot;dev.monogon.cue.lang.lexer.*&quot;" />
<option name="taskDescriptions">
<list />
</option>
<option name="taskNames">
<list>
<option value=":generateLexer" />
<option value=":test" />
</list>
</option>
<option name="vmOptions" value="" />
</ExternalSystemSettings>
<ExternalSystemDebugServerProcess>false</ExternalSystemDebugServerProcess>
<ExternalSystemReattachDebugProcess>true</ExternalSystemReattachDebugProcess>
<DebugAllEnabled>false</DebugAllEnabled>
<method v="2" />
</configuration>
</component>
24 changes: 24 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,30 @@

**CUE Language** support for the IntelliJ platform.

## Development
### IDE
Development is best in IntelliJ IDEA.

The following plugins are required for development:

- [GrammarKit 2020.3.1](https://plugins.jetbrains.com/plugin/6606-grammar-kit)
- Gradle
- Kotlin, for Gradle build file support

### Lexer
The lexer is generated by JFlex. The definition is at `src/grammar/cue.flex`.

The following command regenerates the lexer:
```bash
./gradlew generateLexer
```
### Parser
The parser is generated with JetBrains' GrammarKit. GrammarKit is a plugin for IntelliJ IDEA.
The definition is at `src/grammar/cue.bnf`.

To update the parser and all related classes, open the `cue.bnf` file in your IDE and choose `Generate Parser` in the context menu of the editor.

## Useful Links

- [CUE Website](https://cuelang.org/)
- [The CUE Language Specification](https://cuelang.org/docs/references/spec/)
48 changes: 25 additions & 23 deletions build.gradle.kts
Original file line number Diff line number Diff line change
@@ -1,19 +1,15 @@
import org.jetbrains.changelog.closure
import org.jetbrains.changelog.markdownToHTML

plugins {
// Java support
id("java")
// gradle-intellij-plugin - read more: https://github.com/JetBrains/gradle-intellij-plugin
id("org.jetbrains.intellij") version "0.6.5"
// gradle-changelog-plugin - read more: https://github.com/JetBrains/gradle-changelog-plugin
plugins { // Java support
id("java") // gradle-intellij-plugin - read more: https://github.com/JetBrains/gradle-intellij-plugin
id("org.jetbrains.intellij") version "0.6.5" // gradle-changelog-plugin - read more: https://github.com/JetBrains/gradle-changelog-plugin
id("org.jetbrains.changelog") version "1.1.1"
id("org.jetbrains.grammarkit") version "2020.3.2"
}

// Import variables from gradle.properties file
val pluginGroup: String by project
// `pluginName_` variable ends with `_` because of the collision with Kotlin magic getter in the `intellij` closure.
// Read more about the issue: https://github.com/JetBrains/intellij-platform-plugin-template/issues/29
val pluginGroup: String by project // `pluginName_` variable ends with `_` because of the collision with Kotlin magic getter in the `intellij` closure. // Read more about the issue: https://github.com/JetBrains/intellij-platform-plugin-template/issues/29
val pluginName_: String by project
val pluginVersion: String by project
val pluginSinceBuild: String by project
Expand All @@ -34,6 +30,11 @@ repositories {
jcenter()
}

// Register src/main/java-gen as an additional Java source folder of the main source set.
// The generateLexer task writes its output below this folder (see its targetDir).
sourceSets.main {
java.srcDir("src/main/java-gen")
}

// Configure gradle-intellij-plugin plugin.
// Read more: https://github.com/JetBrains/gradle-intellij-plugin
intellij {
Expand All @@ -53,8 +54,7 @@ changelog {
version = pluginVersion
}

tasks {
// disable building searchable options to speed up build, we currently don't have a settings UI
tasks { // disable building searchable options to speed up build, we currently don't have a settings UI
buildSearchableOptions {
enabled = false
}
Expand All @@ -71,18 +71,14 @@ tasks {
untilBuild(pluginUntilBuild)

// Extract the <!-- Plugin description --> section from README.md and provide for the plugin's manifest
pluginDescription(
closure {
File("./plugin-description.md").readText().run { markdownToHTML(this) }
}
)
pluginDescription(closure {
File("./plugin-description.md").readText().run { markdownToHTML(this) }
})

// Get the latest available change notes from the changelog file
changeNotes(
closure {
changelog.getLatest().toHTML()
}
)
changeNotes(closure {
changelog.getLatest().toHTML()
})
}

runPluginVerifier {
Expand All @@ -91,10 +87,16 @@ tasks {

publishPlugin {
dependsOn("patchChangelog")
token(System.getenv("PUBLISH_TOKEN"))
// pluginVersion is based on the SemVer (https://semver.org) and supports pre-release labels, like 2.1.7-alpha.3
token(System.getenv("PUBLISH_TOKEN")) // pluginVersion is based on the SemVer (https://semver.org) and supports pre-release labels, like 2.1.7-alpha.3
// Specify pre-release label to publish the plugin in a custom Release Channel automatically. Read more:
// https://plugins.jetbrains.com/docs/intellij/deployment.html#specifying-a-release-channel
channels(pluginVersion.split('-').getOrElse(1) { "default" }.split('.').first())
}
}

// Registers the "generateLexer" task, which generates the CUE lexer with JFlex (run it with `./gradlew generateLexer`).
tasks.register<org.jetbrains.grammarkit.tasks.GenerateLexer>("generateLexer") {
// JFlex definition of the lexer
source = "src/grammar/cue.flex"
// output directory; it is located inside the additional source folder src/main/java-gen
targetDir = "src/main/java-gen/dev/monogon/cue/lang/lexer"
// name of the generated lexer class
targetClass = "_CueLexerGen"
// NOTE(review): presumably removes previously generated files from targetDir before generating - confirm against the gradle-grammar-kit-plugin documentation
purgeOldFiles = true
}
169 changes: 169 additions & 0 deletions src/grammar/cue.bnf
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
/*
GrammarKit BNF grammar for CUE.
It tries to remain as close as possible to the original grammar.
Please note the following important differences:
- CUE has ":" to start a rule, GrammarKit has "::="
- in CUE's grammar {a} means 0 or more "a", but in GrammarKit {} is just a grouping element.
GrammarKit's most-similar equivalent: {}*
- GrammarKit doesn't support left-recursive rules
*/

// Global attributes of the grammar: code generation settings, generated class names and the token definitions.
{
generate=[java="8"]
// all rules (pattern ".*") consume tokens with consumeTokenFast
consumeTokenMethod(".*")="consumeTokenFast"
generateTokenAccessors=false

// names of the generated parser class and of the parser utility class
parserClass="dev.monogon.cue.lang.parser.CueParser"
parserUtilClass="dev.monogon.cue.lang.parser.CueParserUtil"

// classes used for token types, the element type holder and the element types of rules
tokenTypeClass="dev.monogon.cue.lang.CueTokenType"
elementTypeHolderClass="dev.monogon.cue.lang.CueTypes"
elementTypeClass="dev.monogon.cue.lang.psi.CueCompositeElementType"

// base interface and base class of the generated PSI elements
implements="dev.monogon.cue.lang.psi.CueCompositeElement"
extends="dev.monogon.cue.lang.psi.CueCompositeElementImpl"

// naming and packages of the generated PSI interfaces and implementation classes
psiClassPrefix="Cue"
psiImplClassSuffix="Impl"
psiPackage="dev.monogon.cue.lang.psi"
psiImplPackage="dev.monogon.cue.lang.psi.impl"

// token definitions in the form NAME = text
tokens=[
NEWLINE = '\n'

COMMA = ','
IDENTIFIER = 'IDENTIFIER'
INT_LIT = 'INT_LIT'
FLOAT_LIT = 'FLOAT_LIT'
NULL_LIT = 'NULL_LIT'
BOOL_LIT = 'BOOL_LIT'
KEYWORD = "KEYWORD"

REL_OP = "REL_OP"
ADD_OP = "ADD_OP"
MUL_OP = "MUL_OP"
PIPE = "|"
AMP = "&"
PIPE_PIPE = "||"
AMP_AMP = "&&"
EQ_EQ = "=="
OPERATOR = "OPERATOR"

// we're using _END tokens for string terminating tokens,
// because CueCommaInsertingLexer needs to know where string literals end
SINGLE_QUOTE = "SINGLE_QUOTE"
SINGLE_QUOTE_END = "SINGLE_QUOTE_END"
DOUBLE_QUOTE = "DOUBLE_QUOTE"
DOUBLE_QUOTE_END = "DOUBLE_QUOTE_END"
UNICODE_VALUE = "UNICODE_VALUE"
BYTE_VALUE = "BYTE_VALUE"
// NOTE(review): the *_START and *_END tokens of multiline literals share the same text - confirm that rules only reference them by name
MULTILINE_STRING_START = "\"\"\""
MULTILINE_STRING_END = "\"\"\""
MULTILINE_BYTES_START = "'''"
MULTILINE_BYTES_END = "'''"

INTERPOLATION_START = "\\("
// NOTE(review): ")" is also the text of RIGHT_PAREN below; a literal ")" in a rule can resolve to only one of the two tokens - confirm which one GrammarKit picks
INTERPOLATION_END = ")"

LEFT_CURLY="{"
RIGHT_CURLY="}"
COLON=":"
ELLIPSIS_TOKEN="..."
EQ="="
QMARK="?"
LEFT_BRACKET="["
RIGHT_BRACKET="]"
AT="@"
LEFT_PAREN="("
RIGHT_PAREN=")"
]
}

// https://cuelang.org/docs/references/spec/#source-file-organization
// First rule of the grammar: an optional package clause, then import declarations, then declarations.
// Each of these items is followed by ",".
private file ::= [ PackageClause "," ] { ImportDecl "," }* { Declaration "," }*
PackageClause ::= "package" PackageName
private PackageName ::= IDENTIFIER

// Either a single import spec or a parenthesized list of import specs, each followed by ",".
ImportDecl ::= "import" ( ImportSpec | "(" { ImportSpec "," }* ")" )
// An import path with an optional package name in front of it.
ImportSpec ::= [ PackageName ] ImportPath
ImportLocation ::= { UNICODE_VALUE }*
// The quoted import location, optionally followed by ":" and an identifier.
ImportPath ::= "\"" ImportLocation [ ":" IDENTIFIER ] "\""

// Simple string and bytes literals: a start token, any number of values or interpolations, and a separate *_END token.
simple_string_lit ::= DOUBLE_QUOTE { UNICODE_VALUE | interpolation }* DOUBLE_QUOTE_END {extends=Literal}
simple_bytes_lit ::= SINGLE_QUOTE { UNICODE_VALUE | interpolation }* SINGLE_QUOTE_END {extends=Literal}
// Multiline literals require a NEWLINE directly after the opening token.
// fixme it might be good to keep escaped chars as tokens, not just UNICODE_VALUE, for highlighting and error reporting
multiline_string_lit ::= MULTILINE_STRING_START NEWLINE { UNICODE_VALUE | interpolation | NEWLINE }* NEWLINE* MULTILINE_STRING_END {extends=Literal}
// fixme it might be good to keep escaped chars as tokens, not just UNICODE_VALUE, for highlighting and error reporting
multiline_bytes_lit ::= MULTILINE_BYTES_START NEWLINE { UNICODE_VALUE | BYTE_VALUE | interpolation | NEWLINE }* NEWLINE* MULTILINE_BYTES_END {extends=Literal}

// An expression embedded into a string or bytes literal.
interpolation ::= INTERPOLATION_START Expression INTERPOLATION_END

// Any of the four literal kinds, optionally wrapped into one or more pairs of "#".
private string_lit ::= simple_string_lit
| multiline_string_lit
| simple_bytes_lit
| multiline_bytes_lit
| "#" string_lit "#"

// https://cuelang.org/docs/references/spec/#structs
// A struct literal: declarations in curly braces, each declaration followed by ",".
StructLit ::= "{" { Declaration "," }* "}" {extends=Literal}
Declaration ::= Field | Ellipsis | Embedding | LetClause | attribute
Ellipsis ::= "..." [ Expression ] {extends=Declaration}
Embedding ::= Comprehension | AliasExpr {extends=Declaration}
// One or more labels, each followed by ":", then the value expression and any number of attributes.
Field ::= Label ":" { Label ":" }* Expression { attribute }* {extends=Declaration}
// A label expression with an optional alias (IDENTIFIER "=") in front of it.
Label ::= [ IDENTIFIER "=" ] LabelExpr
LabelExpr ::= LabelName [ "?" ] | "[" AliasExpr "]"
private LabelName ::= IDENTIFIER | simple_string_lit

// An attribute: "@", a name and a parenthesized sequence of attribute tokens.
attribute ::= "@" IDENTIFIER "(" attr_tokens ")"
// Attribute content: attr_token items and nested groups in (), [] or {}.
// NOTE(review): <<attr_token>> is an external expression, presumably implemented in the parser utility class - confirm
attr_tokens ::= { <<attr_token>> // fixme psi element for attr_token?
| "(" attr_tokens ")"
| "[" attr_tokens "]"
| "{" attr_tokens "}"
}*

// https://cuelang.org/docs/references/spec/#attributes
// An expression with an optional alias (IDENTIFIER "=") in front of it.
// The alias prefix has to be tried first: choices are ordered, so a leading plain `Expression`
// alternative consumes the IDENTIFIER as an expression and the `IDENTIFIER "="` form is never reached.
// The parser has to be regenerated after changing this rule.
AliasExpr ::= [ IDENTIFIER "=" ] Expression { extends=Expression }

// fixme added closing ] at the end, bug in grammar
// A list literal in brackets: an optional element list, which may be followed by "," with an optional ellipsis
// and by an optional trailing ",".
ListLit ::= "[" [ ElementList [ "," [ Ellipsis ] ] [ "," ] ] "]" {extends=Literal}
// One or more embeddings, separated by ",".
ElementList ::= Embedding { "," Embedding }*

// https://cuelang.org/docs/references/spec/#expressions
// An operand is a literal, a name or a parenthesized expression.
Operand ::= Literal | OperandName | "(" Expression ")" {extends=PrimaryExpr}
Literal ::= BasicLit | ListLit | StructLit {extends=Operand}
BasicLit ::= INT_LIT | FLOAT_LIT | string_lit | NULL_LIT | BOOL_LIT | BOTTOM_LIT | TOP_LIT {extends=Literal}
// NOTE(review): IDENTIFIER is tried before QualifiedIdent, so `a.b` presumably parses as IDENTIFIER followed by a Selector - confirm whether QualifiedIdent is reachable
OperandName ::= IDENTIFIER | QualifiedIdent {extends=Operand}

QualifiedIdent ::= PackageName "." IDENTIFIER {extends=Operand}

// https://cuelang.org/docs/references/spec/#primary-expressions
//PrimaryExpr ::= Operand | PrimaryExpr Selector | PrimaryExpr Index | PrimaryExpr Slice | PrimaryExpr Arguments
// fixme this is a simple rewrite as non-left-recursive for now
// An operand followed by any number of selectors, index expressions, slices or argument lists.
PrimaryExpr ::= Operand {Selector | Index | Slice | Arguments}* {extends=Expression}

Selector ::= "." (IDENTIFIER | simple_string_lit) {extends=PrimaryExpr}
Index ::= "[" Expression "]" {extends=PrimaryExpr}
Argument ::= Expression {extends=PrimaryExpr}
// A parenthesized, comma-separated argument list; it may be empty and may end with a trailing ",".
Arguments ::= "(" [ ( Argument { "," Argument }* ) [ "," ] ] ")" {extends=PrimaryExpr}
// fixme Slice is missing

// https://cuelang.org/docs/references/spec/#operators
// Root of the expression rules; UnaryExpr and BinaryExpr both extend it.
Expression ::= UnaryExpr | BinaryExpr // fixme extra root?
UnaryExpr ::= PrimaryExpr | unary_op UnaryExpr { extends=Expression }
BinaryExpr ::= Expression binary_op Expression { extends=Expression }

// Operators: rel_op, add_op and mul_op each map to a single token, the trailing comments list the corresponding operators.
private binary_op ::= PIPE | AMP | PIPE_PIPE | AMP_AMP | EQ_EQ | rel_op | add_op | mul_op //"|" | "&" | "||" | "&&" | "==" | rel_op | add_op | mul_op
private rel_op ::= REL_OP //"!=" | "<" | "<=" | ">" | ">=" | "=~" | "!~"
private add_op ::= ADD_OP //"+" | "-"
private mul_op ::= MUL_OP //"*" | "/" | "div" | "mod" | "quo" | "rem"
// NOTE(review): "*" is used as a literal here and is also listed in the mul_op comment - confirm which token the lexer returns for it
private unary_op ::= ADD_OP | "!" | "*" | rel_op // "+" | "-" | "!" | "*" | rel_op

// https://cuelang.org/docs/references/spec/#comprehensions
// A comprehension: clauses followed by a struct literal.
Comprehension ::= Clauses StructLit

// A start clause followed by any number of further clauses, each optionally preceded by ",".
Clauses ::= StartClause { [ "," ] Clause }*
StartClause ::= ForClause | GuardClause
Clause ::= StartClause | LetClause
// "for" with one identifier or two identifiers separated by ",", then "in" and an expression.
ForClause ::= "for" IDENTIFIER [ "," IDENTIFIER ] "in" Expression
GuardClause ::= "if" Expression
LetClause ::= "let" IDENTIFIER "=" Expression
Loading

0 comments on commit 12fba0f

Please sign in to comment.