From e3662b56e87a2488389b47d2506ba0604adcceec Mon Sep 17 00:00:00 2001
From: Sam Estep
Date: Tue, 2 Jan 2024 15:43:48 -0500
Subject: [PATCH] Lex via Moo

---
 package-lock.json                             | 13 +++++
 package.json                                  |  1 +
 packages/rose/package.json                    |  1 +
 packages/rose/src/lex.test.ts                 | 52 +++++++++++++++++++
 packages/rose/src/lex.ts                      | 33 ++++++++++++
 packages/vscode/syntaxes/rose.tmLanguage.json | 11 ++--
 6 files changed, 106 insertions(+), 5 deletions(-)
 create mode 100644 packages/rose/src/lex.test.ts
 create mode 100644 packages/rose/src/lex.ts

diff --git a/package-lock.json b/package-lock.json
index 5ac9a31..c942659 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -8,6 +8,7 @@
         "packages/*"
       ],
       "devDependencies": {
+        "@types/moo": "^0.5",
         "@types/node": "^20",
         "@types/yargs": "^17",
         "@vscode/vsce": "^2",
@@ -414,6 +415,12 @@
         "@types/chai": "*"
       }
     },
+    "node_modules/@types/moo": {
+      "version": "0.5.9",
+      "resolved": "https://registry.npmjs.org/@types/moo/-/moo-0.5.9.tgz",
+      "integrity": "sha512-ZsFVecFi66jGQ6L41TonEaBhsIVeVftTz6iQKWTctzacHhzYHWvv9S0IyAJi4BhN7vb9qCQ3+kpStP2vbZqmDg==",
+      "dev": true
+    },
     "node_modules/@types/node": {
       "version": "20.4.2",
       "resolved": "https://registry.npmjs.org/@types/node/-/node-20.4.2.tgz",
@@ -1595,6 +1602,11 @@
         "ufo": "^1.1.2"
       }
     },
+    "node_modules/moo": {
+      "version": "0.5.2",
+      "resolved": "https://registry.npmjs.org/moo/-/moo-0.5.2.tgz",
+      "integrity": "sha512-iSAJLHYKnX41mKcJKjqvnAN9sf0LMDTXDEvFv+ffuRR9a1MIuXLjMNL6EsnDHSkKLTWNqQQ5uo61P4EbU4NU+Q=="
+    },
     "node_modules/ms": {
       "version": "2.1.2",
       "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz",
@@ -2755,6 +2767,7 @@
       "version": "0.5.0",
       "license": "MIT",
       "dependencies": {
+        "moo": "^0.5",
         "yargs": "^17"
       },
       "bin": {
diff --git a/package.json b/package.json
index 6852ba7..297a25d 100644
--- a/package.json
+++ b/package.json
@@ -5,6 +5,7 @@
     "packages/*"
   ],
   "devDependencies": {
+    "@types/moo": "^0.5",
     "@types/node": "^20",
     "@types/yargs": "^17",
     "@vscode/vsce": "^2",
diff --git a/packages/rose/package.json b/packages/rose/package.json
index c0791ad..d5e8cba 100644
--- a/packages/rose/package.json
+++ b/packages/rose/package.json
@@ -19,6 +19,7 @@
     "src"
   ],
   "dependencies": {
+    "moo": "^0.5",
     "yargs": "^17"
   },
   "scripts": {
diff --git a/packages/rose/src/lex.test.ts b/packages/rose/src/lex.test.ts
new file mode 100644
index 0000000..72cf18d
--- /dev/null
+++ b/packages/rose/src/lex.test.ts
@@ -0,0 +1,52 @@
+import { expect, test } from "vitest";
+import { lexer } from "./lex.js";
+
+const tokens = (
+  source: string,
+): { type: string | undefined; text: string }[] => {
+  const lex = lexer();
+  lex.reset(source);
+  return [...lex].map(({ type, text }) => ({ type, text }));
+};
+
+test("comment", () => {
+  expect(
+    tokens(`# one comment
+
+# another comment`),
+  ).toEqual([
+    { type: "comment", text: "# one comment" },
+    { type: "space", text: "\n\n" },
+    { type: "comment", text: "# another comment" },
+  ]);
+});
+
+test("string", () => {
+  expect(tokens('"one string" "another string"')).toEqual([
+    { type: "str", text: '"one string"' },
+    { type: "space", text: " " },
+    { type: "str", text: '"another string"' },
+  ]);
+});
+
+test("string spanning lines", () => {
+  expect(tokens('"one\nstring"')).toEqual([
+    { type: "str", text: '"one\nstring"' },
+  ]);
+});
+
+test("integer", () => {
+  expect(tokens("42")).toEqual([{ type: "num", text: "42" }]);
+});
+
+test("float", () => {
+  expect(tokens("42.0")).toEqual([{ type: "num", text: "42.0" }]);
+});
+
+test("number followed by letter", () => {
+  expect(() => tokens("1a")).toThrow("invalid syntax");
+});
+
+test("letter followed by number", () => {
+  expect(tokens("a1")).toEqual([{ type: "id", text: "a1" }]);
+});
diff --git a/packages/rose/src/lex.ts b/packages/rose/src/lex.ts
new file mode 100644
index 0000000..f655ac8
--- /dev/null
+++ b/packages/rose/src/lex.ts
@@ -0,0 +1,33 @@
+import moo from "moo";
+
+/**
+ * Build a fresh Moo lexer for Rose source text.
+ *
+ * Every call compiles a new lexer; feed it text with `reset`, then iterate it
+ * to get tokens. Text that matches no rule makes Moo throw an "invalid syntax"
+ * error.
+ */
+export const lexer = () =>
+  moo.compile({
+    // operator: a run of characters that are not whitespace, `#`, word
+    // characters, `"`, brackets of any kind, `,`, or `.`
+    op: /[^\s#\w"\(\)\[\]\{\},\.]+/,
+    space: { match: /\s+/, lineBreaks: true },
+    // comment: from `#` to the end of its line
+    comment: /#.*?$/,
+    // string: `[^"]` matches newlines where `.` does not, so a string can span
+    // lines, as `lineBreaks: true` declares and the editor grammar highlights
+    str: { match: /"[^"]*"/, lineBreaks: true },
+    // number: the trailing `\b` rejects a digit run glued to a letter (`1a`)
+    num: /\d+(?:\.\d+)?\b/,
+    // identifier: word characters, not starting with a digit
+    id: /(?!\d)\w+/,
+    lparen: "(",
+    rparen: ")",
+    lbracket: "[",
+    rbracket: "]",
+    lbrace: "{",
+    rbrace: "}",
+    comma: ",",
+    dot: ".",
+  });
diff --git a/packages/vscode/syntaxes/rose.tmLanguage.json b/packages/vscode/syntaxes/rose.tmLanguage.json
index 93d19bd..12e71b9 100644
--- a/packages/vscode/syntaxes/rose.tmLanguage.json
+++ b/packages/vscode/syntaxes/rose.tmLanguage.json
@@ -50,12 +50,13 @@
     "literals": {
       "patterns": [
         {
-          "name": "constant.numeric.rose",
-          "match": "\\b[0-9]+(\\.[0-9]+)?\\b"
+          "name": "string.quoted.double.rose",
+          "begin": "\"",
+          "end": "\""
         },
         {
-          "name": "string.quoted.double.rose",
-          "match": "\"([^\"\\\\]|\\\\.)*\""
+          "name": "constant.numeric.rose",
+          "match": "\\b\\d+(\\.\\d+)?\\b"
         },
         {
           "name": "constant.language.rose",
@@ -91,7 +92,7 @@
       "patterns": [
         {
           "name": "keyword.operator.rose",
-          "match": "[^\\s#\\w\\d\"\\(\\)\\[\\]\\{\\},\\.]+"
+          "match": "[^\\s#\\w\"\\(\\)\\[\\]\\{\\},\\.]+"
         }
       ]
     }