From 72af7ea6bcdee61996ec723a3facbd3e1a6bfab4 Mon Sep 17 00:00:00 2001 From: Frusadev Date: Sat, 23 Nov 2024 15:51:26 +0000 Subject: [PATCH] Bug fixes and project restructuring --- package.json | 7 +- parser.ts | 578 --------------------------------------- src/h.html | 49 ++++ src/html.ts | 89 ++++++ src/lexer.ts | 204 ++++++++++++++ src/parser.ts | 330 ++++++++++++++++++++++ src/test.ts | 20 ++ utils.ts => src/utils.ts | 0 test.md | 3 + 9 files changed, 700 insertions(+), 580 deletions(-) delete mode 100644 parser.ts create mode 100644 src/h.html create mode 100644 src/html.ts create mode 100644 src/lexer.ts create mode 100644 src/parser.ts create mode 100644 src/test.ts rename utils.ts => src/utils.ts (100%) create mode 100644 test.md diff --git a/package.json b/package.json index 4c6307e..e5043c2 100644 --- a/package.json +++ b/package.json @@ -1,11 +1,14 @@ { "name": "smdparser", - "module": "parser.ts", + "module": "src/test.ts", "type": "module", "devDependencies": { "@types/bun": "latest" }, "peerDependencies": { "typescript": "^5.0.0" + }, + "scripts": { + "test": "bun run src/test.ts" } -} \ No newline at end of file +} diff --git a/parser.ts b/parser.ts deleted file mode 100644 index b637132..0000000 --- a/parser.ts +++ /dev/null @@ -1,578 +0,0 @@ -/** - * GRAMMAR - * formatStmt: text | - * bold | - * italic | - * code | - * multiline_code | - * formatStmt | - * text: TEXT ?EOF - * bold: STAR innerBoldStmt STAR - * innerBoldStmt: italic | text - * italic: UNDERSCORE innerItalicStmt UNDERSCORE - * innerItalicStmt: bold | text - * code: BACKTICK formatStmt BACKTICK - * multiline_code: BACKTICK BACKTICK BACKTICK text BACKTICK BACKTICK BACKTICK - */ - -import { isAlphaNumeric } from "./utils.ts"; - -enum TokenType { - STRING, - NEWLINE, - BOLD, - ITALIC, - UNORDEREDLI, - HEADER1, - HEADER2, - HEADER3, - HEADER4, - HEADER5, - MONOSPACE, - CODE, - UNTYPED, - EOF, - LINEBREAK, -} - -export type Token = { - tokenValue: string; - tokenType: TokenType; -}; - -export type ASTNode = - | { - nodeType: NodeType; - token: Token; - children: Array; - } - | { - nodeType: NodeType.Code; - token: Token; - languageName: string; - children: Array; - }; - -export class Lexer { - private position = 0; - private input: string; - private currentChar: string; - public currentToken: Token = { - tokenValue: "", - tokenType: TokenType.UNTYPED, - }; - - constructor(input: string, currentToken: Token = this.currentToken) { - this.currentToken = currentToken; - this.input = input; - this.currentChar = this.input[this.position]; - } - - private peek(stroke = 1): string { - if (this.input.length - 1 >= this.position + stroke) { - return this.input[this.position + stroke]; - } - return ""; - } - - private peekSequence(size: number): string { - let p = this.position; - let s = ""; - const start = this.position; - // currentChar + ... 
+ char ar input[size] - // while position < position at end: (position + size) - while (p < start + size) { - if (this.position === this.input.length - 1) { - break; - } - s += this.input[p]; - p++; - } - return s; - } - - private invalidCharacterError() { - throw `Invalid character: \`${this.currentChar}\` at position ${this.position}`; - } - - private advance(stroke = 1) { - if (this.position + stroke > this.input.length - 1) { - this.currentChar = "\0"; - } else { - this.currentChar = this.input[this.position + stroke]; - this.position += stroke; - } - } - - private getHeaderToken(): Token { - const token: Token = { - tokenValue: "", - tokenType: TokenType.UNTYPED, - }; - - if (this.peekSequence(5) === "#####") { - token.tokenType = TokenType.HEADER5; - token.tokenValue = "#####"; - this.advance(5); - return token; - } - - if (this.peekSequence(4) === "####") { - token.tokenType = TokenType.HEADER5; - token.tokenValue = "####"; - this.advance(4); - return token; - } - - if (this.peekSequence(3) === "###") { - token.tokenType = TokenType.HEADER4; - token.tokenValue = "###"; - this.advance(3); - return token; - } - - if (this.peekSequence(2) === "##") { - token.tokenType = TokenType.HEADER3; - token.tokenValue = "##"; - this.advance(2); - return token; - } - - token.tokenValue = "#"; - token.tokenType = TokenType.HEADER1; - this.advance(1); - return token; - } - - private getTextToken(): Token { - let s = ""; - while (isAlphaNumeric(this.currentChar) || this.currentChar === " ") { - s += this.currentChar; - this.advance(); - } - return { - tokenValue: s, - tokenType: TokenType.STRING, - }; - } - - private getCodeToken(): Token { - const token: Token = { - tokenValue: "", - tokenType: TokenType.UNTYPED, - }; - if (this.peek() === "`" && this.peek(2) === "`") { - token.tokenValue = "```"; - token.tokenType = TokenType.CODE; - this.advance(3); - } else { - token.tokenValue = "`"; - token.tokenType = TokenType.MONOSPACE; - this.advance(); - } - return token; - } - - public getNextToken(): Token { - let token: Token = { - tokenValue: "", - tokenType: TokenType.UNTYPED, - }; - switch (this.currentChar) { - case "\0": - token.tokenType = TokenType.EOF; - token.tokenValue = "\0"; - break; - case "_": - token.tokenValue = "_"; - token.tokenType = TokenType.ITALIC; - this.advance(); - break; - case "*": - if (this.peek() === "*") { - token.tokenValue = "**"; - token.tokenType = TokenType.BOLD; - this.advance(2); - } else { - this.invalidCharacterError(); - } - break; - case "-": - if (this.peek() === "-" && this.peek(2) === "-") { - token.tokenValue = "---"; - token.tokenType = TokenType.LINEBREAK; - this.advance(3); - } else { - token.tokenValue = "-"; - token.tokenType = TokenType.UNORDEREDLI; - this.advance(); - } - break; - case "#": - token = this.getHeaderToken(); - break; - case "`": - token = this.getCodeToken(); - break; - case "\n": - token.tokenValue = "\n"; - token.tokenType = TokenType.NEWLINE; - this.advance(); - break; - default: - token = this.getTextToken(); - } - this.currentToken = token; - return this.currentToken; - } -} - -enum NodeType { - RootNode, - UnorderedList, - UnorderedListRoot, - UnorderedListItem, - OrderedList, - StringNode, - Bold, - Italic, - Code, - Monospace, - Header1, - Header2, - Header3, - Header4, - Header5, - Void, - NewLineNode, -} - -export class Parser { - private currentToken: Token; - private lexer: Lexer; - - constructor(lexer: Lexer) { - this.lexer = lexer; - this.currentToken = this.lexer.getNextToken(); - } - - private invalidTokenError() { - 
throw `Invalid token: ${JSON.stringify(this.currentToken)}`; - } - - private eat(tokenType: TokenType) { - if (this.currentToken.tokenType === tokenType) { - this.currentToken = this.lexer.getNextToken(); - } else { - this.invalidTokenError(); - } - } - - private textStmt(): ASTNode { - const text: ASTNode = { - nodeType: NodeType.StringNode, - token: this.currentToken, - children: [], - }; - switch (this.currentToken.tokenType) { - case TokenType.STRING: - this.eat(TokenType.STRING); - break; - case TokenType.NEWLINE: - this.eat(TokenType.NEWLINE); - text.nodeType = NodeType.NewLineNode; - break; - default: - this.invalidTokenError(); - } - return text; - } - - private boldStmt(): ASTNode { - const node: ASTNode = { - nodeType: NodeType.Bold, - token: this.currentToken, - children: [], - }; - this.eat(TokenType.BOLD); - node.children = [...this.innerBoldStmt()]; - this.eat(TokenType.BOLD); - return node; - } - - private italicStmt(): ASTNode { - const node: ASTNode = { - nodeType: NodeType.Italic, - token: this.currentToken, - children: [], - }; - this.eat(TokenType.ITALIC); - node.children = [...this.innerItalicStmt()]; - this.eat(TokenType.ITALIC); - return node; - } - - private innerItalicStmt(): Array { - let nodes: Array = []; - while ( - [TokenType.BOLD, TokenType.STRING].includes(this.currentToken.tokenType) - ) { - switch (this.currentToken.tokenType) { - case TokenType.BOLD: - nodes = nodes.concat(this.boldStmt()); - break; - case TokenType.STRING: - nodes = nodes.concat(this.textStmt()); - } - } - return nodes; - } - - private innerBoldStmt(): Array { - let nodes: Array = []; - while ( - [TokenType.ITALIC, TokenType.STRING].includes(this.currentToken.tokenType) - ) { - switch (this.currentToken.tokenType) { - case TokenType.ITALIC: - nodes = nodes.concat(this.italicStmt()); - break; - case TokenType.STRING: - nodes = nodes.concat(this.textStmt()); - } - } - return nodes; - } - - private monoSpaceStmt(): ASTNode { - const node: ASTNode = { - nodeType: NodeType.Monospace, - token: this.currentToken, - children: [], - }; - this.eat(TokenType.MONOSPACE); - node.children = node.children.concat(this.textStmt()); - this.eat(TokenType.MONOSPACE); - return node; - } - - private codeStmt(): ASTNode { - const node: ASTNode = { - nodeType: NodeType.Code, - token: this.currentToken, - languageName: "", - children: [], - }; - this.eat(TokenType.CODE); - node.languageName = this.currentToken.tokenValue; - this.eat(TokenType.STRING); - this.eat(TokenType.NEWLINE); - node.children = node.children.concat(this.textStmt()); - this.eat(TokenType.CODE); - return node; - } - - private headerStmt(): ASTNode { - const node: ASTNode = { - nodeType: NodeType.Void, - token: this.currentToken, - children: [], - }; - const headers: Map = new Map([ - [TokenType.HEADER1, NodeType.Header1], - [TokenType.HEADER2, NodeType.Header2], - [TokenType.HEADER3, NodeType.Header3], - [TokenType.HEADER4, NodeType.Header4], - [TokenType.HEADER5, NodeType.Header5], - ]); - if (headers.get(node.token.tokenType) !== undefined) { - node.nodeType = headers.get(node.token.tokenType) as NodeType; - this.eat(this.currentToken.tokenType); - } else { - this.invalidTokenError(); - } - node.children = node.children.concat(this.textStmt()); - this.eat(TokenType.NEWLINE); - return node; - } - - private innerUnorederedLiStmt(): ASTNode { - const listNode: ASTNode = { - nodeType: NodeType.UnorderedListItem, - token: this.currentToken, - children: [], - }; - this.eat(TokenType.UNORDEREDLI); - switch (this.currentToken.tokenType) { - case 
TokenType.ITALIC: - listNode.children = listNode.children.concat(this.italicStmt()); - break; - case TokenType.BOLD: - listNode.children = listNode.children.concat(this.boldStmt()); - break; - case TokenType.STRING: - listNode.children = listNode.children.concat(this.textStmt()); - break; - case TokenType.MONOSPACE: - listNode.children = listNode.children.concat(this.monoSpaceStmt()); - break; - default: - listNode.children = listNode.children.concat(this.headerStmt()); - } - return listNode; - } - - private unorderedListStmt(): ASTNode { - const listRoot: ASTNode = { - nodeType: NodeType.UnorderedListRoot, - token: { - tokenType: TokenType.UNTYPED, - tokenValue: "lis", - }, - children: [], - }; - while (this.currentToken.tokenType === TokenType.UNORDEREDLI) { - listRoot.children = listRoot.children.concat( - this.innerUnorederedLiStmt(), - ); - } - return listRoot; - } - private formatStmt(): ASTNode { - let node: ASTNode = { - nodeType: NodeType.Void, - token: { - tokenType: TokenType.UNTYPED, - tokenValue: "", - }, - children: [], - }; - switch (this.currentToken.tokenType) { - case TokenType.STRING: - node = this.textStmt(); - break; - case TokenType.MONOSPACE: - node = this.monoSpaceStmt(); - break; - case TokenType.CODE: - node = this.codeStmt(); - break; - case TokenType.ITALIC: - node = this.italicStmt(); - break; - case TokenType.BOLD: - node = this.boldStmt(); - break; - case TokenType.NEWLINE: - node = this.textStmt(); - break; - case TokenType.UNORDEREDLI: - node = this.unorderedListStmt(); - break; - default: - if (TokenType[this.currentToken.tokenType].startsWith("HEADER")) { - node = this.headerStmt(); - } else { - this.invalidTokenError(); - } - } - return node; - } - public rootStmt(): ASTNode { - const rootNode: ASTNode = { - nodeType: NodeType.RootNode, - token: { - tokenType: TokenType.UNTYPED, - tokenValue: "Root", - }, - children: [], - }; - while (this.currentToken.tokenType !== TokenType.EOF) { - rootNode.children = rootNode.children.concat(this.formatStmt()); - } - return rootNode; - } -} -
-export function toHtml(root: ASTNode): string {
-  let output = "";
-  if (root.nodeType === NodeType.RootNode) {
-    output += "<html>\n";
-  }
-  for (const child of root.children) {
-    switch (child.nodeType) {
-      case NodeType.Bold:
-        output += "<b>";
-        output += toHtml(child);
-        output += "</b>\n";
-        break;
-      case NodeType.NewLineNode:
-        output += "<br>\n";
-        break;
-      case NodeType.StringNode:
-        output += "<span>";
-        output += child.token.tokenValue;
-        output += "</span>\n";
-        break;
-      case NodeType.Italic:
-        output += "<i>";
-        output += toHtml(child);
-        output += "</i>\n";
-        break;
-      case NodeType.Header1:
-        output += "<h1>\n";
-        output += toHtml(child);
-        output += "\n</h1>\n";
-        break;
-      case NodeType.Header2:
-        output += "<h2>\n";
-        output += toHtml(child);
-        output += "\n</h2>\n";
-        break;
-      case NodeType.Header3:
-        output += "<h3>\n";
-        output += toHtml(child);
-        output += "\n</h3>\n";
-        break;
-      case NodeType.Header4:
-        output += "<h4>\n";
-        output += toHtml(child);
-        output += "\n</h4>\n";
-        break;
-      case NodeType.Header5:
-        output += "<h5>\n";
-        output += toHtml(child);
-        output += "\n</h5>\n";
-        break;
-      case NodeType.Monospace:
-        output += "<code>";
-        output += toHtml(child);
-        output += "</code>\n";
-        break;
-      case NodeType.UnorderedListRoot:
-        output += "<ul>\n";
-        output += toHtml(child);
-        output += "\n</ul>\n";
-        break;
-      case NodeType.UnorderedListItem:
-        output += "<li>\n";
-        output += toHtml(child);
-        output += "\n</li>\n";
-    }
-  }
-  if (root.nodeType === NodeType.RootNode) {
-    output += "</html>\n";
-  }
-  return output;
-}
-
-const fs = require("fs");
-try {
-  const data = fs.readFileSync("test.md", "utf8"); // Read file in UTF-8 encoding
-  const lexer = new Lexer(data);
-  const parser = new Parser(lexer);
-  console.log(toHtml(parser.rootStmt()));
-  //console.log(data);
-} catch (err) {
-  console.error(err);
-}
diff --git a/src/h.html b/src/h.html
new file mode 100644
index 0000000..7727c7a
--- /dev/null
+++ b/src/h.html
@@ -0,0 +1,49 @@
+<html>
+<h3>
+   <span>Section   </span>
+
+</h3>
+<br>
+<ul>
+<li>
+   <span>List n 1</span>
+   
+</li>
+
+</ul>
+<br>
+<ul>
+<li>
+   <span>List n 2</span>
+   
+</li>
+
+</ul>
+<br>
+<ul>
+<li>
+   <span>List n 3</span>
+   
+</li>
+
+</ul>
+<br>
+<ul>
+<li>
+   <span>List n 4</span>
+   
+</li>
+
+</ul>
+<br>
+<b><span>Test </span>
+<i><span>test</span>
+</i>
+</b>
+<br>
+<span>  </span>
+<code><span>hhhh</span>
+</code>
+<br>
+</html>
+
diff --git a/src/html.ts b/src/html.ts
new file mode 100644
index 0000000..e7766ad
--- /dev/null
+++ b/src/html.ts
@@ -0,0 +1,89 @@
+import { NodeType } from "./parser.ts";
+import type { ASTNode } from "./parser.ts";
+
+function fillSpace(size: number): string {
+  let n = 1;
+  let s = "";
+  while (n <= size) {
+    s += " ";
+    n++;
+  }
+  return s;
+}
+
+export function toHtml(root: ASTNode, incSize = 0): string {
+  let output = "";
+  if (root.nodeType === NodeType.RootNode) {
+    output += "<html>\n";
+  }
+  const inc = fillSpace(incSize);
+  const nextInc = incSize + 3;
+  for (const child of root.children) {
+    switch (child.nodeType) {
+      case NodeType.Bold:
+        output += `${inc}<b>`;
+        output += toHtml(child);
+        output += `${inc}</b>\n`;
+        break;
+      case NodeType.NewLineNode:
+        output += `${inc}<br>\n`;
+        break;
+      case NodeType.SpaceNode:
+        output += `${inc} `;
+        break;
+      case NodeType.StringNode:
+        output += `${inc}<span>`;
+        output += child.token.tokenValue;
+        output += `${inc}</span>\n`;
+        break;
+      case NodeType.Italic:
+        output += `${inc}<i>`;
+        output += toHtml(child);
+        output += `${inc}</i>\n`;
+        break;
+      case NodeType.Header1:
+        output += `${inc}<h1>\n`;
+        output += toHtml(child, nextInc);
+        output += `${inc}\n</h1>\n`;
+        break;
+      case NodeType.Header2:
+        output += `${inc}<h2>\n`;
+        output += toHtml(child, nextInc);
+        output += `${inc}\n</h2>\n`;
+        break;
+      case NodeType.Header3:
+        output += `${inc}<h3>\n`;
+        output += toHtml(child, nextInc);
+        output += `${inc}\n</h3>\n`;
+        break;
+      case NodeType.Header4:
+        output += `${inc}<h4>\n`;
+        output += toHtml(child, nextInc);
+        output += `${inc}\n</h4>\n`;
+        break;
+      case NodeType.Header5:
+        output += `${inc}<h5>\n`;
+        output += toHtml(child, nextInc);
+        output += `${inc}\n</h5>\n`;
+        break;
+      case NodeType.Monospace:
+        output += `${inc}<code>`;
+        output += toHtml(child);
+        output += `${inc}</code>\n`;
+        break;
+      case NodeType.UnorderedListRoot:
+        output += `${inc}<ul>\n`;
+        output += inc + toHtml(child, nextInc);
+        output += `${inc}\n</ul>\n`;
+        break;
+      case NodeType.UnorderedListItem:
+        output += "<li>\n";
+        output += inc + toHtml(child);
+        output += `${inc}\n</li>\n`;
+    }
+  }
+  if (root.nodeType === NodeType.RootNode) {
+    output += "</html>\n";
+  }
+  return output;
+}
diff --git a/src/lexer.ts b/src/lexer.ts new file mode 100644 index 0000000..008a811 --- /dev/null +++ b/src/lexer.ts @@ -0,0 +1,204 @@ +import { isAlphaNumeric, isAsciiAlpha } from "./utils.ts"; + +export enum TokenType { + STRING, + SPACE, + NEWLINE, + BOLD, + ITALIC, + UNORDEREDLI, + HEADER1, + HEADER2, + HEADER3, + HEADER4, + HEADER5, + MONOSPACE, + CODE, + UNTYPED, + EOF, + LINEBREAK, +} + +export type Token = { + tokenValue: string; + tokenType: TokenType; +}; + +export class Lexer { + public position = 0; + private input: string; + private currentChar: string; + public currentToken: Token = { + tokenValue: "", + tokenType: TokenType.UNTYPED, + }; + + constructor(input: string, currentToken: Token = this.currentToken) { + this.currentToken = currentToken; + this.input = input; + this.currentChar = this.input[this.position]; + } + + private peek(stroke = 1): string { + if (this.input.length - 1 >= this.position + stroke) { + return this.input[this.position + stroke]; + } + return ""; + } + + private peekSequence(size: number): string { + let p = this.position; + let s = ""; + const start = this.position; + // currentChar + ... + char ar input[size] + // while position < position at end: (position + size) + while (p < start + size) { + if (this.position === this.input.length - 1) { + break; + } + s += this.input[p]; + p++; + } + return s; + } + + private invalidCharacterError() { + throw `Invalid character: \`${this.currentChar}\` at position ${this.position}`; + } + + private advance(stroke = 1) { + if (this.position + stroke > this.input.length - 1) { + this.currentChar = "\0"; + } else { + this.currentChar = this.input[this.position + stroke]; + this.position += stroke; + } + } + + private getHeaderToken(): Token { + const token: Token = { + tokenValue: "", + tokenType: TokenType.UNTYPED, + }; + + if (this.peekSequence(5) === "#####") { + token.tokenType = TokenType.HEADER5; + token.tokenValue = "#####"; + this.advance(5); + return token; + } + + if (this.peekSequence(4) === "####") { + token.tokenType = TokenType.HEADER4; + token.tokenValue = "####"; + this.advance(4); + return token; + } + + if (this.peekSequence(3) === "###") { + token.tokenType = TokenType.HEADER3; + token.tokenValue = "###"; + this.advance(3); + return token; + } + + if (this.peekSequence(2) === "##") { + token.tokenType = TokenType.HEADER2; + token.tokenValue = "##"; + this.advance(2); + return token; + } + + token.tokenValue = "#"; + token.tokenType = TokenType.HEADER1; + this.advance(1); + return token; + } + + private getTextToken(): Token { + let s = ""; + while (isAlphaNumeric(this.currentChar)) { + s += this.currentChar; + this.advance(); + } + return { + tokenValue: s, + tokenType: TokenType.STRING, + }; + } + + private getCodeToken(): Token { + const token: Token = { + tokenValue: "", + tokenType: TokenType.UNTYPED, + }; + if (this.peek() === "`" && this.peek(2) === "`") { + token.tokenValue = "```"; + token.tokenType = TokenType.CODE; + this.advance(3); + } else { + token.tokenValue = "`"; + token.tokenType = TokenType.MONOSPACE; + this.advance(); + } + return token; + } + + public getNextToken(): Token { + let token: Token = { + tokenValue: "", + tokenType: TokenType.UNTYPED, + }; + switch (this.currentChar) { + case "\0": + token.tokenType = TokenType.EOF; + token.tokenValue = "\0"; + break; + case " ": + token.tokenType = TokenType.SPACE; + token.tokenValue = " "; + this.advance(); + break; +
case "_": + token.tokenValue = "_"; + token.tokenType = TokenType.ITALIC; + this.advance(); + break; + case "*": + if (this.peek() === "*") { + token.tokenValue = "**"; + token.tokenType = TokenType.BOLD; + this.advance(2); + } else { + this.invalidCharacterError(); + } + break; + case "-": + if (this.peek() === "-" && this.peek(2) === "-") { + token.tokenValue = "---"; + token.tokenType = TokenType.LINEBREAK; + this.advance(3); + } else { + token.tokenValue = "-"; + token.tokenType = TokenType.UNORDEREDLI; + this.advance(); + } + break; + case "#": + token = this.getHeaderToken(); + break; + case "`": + token = this.getCodeToken(); + break; + case "\n": + token.tokenValue = "\n"; + token.tokenType = TokenType.NEWLINE; + this.advance(); + break; + default: + token = this.getTextToken(); + } + this.currentToken = token; + return this.currentToken; + } +} diff --git a/src/parser.ts b/src/parser.ts new file mode 100644 index 0000000..17ba385 --- /dev/null +++ b/src/parser.ts @@ -0,0 +1,330 @@ +import { Lexer, TokenType, Token } from "./lexer.ts"; +/** + * GRAMMAR + * formatStmt: text | + * *textstmt | + * bold | + * italic | + * code | + * multiline_code | + * formatStmt | + * text: STRING ?EOF + * bold: STAR innerBoldStmt STAR + * innerBoldStmt: italic | text + * italic: UNDERSCORE innerItalicStmt UNDERSCORE + * innerItalicStmt: bold | text + * code: BACKTICK formatStmt BACKTICK + * multiline_code: BACKTICK BACKTICK BACKTICK text BACKTICK BACKTICK BACKTICK + */ + +export type ASTNode = + | { + nodeType: NodeType; + token: Token; + children: Array; + } + | { + nodeType: NodeType.Code; + token: Token; + languageName: string; + children: Array; + }; + +export enum NodeType { + RootNode, + UnorderedList, + UnorderedListRoot, + UnorderedListItem, + OrderedList, + StringNode, + SpaceNode, + Bold, + Italic, + Code, + Monospace, + Header1, + Header2, + Header3, + Header4, + Header5, + Void, + NewLineNode, +} + +export class Parser { + private currentToken: Token; + private lexer: Lexer; + + constructor(lexer: Lexer) { + this.lexer = lexer; + this.currentToken = this.lexer.getNextToken(); + } + + private invalidTokenError() { + throw `Invalid token: ${JSON.stringify(this.currentToken)}`; + } + + private eat(tokenType: TokenType) { + if (this.currentToken.tokenType === tokenType) { + this.currentToken = this.lexer.getNextToken(); + } else { + console.log(`Unexpected token: Eat ${this.lexer.position}`); + this.invalidTokenError(); + } + } + + private allCharsStmt(): ASTNode { + const node: ASTNode = { + nodeType: NodeType.StringNode, + token: { + tokenType: TokenType.STRING, + tokenValue: "", + }, + children: [], + }; + while ( + [TokenType.STRING, TokenType.SPACE].includes(this.currentToken.tokenType) + ) { + node.token.tokenValue += this.currentToken.tokenValue; + this.eat(this.currentToken.tokenType); + } + return node; + } + + private textStmt(): ASTNode { + const text: ASTNode = { + nodeType: NodeType.StringNode, + token: { + tokenType: TokenType.STRING, + tokenValue: "", + }, + children: [], + }; + + switch (this.currentToken.tokenType) { + case TokenType.STRING: + text.token = this.currentToken; + this.eat(TokenType.STRING); + this.textStmt(); + break; + case TokenType.NEWLINE: + text.token = this.currentToken; + this.eat(TokenType.NEWLINE); + text.nodeType = NodeType.NewLineNode; + break; + } + return text; + } + + private boldStmt(): ASTNode { + const node: ASTNode = { + nodeType: NodeType.Bold, + token: this.currentToken, + children: [], + }; + this.eat(TokenType.BOLD); + node.children = 
[...this.innerBoldStmt()]; + this.eat(TokenType.BOLD); + return node; + } + + private italicStmt(): ASTNode { + const node: ASTNode = { + nodeType: NodeType.Italic, + token: this.currentToken, + children: [], + }; + this.eat(TokenType.ITALIC); + node.children = [...this.innerItalicStmt()]; + this.eat(TokenType.ITALIC); + return node; + } + + private innerItalicStmt(): Array { + let nodes: Array = []; + while ( + [TokenType.BOLD, TokenType.STRING].includes(this.currentToken.tokenType) + ) { + switch (this.currentToken.tokenType) { + case TokenType.BOLD: + nodes = nodes.concat(this.boldStmt()); + break; + case TokenType.STRING: + nodes = nodes.concat(this.allCharsStmt()); + } + } + return nodes; + } + + private innerBoldStmt(): Array { + let nodes: Array = []; + while ( + [TokenType.ITALIC, TokenType.STRING].includes(this.currentToken.tokenType) + ) { + switch (this.currentToken.tokenType) { + case TokenType.ITALIC: + nodes = nodes.concat(this.italicStmt()); + break; + case TokenType.STRING: + nodes = nodes.concat(this.allCharsStmt()); + } + } + return nodes; + } + + private monoSpaceStmt(): ASTNode { + const node: ASTNode = { + nodeType: NodeType.Monospace, + token: this.currentToken, + children: [], + }; + this.eat(TokenType.MONOSPACE); + node.children = node.children.concat(this.allCharsStmt()); + this.eat(TokenType.MONOSPACE); + return node; + } + + private codeStmt(): ASTNode { + const node: ASTNode = { + nodeType: NodeType.Code, + token: this.currentToken, + languageName: "", + children: [], + }; + this.eat(TokenType.CODE); + node.languageName = this.currentToken.tokenValue; + this.eat(TokenType.STRING); + this.eat(TokenType.NEWLINE); + node.children = node.children.concat(this.allCharsStmt()); + this.eat(TokenType.CODE); + return node; + } + + private headerStmt(): ASTNode { + const node: ASTNode = { + nodeType: NodeType.Void, + token: this.currentToken, + children: [], + }; + const headers: Map = new Map([ + [TokenType.HEADER1, NodeType.Header1], + [TokenType.HEADER2, NodeType.Header2], + [TokenType.HEADER3, NodeType.Header3], + [TokenType.HEADER4, NodeType.Header4], + [TokenType.HEADER5, NodeType.Header5], + ]); + if (headers.get(node.token.tokenType) !== undefined) { + node.nodeType = headers.get(node.token.tokenType) as NodeType; + this.eat(this.currentToken.tokenType); + } else { + this.invalidTokenError(); + } + this.eat(TokenType.SPACE); + node.children = node.children.concat(this.allCharsStmt()); + return node; + } + + private innerUnorederedLiStmt(): ASTNode { + const listNode: ASTNode = { + nodeType: NodeType.UnorderedListItem, + token: this.currentToken, + children: [], + }; + this.eat(TokenType.UNORDEREDLI); + this.eat(TokenType.SPACE); + switch (this.currentToken.tokenType) { + case TokenType.ITALIC: + listNode.children = listNode.children.concat(this.italicStmt()); + break; + case TokenType.BOLD: + listNode.children = listNode.children.concat(this.boldStmt()); + break; + case TokenType.STRING: + listNode.children = listNode.children.concat(this.allCharsStmt()); + break; + case TokenType.MONOSPACE: + listNode.children = listNode.children.concat(this.monoSpaceStmt()); + break; + default: + listNode.children = listNode.children.concat(this.headerStmt()); + } + return listNode; + } + + private unorderedListStmt(): ASTNode { + const listRoot: ASTNode = { + nodeType: NodeType.UnorderedListRoot, + token: { + tokenType: TokenType.UNTYPED, + tokenValue: "lis", + }, + children: [], + }; + while (this.currentToken.tokenType === TokenType.UNORDEREDLI) { + listRoot.children = 
listRoot.children.concat( + this.innerUnorederedLiStmt(), + ); + } + return listRoot; + } + private formatStmt(): ASTNode { + let node: ASTNode = { + nodeType: NodeType.Void, + token: { + tokenType: TokenType.UNTYPED, + tokenValue: "", + }, + children: [], + }; + switch (this.currentToken.tokenType) { + case TokenType.STRING: + node = this.textStmt(); + break; + case TokenType.SPACE: + node = this.allCharsStmt(); + break; + case TokenType.MONOSPACE: + node = this.monoSpaceStmt(); + break; + case TokenType.CODE: + node = this.codeStmt(); + break; + case TokenType.ITALIC: + node = this.italicStmt(); + break; + case TokenType.BOLD: + node = this.boldStmt(); + break; + case TokenType.NEWLINE: + node = this.textStmt(); + break; + case TokenType.UNORDEREDLI: + node = this.unorderedListStmt(); + break; + default: + if (this.currentToken.tokenType === TokenType.EOF) { + return node; + } + if (TokenType[this.currentToken.tokenType].startsWith("HEADER")) { + node = this.headerStmt(); + } else { + this.invalidTokenError(); + } + } + return node; + } + public rootStmt(): ASTNode { + const rootNode: ASTNode = { + nodeType: NodeType.RootNode, + token: { + tokenType: TokenType.UNTYPED, + tokenValue: "Root", + }, + children: [], + }; + while (this.currentToken.tokenType !== TokenType.EOF) { + rootNode.children = rootNode.children.concat(this.formatStmt()); + } + return rootNode; + } +} diff --git a/src/test.ts b/src/test.ts new file mode 100644 index 0000000..52f896a --- /dev/null +++ b/src/test.ts @@ -0,0 +1,20 @@ +import { Lexer } from "./lexer.ts"; +import { Parser } from "./parser.ts"; +import { toHtml } from "./html.ts"; + +const data = `### Section +- List n 1 +- List n 2 +- List n 3 +- List n 4 +**Test _test_** + \`hhhh\` +`; +const lexer = new Lexer(data); +const parser = new Parser(lexer); +//while (lexer.currentToken.tokenType !== TokenType.EOF) { +// console.log(lexer.getNextToken()) +// console.log(TokenType[lexer.currentToken.tokenType]) +//} +//console.log(JSON.stringify(parser.rootStmt())); +console.log(toHtml(parser.rootStmt())); diff --git a/utils.ts b/src/utils.ts similarity index 100% rename from utils.ts rename to src/utils.ts diff --git a/test.md b/test.md new file mode 100644 index 0000000..239b7dc --- /dev/null +++ b/test.md @@ -0,0 +1,3 @@ +# Section 1 +## Hello +- ### Hello
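
Usage sketch: the file-driven entry point that the old parser.ts provided (reading test.md and printing the generated HTML) can be recreated with the restructured modules in a few lines. The code below is a minimal sketch, not part of the patch: it assumes a new file placed alongside the modules in src/ (the name convert.ts is hypothetical) and uses Node's fs module, which Bun also implements; Lexer, Parser, rootStmt and toHtml are the exports introduced above.

import { readFileSync } from "fs";
import { Lexer } from "./lexer.ts";
import { Parser } from "./parser.ts";
import { toHtml } from "./html.ts";

// Read the Markdown source, tokenize and parse it, then print the generated HTML.
const markdown = readFileSync("test.md", "utf8");
const parser = new Parser(new Lexer(markdown));
console.log(toHtml(parser.rootStmt()));

Run it from the project root, e.g. with bun run src/convert.ts (hypothetical file name), so that the relative path test.md resolves correctly, mirroring the existing "test" script in package.json.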