-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 82554ed
Showing
7 changed files
with
418 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
# Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore | ||
|
||
# Logs | ||
|
||
logs | ||
*.log | ||
npm-debug.log* | ||
yarn-debug.log* | ||
yarn-error.log* | ||
lerna-debug.log* | ||
.pnpm-debug.log* | ||
|
||
# Caches | ||
|
||
.cache | ||
|
||
# Diagnostic reports (https://nodejs.org/api/report.html) | ||
|
||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json | ||
|
||
# Runtime data | ||
|
||
pids | ||
*.pid | ||
*.seed | ||
*.pid.lock | ||
|
||
# Directory for instrumented libs generated by jscoverage/JSCover | ||
|
||
lib-cov | ||
|
||
# Coverage directory used by tools like istanbul | ||
|
||
coverage | ||
*.lcov | ||
|
||
# nyc test coverage | ||
|
||
.nyc_output | ||
|
||
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) | ||
|
||
.grunt | ||
|
||
# Bower dependency directory (https://bower.io/) | ||
|
||
bower_components | ||
|
||
# node-waf configuration | ||
|
||
.lock-wscript | ||
|
||
# Compiled binary addons (https://nodejs.org/api/addons.html) | ||
|
||
build/Release | ||
|
||
# Dependency directories | ||
|
||
node_modules/ | ||
jspm_packages/ | ||
|
||
# Snowpack dependency directory (https://snowpack.dev/) | ||
|
||
web_modules/ | ||
|
||
# TypeScript cache | ||
|
||
*.tsbuildinfo | ||
|
||
# Optional npm cache directory | ||
|
||
.npm | ||
|
||
# Optional eslint cache | ||
|
||
.eslintcache | ||
|
||
# Optional stylelint cache | ||
|
||
.stylelintcache | ||
|
||
# Microbundle cache | ||
|
||
.rpt2_cache/ | ||
.rts2_cache_cjs/ | ||
.rts2_cache_es/ | ||
.rts2_cache_umd/ | ||
|
||
# Optional REPL history | ||
|
||
.node_repl_history | ||
|
||
# Output of 'npm pack' | ||
|
||
*.tgz | ||
|
||
# Yarn Integrity file | ||
|
||
.yarn-integrity | ||
|
||
# dotenv environment variable files | ||
|
||
.env | ||
.env.development.local | ||
.env.test.local | ||
.env.production.local | ||
.env.local | ||
|
||
# parcel-bundler cache (https://parceljs.org/) | ||
|
||
.parcel-cache | ||
|
||
# Next.js build output | ||
|
||
.next | ||
out | ||
|
||
# Nuxt.js build / generate output | ||
|
||
.nuxt | ||
dist | ||
|
||
# Gatsby files | ||
|
||
# Comment in the public line in if your project uses Gatsby and not Next.js | ||
|
||
# https://nextjs.org/blog/next-9-1#public-directory-support | ||
|
||
# public | ||
|
||
# vuepress build output | ||
|
||
.vuepress/dist | ||
|
||
# vuepress v2.x temp and cache directory | ||
|
||
.temp | ||
|
||
# Docusaurus cache and generated files | ||
|
||
.docusaurus | ||
|
||
# Serverless directories | ||
|
||
.serverless/ | ||
|
||
# FuseBox cache | ||
|
||
.fusebox/ | ||
|
||
# DynamoDB Local files | ||
|
||
.dynamodb/ | ||
|
||
# TernJS port file | ||
|
||
.tern-port | ||
|
||
# Stores VSCode versions used for testing VSCode extensions | ||
|
||
.vscode-test | ||
|
||
# yarn v2 | ||
|
||
.yarn/cache | ||
.yarn/unplugged | ||
.yarn/build-state.yml | ||
.yarn/install-state.gz | ||
.pnp.* | ||
|
||
# IntelliJ based IDEs | ||
.idea | ||
|
||
# Finder (MacOS) folder config | ||
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# smdparser | ||
|
||
To install dependencies: | ||
|
||
```bash | ||
bun install | ||
``` | ||
|
||
To run: | ||
|
||
```bash | ||
bun run parser.ts | ||
``` | ||
|
||
This project was created using `bun init` in bun v1.1.29. [Bun](https://bun.sh) is a fast all-in-one JavaScript runtime. |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{ | ||
"name": "smdparser", | ||
"module": "parser.ts", | ||
"type": "module", | ||
"devDependencies": { | ||
"@types/bun": "latest" | ||
}, | ||
"peerDependencies": { | ||
"typescript": "^5.0.0" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
/** | ||
* GRAMMAR | ||
* formatStmt: text | | ||
* bold | | ||
* italic | | ||
* code | | ||
* multiline_code | | ||
* text: TEXT ?EOF | ||
* bold: STAR innerBoldStmt STAR | ||
* innerBoldStmt: italic | text | ||
* italic: UNDERSCORE innerItalicStmt UNDERSCORE | ||
* innerItalicStmt: bold | text | ||
* code: BACKTICK formatStmt BACKTICK | ||
* multiline_code: BACKTICK BACKTICK BACKTICK text BACKTICK BACKTICK BACKTICK | ||
*/ | ||
|
||
import { isAsciiAlpha, isAlphaNumeric } from "./utils.ts"; | ||
|
||
/**
 * Categories of tokens produced by the lexer.
 *
 * Numeric values are spelled out explicitly; they match the implicit
 * zero-based numbering of the declaration order, so both the forward
 * (`TokenType.H1`) and reverse (`TokenType[4]`) mappings are unchanged.
 */
enum TokenType {
  /** A run of plain text. */
  STRING = 0,
  /** The `**` marker. */
  BOLD = 1,
  /** The `_` marker. */
  ITALIC = 2,
  /** A single `-`. */
  UNORDEREDLI = 3,
  /** Header markers: a run of one to five `#` characters. */
  H1 = 4,
  H2 = 5,
  H3 = 6,
  H4 = 7,
  H5 = 8,
  /** A single backtick. */
  MONOSPACE = 9,
  /** A triple backtick. */
  CODE = 10,
  /** Placeholder type used before a token has been classified. */
  UNTYPED = 11,
  /** End of input. */
  EOF = 12,
  /** The `---` marker. */
  LINEBREAK = 13,
}
|
||
/** A single lexical unit: its category plus the exact text it stands for. */
type Token = {
  /** Category assigned to this token. */
  tokenType: TokenType;
  /** Text carried by the token (for example `"**"`, `"#"`, or a run of plain text). */
  tokenValue: string;
};
|
||
/**
 * Character-by-character lexer for a small Markdown-like syntax.
 *
 * Recognised markers: `_` (ITALIC), `**` (BOLD), `-` (UNORDEREDLI),
 * `---` (LINEBREAK), runs of one to five `#` (H1..H5), a single backtick
 * (MONOSPACE) and a triple backtick (CODE). Runs of alphanumeric characters
 * and spaces become STRING tokens. Once the input is exhausted every call to
 * `getNextToken` yields an EOF token.
 *
 * Every token-producing path consumes at least one character (or throws), so
 * repeatedly calling `getNextToken` always reaches EOF.
 */
class Lexer {
  /** Index of `currentChar` in `input`; equals `input.length` at end of input. */
  private position = 0;
  private readonly input: string;
  /** Character under the cursor, or `"\0"` once the input is exhausted. */
  private currentChar: string;
  /** The token most recently returned by `getNextToken`. */
  public currentToken: Token;

  /**
   * @param input Text to tokenize.
   * @param currentToken Initial value of `currentToken`; defaults to an empty
   *   UNTYPED token. (The default no longer reads `this.currentToken`, which
   *   is not allowed in constructor arguments.)
   */
  constructor(
    input: string,
    currentToken: Token = { tokenValue: "", tokenType: TokenType.UNTYPED },
  ) {
    this.currentToken = currentToken;
    this.input = input;
    // Empty input starts directly at the end-of-input sentinel.
    this.currentChar = input.length > 0 ? input.charAt(0) : "\0";
  }

  /**
   * Returns the character `stroke` positions after the cursor without
   * consuming anything, or `""` when that position is past the end.
   */
  private peek(stroke = 1): string {
    return this.input.charAt(this.position + stroke);
  }

  /**
   * Returns up to `length` characters starting at the cursor (inclusive of
   * the current character) without consuming anything.
   */
  private peekSequence(length: number): string {
    return this.input.slice(this.position, this.position + length);
  }

  /**
   * Aborts lexing at the current character.
   *
   * NOTE(review): this throws a plain string, kept as-is so existing catch
   * sites keep working; consider migrating to `Error` once callers are audited.
   *
   * @throws a string describing the offending character and its position.
   */
  private invalidCharacterError(): never {
    throw `Invalid character: \`${this.currentChar}\` at position ${this.position}`;
  }

  /**
   * Moves the cursor forward by `stroke` characters. Moving past the last
   * character parks the cursor at `input.length` and sets `currentChar` to
   * the `"\0"` sentinel.
   */
  private advance(stroke = 1): void {
    const next = this.position + stroke;
    if (next >= this.input.length) {
      this.position = this.input.length;
      this.currentChar = "\0";
      return;
    }
    this.position = next;
    this.currentChar = this.input.charAt(next);
  }

  /**
   * Consumes a run of `#` characters at the cursor and returns the matching
   * header token. The longest prefix of up to five `#` wins; a longer run is
   * split across successive tokens.
   */
  private getHeaderToken(): Token {
    for (let level = 5; level > 0; level--) {
      const prefix = "#".repeat(level);
      if (this.peekSequence(level) === prefix) {
        this.advance(level);
        return {
          tokenValue: prefix,
          tokenType: TokenType[`H${level}` as keyof typeof TokenType],
        };
      }
    }

    // Defensive fallback: still consume one character so the caller makes progress.
    this.advance();
    return { tokenValue: "#", tokenType: TokenType.H1 };
  }

  /**
   * Consumes a run of alphanumeric characters and spaces and returns it as a
   * STRING token.
   *
   * @throws via `invalidCharacterError` when the character under the cursor
   *   cannot start a text run; returning an empty token here would leave the
   *   cursor in place and stall the caller.
   */
  private getTextToken(): Token {
    let text = "";
    while (isAlphaNumeric(this.currentChar) || this.currentChar === " ") {
      text += this.currentChar;
      this.advance();
    }
    if (text === "") {
      this.invalidCharacterError();
    }
    return { tokenValue: text, tokenType: TokenType.STRING };
  }

  /**
   * Consumes a backtick marker at the cursor: three backticks produce a CODE
   * token, a single backtick produces a MONOSPACE token.
   */
  private getCodeToken(): Token {
    if (this.peek() === "`" && this.peek(2) === "`") {
      this.advance(3);
      return { tokenValue: "```", tokenType: TokenType.CODE };
    }
    this.advance();
    return { tokenValue: "`", tokenType: TokenType.MONOSPACE };
  }

  /**
   * Reads the next token from the input, stores it in `currentToken`, and
   * returns it.
   *
   * @returns the next token; an EOF token once the input is exhausted.
   * @throws a string (see `invalidCharacterError`) on a lone `*` or on any
   *   character that is neither a marker nor valid text.
   */
  public getNextToken(): Token {
    let token: Token = {
      tokenValue: "",
      tokenType: TokenType.UNTYPED,
    };
    switch (this.currentChar) {
      case "\0":
        token.tokenType = TokenType.EOF;
        token.tokenValue = "\0";
        break;
      case "_":
        token.tokenValue = "_";
        token.tokenType = TokenType.ITALIC;
        this.advance();
        break;
      case "*":
        // Only the two-character `**` marker is recognised.
        if (this.peek() !== "*") {
          this.invalidCharacterError();
        }
        token.tokenValue = "**";
        token.tokenType = TokenType.BOLD;
        this.advance(2);
        break;
      case "-":
        if (this.peek() === "-" && this.peek(2) === "-") {
          token.tokenValue = "---";
          token.tokenType = TokenType.LINEBREAK;
          this.advance(3);
        } else {
          token.tokenValue = "-";
          token.tokenType = TokenType.UNORDEREDLI;
          this.advance();
        }
        break;
      case "#":
        token = this.getHeaderToken();
        break;
      case "`":
        token = this.getCodeToken();
        break;
      default:
        token = this.getTextToken();
    }
    this.currentToken = token;
    return this.currentToken;
  }
}
|
||
// Module-level lexer over a sample input that mixes the italic (`_`) and bold (`**`) markers.
const lexer = new Lexer("_**Hello** World_");
Oops, something went wrong.