diff --git a/.github/workflows/rust.yml b/.github/workflows/rust_linux.yml similarity index 100% rename from .github/workflows/rust.yml rename to .github/workflows/rust_linux.yml diff --git a/.github/workflows/rust_macos.yml b/.github/workflows/rust_macos.yml new file mode 100644 index 00000000..f82f7646 --- /dev/null +++ b/.github/workflows/rust_macos.yml @@ -0,0 +1,22 @@ +name: Rust + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master", "dev" ] + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + + runs-on: macos-latest + + steps: + - uses: actions/checkout@v3 + - name: Build + run: cd rust&&cargo build + - name: Run tests + run: cd rust&&cargo test diff --git a/.github/workflows/rust_windows.yml b/.github/workflows/rust_windows.yml new file mode 100644 index 00000000..4dcf79d7 --- /dev/null +++ b/.github/workflows/rust_windows.yml @@ -0,0 +1,22 @@ +name: Rust + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master", "dev" ] + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + + runs-on: windows-latest + + steps: + - uses: actions/checkout@v3 + - name: Build + run: cd rust&&cargo build + - name: Run tests + run: cd rust&&cargo test diff --git a/CMakeLists.txt b/CMakeLists.txt index d9a86293..fb2e0d4a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,8 @@ file(GLOB_RECURSE CPP_SOURCES "src/*.cpp") message(${CPP_SOURCES}) -add_library(foo) +add_library(foo + src/compiler/compiler.cppm) target_sources(foo PUBLIC diff --git a/doc/developer/parser.yy b/doc/developer/ebnf.md similarity index 63% rename from doc/developer/parser.yy rename to doc/developer/ebnf.md index 33cee04d..8fa5e37b 100644 --- a/doc/developer/parser.yy +++ b/doc/developer/ebnf.md @@ -1,94 +1,3 @@ -%skeleton "lalr1.cc" // -*- C++ -*- -%require "3.8.2" -%header - -/* %define api.token.raw */ - -%define api.token.constructor -%define api.value.type variant -%define parse.assert - -%code requires { - # include - namespace trc::compiler { - class compiler; - } -} - -// The parsing context. -%param { trc::compiler::compiler& drv } - -%locations - -%define parse.trace -%define parse.error detailed -%define parse.lac full - -%code { -# include "compiler.hpp" -} - -/* %define api.value.type variant */ -%token FOR // for - WHILE // while - IF // if - FUNC // function - CLASS // class - ADD // + - SUB // - - MUL // * - DIV // / - ZDIV // // - MOD // % - POW // ** - AND // and - OR // or - NOT // not - EQUAL // == - UNEQUAL // != - LESS // < - GREATER // > - LESS_EQUAL // <= - GREATER_EQUAL // >= - IMPORT // import - GOTO // goto - DEL // del - ASSERT // assert - BREAK // break - CONTINUE // continue - SELFADD // += - SELFSUB // -= - SELFMUL // *= - SELFDIV // /= - SELFZDIV // //= - SELFMOD // %= - SELFPOW // **= - ASSIGN // = - STORE // := - NAME // 名称 - NULL_ // null - TRUE_ // true - FALSE_ // false - STRING_VALUE // 字符串值 - LONG_FLOAT_VALUE // 长浮点型值 - FLOAT_VALUE // 浮点数值 - LONG_INT_VALUE // 长整型值 - INT_VALUE // 整型值 - LEFT_BIG_BRACE // { - RIGHT_BIG_BRACE // } - LEFT_SMALL_BRACE // ( - RIGHT_SMALL_BRACE // ) - LEFT_MID_BRACE // [ - RIGHT_MID_BRACE // ] - POINT //. - COMMA // , - ELSE - IN - RETURN - PUBLIC - PRIVATE -%start program -%% program : statements statements : statements statement | statement statement : @@ -173,4 +82,4 @@ classdef : CLASS NAME LEFT_SMALL_BRACE NAME RIGHT_SMALL_BRACE LEFT_BIG_BRACE opt_funcdef_valdef RIGHT_BIG_BRACE assert : ASSERT expr del : DEL expr -%% + diff --git a/locales/zh_CN/LC_MESSAGES/trans.mo b/locales/zh_CN/LC_MESSAGES/trans.mo new file mode 100644 index 00000000..000dafd2 Binary files /dev/null and b/locales/zh_CN/LC_MESSAGES/trans.mo differ diff --git a/locales/zh_CN/LC_MESSAGES/trans.po b/locales/zh_CN/LC_MESSAGES/trans.po index 3d21edc8..bb59129d 100644 --- a/locales/zh_CN/LC_MESSAGES/trans.po +++ b/locales/zh_CN/LC_MESSAGES/trans.po @@ -1,4 +1,3 @@ - msgid "NamaError" msgstr "名字错误:" @@ -47,11 +46,11 @@ msgstr "运算符错误:" msgid "RedefinedError" msgstr "重定义错误:" -msgid "Error from " -msgstr "错误来自" +msgid "Error from {}" +msgstr "错误来自{}" -msgid "Error in line" -msgstr "错误发生在行" +msgid "Error in line {}" +msgstr "错误发生在行{}" msgid "Name \"%s\" is not defined." msgstr "名字\"%s\"没有被定义." @@ -68,7 +67,7 @@ msgstr "无法从操作系统中申请内存." msgid "\"%s\" division by zero." msgstr "\"%s\"被零除" -msgid "Could't find \"%\s\" module." +msgid "Could't find \"{}\" module." msgstr "无法找到\"%s\"模块." msgid "Key \"%s\" is not defined." @@ -77,11 +76,11 @@ msgstr "键\"%s\"未定义" msgid "%s is out of %s" msgstr "%超出了%的范围" -msgid "\"%s\" could not be \"%s\""" +msgid "\"%s\" could not be \"%s\"" msgstr "\"%s\"不能被转换为\"%s\"" -msgid "Couldn't use %s for types:%s and %s" -msgstr "不能使用运算符\"%s\"对于类型:%s and %s" +msgid "Couldn't use {} for types:{} and {}" +msgstr "不能使用运算符\"{}\"对于类型:{}和{}" msgid "Function %s is redefined" msgstr "函数%s被重定义" @@ -95,17 +94,17 @@ msgstr "%s需要%s个参数." msgid "Number %s is incorrect." msgstr "数字%s不正确." -msgid "The string isn't end with \" or \'" -msgstr "这个字符串不以\"或\'结尾" +msgid "The string isn't end with \" or '" +msgstr "这个字符串不以\"或'结尾" msgid "%s is not be expected." msgstr "%s是不被期待的" -msgid "%s is excepted." -msgstr "%s是被期待的."; +msgid "{} is excepted." +msgstr "{}是被期待的." msgid "Comments should end with */" -msgstr "多行注释应当以*/结尾"; +msgstr "多行注释应当以*/结尾" msgid "Escape character %s is not defined." msgstr "转义字符%s未定义." @@ -126,37 +125,34 @@ msgid "Dll %s was not found" msgstr "找不到dll\"%s\"" -msgid "The program executed code that should not have been executed.Please -report the problem to the Github repository." -msgstr "这个项目运行了不应该被运行的代码 -.请将这个问题报告给github仓库"; +msgid "The program executed code that should not have been executed.Please report the problem to the Github repository." +msgstr "这个项目运行了不应该被运行的代码.请将这个问题报告给github仓库" -msgid "Trc:\"%s\" is not a ctree file.Because its -magic number is error\n" -msgstr "Trc:\"%s\"不是一个ctree文件.因为它的魔数不正确.\n" +msgid "Trc:\"{}\" is not a ctree file.Because its magic number is error\n" +msgstr "Trc:\"{}\"不是一个ctree文件.因为它的魔数不正确.\n" -msgid "Trc is a stack programming language. This -project implements most of the modern -programming language basics, provides a perfect -tool chain, which is suitable for working -scripts or embedded in your projects, and helps +msgid "Trc is a stack programming language. This \ +project implements most of the modern \ +programming language basics, provides a perfect \ +tool chain, which is suitable for working \ +scripts or embedded in your projects, and helps \ to learn how to compile. " -msgstr "Trc是一门基于栈的编程语言。这个项目实现 -了大部分现代编程语言的功能,提供了一个完 -善的工具链。它很方便被嵌入到你的项目中或 +msgstr "Trc是一门基于栈的编程语言。这个项目实现\ +了大部分现代编程语言的功能,提供了一个完\ +善的工具链。它很方便被嵌入到你的项目中或\ 者作为工作脚本,也可以帮助你去学习编译原理." msgid "mode is not defined.\n" -msgstr "模式没有被定义\n"; +msgstr "模式没有被定义\n" msgid "var" msgstr "变量" msgid " is not defined.\n" -msgstr "没有被定义" +msgstr "没有被定义.\n" -msgid "Trc debugger is running.You can read 'doc/use/TDB.md' to find the help." -msgstr "trc的调试器正在运行.你可以阅读'doc/ +msgid "Trc debugger is running.You can read 'doc/use/TDB.md' to find the help.\n" +msgstr "trc的调试器正在运行.你可以阅读'doc/\ use/TDB.md'以寻找帮助.\n" msgid "instruction" diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 53c16b03..1a0a5031 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.8" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628a8f9bd1e24b4e0db2b4bc2d000b001e7dd032d54afa60a68836aeec5aa54a" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" dependencies = [ "anstyle", "anstyle-parse", @@ -59,6 +59,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + [[package]] name = "block" version = "0.1.6" @@ -144,9 +150,9 @@ checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" [[package]] name = "getrandom" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", "libc", @@ -187,9 +193,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.151" +version = "0.2.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" [[package]] name = "locale_config" @@ -219,6 +225,36 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +[[package]] +name = "num-bigint" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", +] + [[package]] name = "objc" version = "0.2.7" @@ -256,9 +292,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" -version = "1.0.76" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] @@ -304,9 +340,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.2" +version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", @@ -316,9 +352,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" dependencies = [ "aho-corasick", "memchr", @@ -362,6 +398,8 @@ dependencies = [ "colored", "downcast-rs", "gettext-rs", + "lazy_static", + "num-bigint", "rand", ] diff --git a/rust/Cargo.toml b/rust/Cargo.toml index ba9a3781..827f3745 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -16,3 +16,8 @@ clap = { version = "4.4.18", features = ["derive"] } gettext-rs = "0.7.0" colored = "2.1.0" downcast-rs = "1.2.0" +lazy_static = "1.4.0" +num-bigint = "0.4.4" + +[profile.release] +panic = "abort" diff --git a/rust/README.md b/rust/README.md index ca06ea18..4613bcb5 100644 --- a/rust/README.md +++ b/rust/README.md @@ -11,6 +11,7 @@ So this is just an experimental project and I will keep develop c++ version and Like other common rust project.Just use ```cargo build``` Running tests is like other rust project,too.Just ```cargo test``` +But in order to read test data file,please run in the root dir. ## How to use diff --git a/rust/docs/developer/EBNF.md b/rust/docs/developer/EBNF.md new file mode 100644 index 00000000..b0ce1a15 --- /dev/null +++ b/rust/docs/developer/EBNF.md @@ -0,0 +1,15 @@ +# the ebnf of the trc + +help to develop the compiler + +programs : statements +statements : statements statement +statement : + ID := expr + ID(argvs) +opt_argvs: argvs | empty +argvs : argvs , argv | argv +argv : expr +expr : expr + term | expr - term +term : term * factor | term / factor +factor : (expr) | ID diff --git a/rust/docs/usage.md b/rust/docs/usage.md index cdaad5fe..21912d35 100644 --- a/rust/docs/usage.md +++ b/rust/docs/usage.md @@ -45,6 +45,7 @@ There are also many easape char in the string: |\\\\|\| |\'|'| |\"|"| +|\0|the tick of the end of the string| If you add ```r``` or ```R``` in front of the string.Trc will treat it as a raw string. Yes.These rules are from Python.I love its grammar rules @@ -82,6 +83,19 @@ Here are the operator support |>>|bit right shift| |!|not| +Obviously,operators like ```+=``` is supported,too. + +But,something should be noticed is that you cannot use logical operators for ```int``` or anything else,just for bool. + +So,code like this cannot be compiled successfully: + +```rust +a := 1 +if a { + println(a) +} +``` + Ok,just like others language,but there is an important difference. you cannot use the different types of values to calaulate @@ -124,3 +138,24 @@ the second is use ```/**/```,this kind can cross the line,like: hello world! */ ``` + +## the var of trc + +First,we support the UTF-8 with your var name.So you can define your var like this: +```go +你好:=90 +``` + +## Data structures for Trc + +Std lib provide many kinds of data structures for Trc.Here is the list: + +|Structure| +|:---| +|St table| +|suffix automaton| +|ac automaton| +|list| +|forward list| +|stack| +|deque| diff --git a/rust/locales/zh_CN/LC_MESSAGES/trans.mo b/rust/locales/zh_CN/LC_MESSAGES/trans.mo index d0511b6a..4a4fbf1f 100644 Binary files a/rust/locales/zh_CN/LC_MESSAGES/trans.mo and b/rust/locales/zh_CN/LC_MESSAGES/trans.mo differ diff --git a/rust/locales/zh_CN/LC_MESSAGES/trans.po b/rust/locales/zh_CN/LC_MESSAGES/trans.po index 0cf3d469..cf011ce6 100644 --- a/rust/locales/zh_CN/LC_MESSAGES/trans.po +++ b/rust/locales/zh_CN/LC_MESSAGES/trans.po @@ -22,3 +22,34 @@ msgstr "操作符{}不支持类型{}" msgid "The number of data of vm stack is not correct, should have {} data" msgstr "虚拟机栈中数据数量不正确,期望有{}个数据" + +msgid "{} is unmatched" +msgstr "{}未匹配" + +msgid "ZeroDivisionError" +msgstr "除零错误" + +msgid "{} is divided by zero" +msgstr "{}被零除" + +msgid "frame stack is empty.But running a pop frame opcode" +msgstr "帧栈为空,但运行了pop frame指令" + +msgid "Prefix {} can be used for float" +msgstr "前缀{}不能对浮点数使用" + +msgid "NumberOverFlowError" +msgstr "数值溢出错误" + +msgid "Float {} is too large to store" +msgstr "浮点数{}超过了储存范围" + +msgid "token {} is not expected" +msgstr "token{}不是被期望的" + +msgid "In module {}" +msgstr "在模块{}中" + +msgid "Error in line {}" +msgstr "错误在第{}行" + diff --git a/rust/script/pre-commit b/rust/script/pre-commit new file mode 100755 index 00000000..97d4f1b6 --- /dev/null +++ b/rust/script/pre-commit @@ -0,0 +1,8 @@ +#!/bin/sh + +cd script +python3 gen_locales.py +cd ../rust +cargo fmt +cd script +python3 gen_locales.py diff --git a/rust/src/base.rs b/rust/src/base.rs index a91e7351..2e946440 100644 --- a/rust/src/base.rs +++ b/rust/src/base.rs @@ -1 +1,4 @@ +pub mod codegen; +pub mod ctrc; pub mod error; +pub mod func; diff --git a/rust/src/base/codegen.rs b/rust/src/base/codegen.rs new file mode 100644 index 00000000..ca000937 --- /dev/null +++ b/rust/src/base/codegen.rs @@ -0,0 +1,71 @@ +use super::func; + +pub enum Opcode { + Add, + Sub, + Mul, + Div, + ExtraDiv, + Mod, + Power, + Eq, + Ne, + Lt, + Le, + Gt, + Ge, + And, + Or, + Not, + Xor, + BitNot, + BitAnd, + BitOr, + BitLeftShift, + BitRightShift, + // change the option code index + Goto, + // return from a function + PopFrame, + // create a frame to hold the function + NewFrame, + // Load a int from const pool + LoadInt, +} + +pub struct ConstPool { + pub intpool: Vec, + pub stringpool: Vec, + pub floatpool: Vec, +} + +impl ConstPool { + pub fn new() -> Self { + Self { + intpool: Vec::new(), + stringpool: Vec::new(), + floatpool: Vec::new(), + } + } +} + +pub struct Inst { + pub opcode: Opcode, + pub operand: usize, +} + +pub struct StaticData { + pub constpool: ConstPool, + pub inst: Vec, + pub funcs: Vec, +} + +impl StaticData { + pub fn new() -> StaticData { + Self { + constpool: ConstPool::new(), + inst: vec![], + funcs: vec![], + } + } +} diff --git a/rust/src/base/ctrc.rs b/rust/src/base/ctrc.rs new file mode 100644 index 00000000..7be5b67f --- /dev/null +++ b/rust/src/base/ctrc.rs @@ -0,0 +1,3 @@ +//! generate ctrc file +//! ctrc file is trc's compiled object +//! can be loaded and runned by vm without compiling diff --git a/rust/src/base/error.rs b/rust/src/base/error.rs index 913305af..4a6fc8a8 100644 --- a/rust/src/base/error.rs +++ b/rust/src/base/error.rs @@ -1,42 +1,77 @@ use gettextrs::gettext; -use std::process::exit; - -const EXIT_FAILURE: i32 = 1; +use std::error::Error; +use std::fmt::{Debug, Display}; pub const SYNTAX_ERROR: &str = "SyntaxError"; pub const OPERATOR_ERROR: &str = "OperatorError"; -pub const VM_ERROR:&str = "VmError"; +pub const VM_ERROR: &str = "VmError"; +pub const ZERO_DIVSION_ERROR: &str = "ZeroDivisionError"; +pub const NUMBER_OVER_FLOW: &str = "NumberOverFlowError"; pub const STRING_WITHOUT_END: &str = "this string should be ended with {}"; +pub const UNMATCHED_BRACE: &str = "{} is unmatched"; pub const OPERATOR_IS_NOT_SUPPORT: &str = "operator {} is not supported for type {}"; -pub const VM_DATA_NUMBER:&str = "The number of data of vm stack is not correct, should have {} data"; +pub const VM_DATA_NUMBER: &str = + "The number of data of vm stack is not correct, should have {} data"; +pub const VM_FRAME_EMPTY: &str = "frame stack is empty.But running a pop frame opcode"; +pub const ZERO_DIV: &str = "{} is divided by zero"; +pub const PREFIX_FOR_FLOAT: &str = "Prefix {} can be used for float"; +pub const FLOAT_OVER_FLOW: &str = "Float {} is too large to store"; +pub const UNEXPECTED_TOKEN: &str = "token {} is not expected"; +pub const ERROR_IN_LINE: &str = "Error in line {}"; +pub const IN_MODULE: &str = "In module {}"; +#[derive(Debug)] pub struct ErrorInfo { pub message: String, - errot_type: &'static str, + error_type: String, } impl ErrorInfo { - pub fn new(message: String, error_type: &'static str) -> ErrorInfo { + pub fn new(message: String, error_type: String) -> ErrorInfo { ErrorInfo { message, - errot_type: error_type, + error_type, } } } -pub trait ErrorContent { +pub trait ErrorContent: Debug + Send + Sync { fn get_module_name(&self) -> &str; fn get_line(&self) -> usize; } -/// report error in vm or compiler -/// we will translate the error type to gettextrs -/// but you should translate the error messgae by caller -pub fn report_error(content: &impl ErrorContent, info: ErrorInfo) { - eprintln!("Error in line {}", content.get_line()); - eprintln!("In module {}", content.get_module_name()); - eprintln!("{}:{}", gettext(info.errot_type), info.message); - exit(EXIT_FAILURE); +#[derive(Debug)] +pub struct RuntimeError { + content: Box, + info: ErrorInfo, +} + +impl Error for RuntimeError {} + +impl Display for RuntimeError { + /// report error in vm or compiler + /// we will translate the error type to gettextrs + /// but you should translate the error messgae by caller + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = format!( + r#"{} +{} +{}:{}"#, + gettext!(ERROR_IN_LINE, self.content.get_line()), + gettext!(IN_MODULE, self.content.get_module_name()), + gettext(self.info.error_type.clone()), + self.info.message + ); + write!(f, "{}", s) + } } + +impl RuntimeError { + pub fn new(content: Box, info: ErrorInfo) -> RuntimeError { + RuntimeError { content, info } + } +} + +pub type RunResult = Result; diff --git a/rust/src/base/func.rs b/rust/src/base/func.rs new file mode 100644 index 00000000..4da3b020 --- /dev/null +++ b/rust/src/base/func.rs @@ -0,0 +1,9 @@ +pub struct Func { + pub name: String, +} + +impl Func { + fn new(name: String) -> Self { + Self { name } + } +} diff --git a/rust/src/cfg.rs b/rust/src/cfg.rs index 25a01f3b..bd7a7109 100644 --- a/rust/src/cfg.rs +++ b/rust/src/cfg.rs @@ -1,3 +1,4 @@ //! some constant values and configurations in trc pub const MAIN_MODULE_NAME: &str = "main"; +pub const FLOAT_OVER_FLOW_LIMIT: usize = 18; diff --git a/rust/src/compiler.rs b/rust/src/compiler.rs index ba288750..bcabd5c5 100644 --- a/rust/src/compiler.rs +++ b/rust/src/compiler.rs @@ -1,13 +1,14 @@ //! reference iterator:https://stackoverflow.com/questions/43952104/how-can-i-store-a-chars-iterator-in-the-same-struct-as-the-string-it-is-iteratin //! reference float hash map:https://www.soinside.com/question/tUJxYmevbVSHZYe2C2AK5o +mod ast; mod token; use self::token::TokenLex; -use crate::base::error; +use crate::base::codegen::{ConstPool, StaticData}; +use crate::base::error::{self, RunResult}; use crate::cfg; -use crate::tvm::ConstPool; -use std::collections::hash_map; +use std::collections::{hash_map, HashMap}; use std::io::BufRead; use std::{fs, io, vec}; @@ -22,6 +23,7 @@ pub struct Option { inputsource: InputSource, } +#[derive(Debug, Clone)] pub struct Content { module_name: String, line: usize, @@ -45,6 +47,13 @@ impl Content { } } + pub fn new_line(module_name: &str, line: usize) -> Self { + Self { + module_name: String::from(module_name), + line, + } + } + pub fn add_line(&mut self) { self.line += 1; } @@ -52,6 +61,10 @@ impl Content { pub fn del_line(&mut self) { self.line -= 1; } + + pub fn set_line(&mut self, line: usize) { + self.line = line; + } } impl Option { @@ -63,64 +76,94 @@ impl Option { } } -#[derive(Hash, Eq, PartialEq)] +#[derive(Hash, Eq, PartialEq, Clone)] pub struct Float { - front:i32, - back:i32 + front: u32, + back: u32, } impl Float { - fn new(front:i32, back:i32) -> Self { - Self { - front, - back + fn new(front: u32, back: u32) -> Self { + Self { front, back } + } + + fn get_len(mut tmp: u32) -> u8 { + if tmp == 0 { + return 1; + } + let ret: u8 = 0; + while tmp != 0 { + tmp /= 10; + } + ret + } + + pub fn to_float(&self) -> f64 { + let len = Self::get_len(self.back); + let mut float_part = self.back as f64; + for _ in 0..len { + float_part /= 10.0; } + self.front as f64 + float_part } } +type Pool = hash_map::HashMap; + pub struct ValuePool { - const_ints: hash_map::HashMap, - const_strings: hash_map::HashMap, - const_floats: hash_map::HashMap + const_ints: Pool, + const_strings: Pool, + const_floats: Pool, + name_pool: Pool, + const_big_int: Pool, } -const INT_VAL_POOL_ZERO:usize = 0; -const INT_VAL_POOL_ONE:usize = 1; +const INT_VAL_POOL_ZERO: usize = 0; +const INT_VAL_POOL_ONE: usize = 1; impl ValuePool { fn new() -> Self { let mut ret = Self { - const_ints: hash_map::HashMap::new(), - const_floats: hash_map::HashMap::new(), - const_strings: hash_map::HashMap::new() + const_ints: HashMap::new(), + const_floats: HashMap::new(), + const_strings: HashMap::new(), + name_pool: HashMap::new(), + const_big_int: HashMap::new(), }; ret.add_int(0); ret.add_int(1); ret } - fn add_int(&mut self, val:i64) -> usize { + fn add_int(&mut self, val: i64) -> usize { let len_tmp = self.const_ints.len(); *self.const_ints.entry(val).or_insert(len_tmp) } - fn add_string(&mut self, val:String) -> usize { - let len_tmp = self.const_strings.len(); - *self.const_strings.entry(val).or_insert(len_tmp) + fn string_get(pool: &mut Pool, str: String) -> usize { + let len_tmp = pool.len(); + *pool.entry(str).or_insert(len_tmp) + } + + fn add_string(&mut self, val: String) -> usize { + Self::string_get(&mut self.const_strings, val) } - fn add_float(&mut self, val:Float) -> usize { + fn add_float(&mut self, val: Float) -> usize { let len_tmp = self.const_floats.len(); *self.const_floats.entry(val).or_insert(len_tmp) } + fn add_id(&mut self, val: String) -> usize { + Self::string_get(&mut self.name_pool, val) + } + fn store_val_to_vm(&mut self) -> ConstPool { let mut ret = ConstPool::new(); ret.intpool.resize(self.const_ints.len(), 0); for i in &self.const_ints { ret.intpool[*i.1] = *i.0; } - ret } } @@ -142,9 +185,27 @@ impl StringSource { } } +impl Iterator for StringSource { + type Item = char; + + fn next(&mut self) -> std::option::Option { + match self.read() { + '\0' => None, + other => Some(other), + } + } +} + impl TokenIo for StringSource { fn unread(&mut self, c: char) { + if c == '\0' { + return; + } self.pos -= self.prev_size; + // check if match the right char + if cfg!(debug_assertions) { + assert_eq!(self.text[self.pos..].chars().next().unwrap(), c); + } } fn read(&mut self) -> char { @@ -161,7 +222,7 @@ impl TokenIo for StringSource { } } -trait TokenIo { +trait TokenIo: Iterator { fn unread(&mut self, c: char); fn read(&mut self) -> char; @@ -178,11 +239,29 @@ impl FileSource { pub fn new(f: fs::File) -> Self { let buf = io::BufReader::new(f); let s = String::new(); - FileSource { + let mut ret = FileSource { back: vec![], buf, input_pos: 0, s, + }; + ret.init_new_line(); + ret + } + + fn init_new_line(&mut self) { + self.s.clear(); + self.buf.read_line(&mut self.s).unwrap(); + self.input_pos = 0; + } +} + +impl Iterator for FileSource { + type Item = char; + fn next(&mut self) -> std::option::Option { + match self.read() { + '\0' => None, + other => Some(other), } } } @@ -200,9 +279,10 @@ impl TokenIo for FileSource { let mut input_pos = self.s[self.input_pos..].chars(); match input_pos.next() { None => { - self.s.clear(); - self.buf.read_line(&mut self.s).unwrap(); - self.input_pos = 0; + self.init_new_line(); + if self.s.is_empty() { + return '\0'; + } } Some(c) => { self.input_pos += c.len_utf8(); @@ -215,14 +295,14 @@ impl TokenIo for FileSource { pub struct Compiler { // to support read from stdin and file - input: Box, + input: Box>, const_pool: ValuePool, option: Option, content: Content, } impl Compiler { - fn new(option: Option) -> Self { + pub fn new(option: Option) -> Self { match option.inputsource { InputSource::File(ref filename) => { let f = std::fs::File::open(filename); @@ -239,7 +319,7 @@ impl Compiler { } } - fn new_string_compiler(option: Option, source: &str) -> Self { + pub fn new_string_compiler(option: Option, source: &str) -> Self { Compiler { input: Box::new(StringSource::new(String::from(source))), const_pool: ValuePool::new(), @@ -248,7 +328,62 @@ impl Compiler { } } - fn lex(&mut self) { + pub fn lex(&mut self) -> RunResult<()> { let token_lexer = TokenLex::new(self); + let mut ast_builder = ast::AstBuilder::new(token_lexer); + ast_builder.generate_code()?; + Ok(()) + } +} + +mod tests { + use super::*; + use std::fs::{read_to_string, File}; + + fn check_read(reader: &mut impl TokenIo, s: &str) { + let mut iter = s.chars(); + for i in reader { + assert_eq!(i, iter.next().unwrap()); + } + assert_eq!(iter.next(), None); + } + + #[test] + fn test_string_literal() { + let source = "source\np"; + let mut t = StringSource::new(String::from(source)); + let mut tmp: Vec = vec![t.read(), t.read()]; + tmp.reverse(); + for i in &tmp { + t.unread(*i); + } + check_read(&mut t, source) + } + + #[test] + fn test_file_read() { + let test_file_path = "tests/testdata/compiler/compiler1.txt"; + let source = read_to_string(test_file_path).expect("please run in root dir"); + let mut t = FileSource::new(File::open(test_file_path).expect("please run in root dir")); + let mut tmp: Vec = vec![t.read(), t.read()]; + tmp.reverse(); + for i in &tmp { + t.unread(*i); + } + check_read(&mut t, &source) + } + + #[test] + fn test_value_pool() { + let mut pool = ValuePool::new(); + assert_eq!(pool.add_int(7), 2); + assert_eq!(pool.add_int(1), INT_VAL_POOL_ONE); + assert_eq!(pool.add_int(0), INT_VAL_POOL_ZERO); + assert_eq!(pool.add_float(Float::new(9, 0)), 0); + assert_eq!(pool.add_float(Float::new(9, 0)), 0); + assert_eq!(pool.add_float(Float::new(9, 5)), 1); + assert_eq!(pool.add_string(String::from("value")), 0); + assert_eq!(pool.add_string(String::from("value")), 0); + assert_eq!(pool.add_string(String::from("vale")), 1); } } diff --git a/rust/src/compiler/ast.rs b/rust/src/compiler/ast.rs new file mode 100644 index 00000000..ad338891 --- /dev/null +++ b/rust/src/compiler/ast.rs @@ -0,0 +1,70 @@ +use super::{token::Token, Compiler, TokenLex}; +use crate::base::{codegen::StaticData, error::*}; +use clap::error; +use gettextrs::gettext; + +pub struct AstBuilder<'a> { + token_lexer: TokenLex<'a>, + staticdata: StaticData, +} + +impl<'a> AstBuilder<'a> { + pub fn new(token_lexer: TokenLex<'a>) -> Self { + AstBuilder { + token_lexer, + staticdata: StaticData::new(), + } + } + + fn while_lex(&mut self) -> RunResult<()> { + Ok(()) + } + + fn for_lex(&mut self) -> RunResult<()> { + Ok(()) + } + + fn generate_block(&mut self, t: Token) -> RunResult<()> { + Ok(()) + } + + fn statement(&mut self, mut t: Token) -> RunResult<()> { + match t.tp { + super::token::TokenType::ID => {} + _ => { + return Err(RuntimeError::new( + Box::new(self.token_lexer.compiler_data.content.clone()), + ErrorInfo::new( + gettextrs::gettext(SYNTAX_ERROR), + gettext!(UNEXPECTED_TOKEN, t.tp.to_string()), + ), + )) + } + } + Ok(()) + } + + pub fn generate_code(&mut self) -> RunResult<()> { + loop { + let token = self.token_lexer.next_token()?; + match token { + Some(token) => { + self.statement(token)?; + } + None => { + return Ok(()); + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_assign() {} + + #[test] + fn builtin_function_call() {} +} diff --git a/rust/src/compiler/token.rs b/rust/src/compiler/token.rs index 32c9a777..b74c11dd 100644 --- a/rust/src/compiler/token.rs +++ b/rust/src/compiler/token.rs @@ -1,240 +1,634 @@ +use super::{Compiler, Content, Float, INT_VAL_POOL_ZERO}; +use crate::{ + base::error::{ + self, ErrorContent, ErrorInfo, RunResult, RuntimeError, FLOAT_OVER_FLOW, NUMBER_OVER_FLOW, + PREFIX_FOR_FLOAT, SYNTAX_ERROR, + }, + cfg::FLOAT_OVER_FLOW_LIMIT, +}; use gettextrs::gettext; +use lazy_static::lazy_static; +use std::{collections::HashMap, fmt::Display, process::exit}; -use crate::base::error; -use super::{Compiler, INT_VAL_POOL_ZERO}; - -#[derive(PartialEq, Debug)] -enum TokenType { +#[derive(PartialEq, Debug, Clone)] +pub enum TokenType { // . - DOT, + Dot, // , - COMMA, + Comma, // { - LEFT_BIG_BRACE, + LeftBigBrace, // } - RIGHT_BIG_BRACE, + RightBigBrace, // [ - LEFT_MIDDLE_BRACE, + LeftMiddleBrace, // ] - RIGHT_MIDDLE_BRACE, + RightMiddleBrace, // ( - LEFT_SMALL_BRACE, + LeftSmallBrace, // ) - RIGHT_SMALL_BRACE, + RightSmallBrace, // + - ADD, + Add, // - - SUB, + Sub, // * - MUL, + Mul, // / - DIV, + Div, // % - MOD, + Mod, // // - EXACT_DIVISION, + ExactDiv, + // ~ + BitNot, + // << + BitLeftShift, + // >> + BitRightShift, + // & + BitAnd, + // | + BitOr, + // ^ + Xor, + // ** + Power, // += - SELF_ADD, + SelfAdd, // -= - SELF_SUB, + SelfSub, // *= - SELF_MUL, + SelfMul, // /= - SELF_DIV, + SelfDiv, // //= - SELF_EXTRA_DIV, + SelfExactDiv, // %= - SELF_MOD, - // ** - POWER, + SelfMod, // **= - SELF_POWER, - INT_VALUE, - STRING_VALUE, - FLOAT_VALUE, - LONG_INT_VALUE, - LONG_FLOAT_VALUE, + SelfPower, + // ~= + SelfBitNot, + // <<= + SelfBitLeftShift, + // >>= + SelfBitRightShift, + // &= + SelfBitAnd, + // |= + SelfBitOr, + // ^= + SelfXor, + IntValue, + StringValue, + FloatValue, + LongIntValue, // = - ASSIGN, + Assign, // := - STORE, + Store, // == - EQUAL, + Equal, // != - UNEQUAL, + NotEqual, // > - GREATER, + Greater, // < - LESS, + Less, // <= - LESS_EQUAL, + LessEqual, // >= - GREATER_EQUAL, + GreaterEqual, // ! - NOT, + Not, + // || + Or, + // && + And, + // : + Colon, + // ; + Semicolon, + ID, + While, + For, + If, + Else, + Class, + Match, + Func, + EndOfLine, } -#[derive(PartialEq, Debug)] -pub enum Data { - Ind(usize), - NONEDATA, +impl Display for TokenType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let res: String; + match self { + TokenType::Dot => res = ".".to_string(), + TokenType::Comma => res = ",".to_string(), + TokenType::LeftBigBrace => res = "{".to_string(), + TokenType::RightBigBrace => res = "}".to_string(), + TokenType::LeftMiddleBrace => res = "[".to_string(), + TokenType::RightMiddleBrace => res = "]".to_string(), + TokenType::LeftSmallBrace => res = "(".to_string(), + TokenType::RightSmallBrace => res = ")".to_string(), + TokenType::Add => res = "+".to_string(), + TokenType::Sub => res = "-".to_string(), + TokenType::Mul => res = "*".to_string(), + TokenType::Div => res = "/".to_string(), + TokenType::Mod => res = "%".to_string(), + TokenType::ExactDiv => res = "//".to_string(), + TokenType::BitNot => res = "~".to_string(), + TokenType::BitLeftShift => res = "<<".to_string(), + TokenType::BitRightShift => res = ">>".to_string(), + TokenType::BitAnd => res = "&".to_string(), + TokenType::BitOr => res = "|".to_string(), + TokenType::Xor => res = "^".to_string(), + TokenType::Power => res = "**".to_string(), + TokenType::SelfAdd => res = "+=".to_string(), + TokenType::SelfSub => res = "-=".to_string(), + TokenType::SelfMul => res = "*=".to_string(), + TokenType::SelfDiv => res = "/=".to_string(), + TokenType::SelfExactDiv => res = "//=".to_string(), + TokenType::SelfMod => res = "%=".to_string(), + TokenType::SelfPower => res = "**=".to_string(), + TokenType::SelfBitNot => res = "~=".to_string(), + TokenType::SelfBitLeftShift => res = "<<=".to_string(), + TokenType::SelfBitRightShift => res = ">>=".to_string(), + TokenType::SelfBitAnd => res = "&=".to_string(), + TokenType::SelfBitOr => res = "|=".to_string(), + TokenType::SelfXor => res = "^=".to_string(), + TokenType::IntValue => res = "integer".to_string(), + TokenType::StringValue => res = "string".to_string(), + TokenType::FloatValue => res = "float".to_string(), + TokenType::LongIntValue => res = "long integer".to_string(), + TokenType::Assign => res = "=".to_string(), + TokenType::Store => res = ":=".to_string(), + TokenType::Equal => res = "==".to_string(), + TokenType::NotEqual => res = "!=".to_string(), + TokenType::Greater => res = ">".to_string(), + TokenType::Less => res = "<".to_string(), + TokenType::LessEqual => res = "<=".to_string(), + TokenType::GreaterEqual => res = ">=".to_string(), + TokenType::Not => res = "!".to_string(), + TokenType::Or => res = "||".to_string(), + TokenType::And => res = "&&".to_string(), + TokenType::Colon => res = ":".to_string(), + TokenType::Semicolon => res = ";".to_string(), + TokenType::ID => res = "identifier".to_string(), + TokenType::While => res = "while".to_string(), + TokenType::For => res = "for".to_string(), + TokenType::If => res = "if".to_string(), + TokenType::Else => res = "else".to_string(), + TokenType::Class => res = "class".to_string(), + TokenType::Match => res = "match".to_string(), + TokenType::Func => res = "func".to_string(), + TokenType::EndOfLine => res = "end of line".to_string(), + } + write!(f, "{}", res) + } } #[derive(PartialEq, Debug)] pub struct Token { - tp: TokenType, - data: Data, + pub tp: TokenType, + pub data: Option, } -pub struct TokenLex<'code> { - compiler_data: &'code mut Compiler, +struct BraceRecord { + c: char, + line: usize, } -impl Token { - fn new(tp: TokenType, data: Option) -> Token { - match data { - Some(data) => Token { tp, data }, - None => Token { - tp, - data: Data::NONEDATA, - }, - } +impl BraceRecord { + fn new(c: char, line: usize) -> BraceRecord { + BraceRecord { c, line } } } -impl Iterator for TokenLex<'_> { - type Item = Token; - fn next(&mut self) -> Option { - self.next_token() - } +pub struct TokenLex<'code> { + pub compiler_data: &'code mut Compiler, + braces_check: Vec, + unget_token: Vec, } -macro_rules! binary_symbol { - ($a:expr, $b:expr, $binary_sym:expr, $sself:expr) => {{ - let c = $sself.compiler_data.input.read(); - if c == $binary_sym { - return Token::new($b, None) - } - $sself.compiler_data.input.unread(c); - Token::new($a, None) - }} +impl Token { + fn new(tp: TokenType, data: Option) -> Token { + Token { tp, data } + } } -macro_rules! self_symbol { -($sym:expr, $self_sym:expr, $sself:expr) => - {{ - binary_symbol!($sym, $self_sym, '=', $sself) +macro_rules! check_braces_match { + ($sself:expr, $should_be_matched:expr, $brace_record:expr, $($front_brace:expr => $after_brace:expr),*) => {{ + match $brace_record.c { + $( + $front_brace => { + if $should_be_matched != $after_brace { + return Err(error::RuntimeError::new( + Box::new(Content::new_line(&$sself.compiler_data.content.module_name, $brace_record.line)), + ErrorInfo::new( + gettext!(error::UNMATCHED_BRACE, $brace_record.c), + gettext(error::SYNTAX_ERROR), + ), + )); + } + }, + )* + _ => { + panic!("unmatched {}", $brace_record.c) + } + } }} } -macro_rules! double_symbol { - ($before_sym:expr, $before_self_sym:expr, $matched_sym:expr, $matched_self_sym:expr, matched_char:expr, $sself:expr) => { +macro_rules! hash_map { + ($($key:expr => $val:expr),*) => { { - let c = $sself.compiler_data.input.read(); - if c == $matched_char { - return self_symbol!($matched_sym, $matched_self_sym, self) - } - self.compiler_data.input.unread(c); - return self_symbol!($before_sym, $before_self_sym, self); + use std::collections::hash_map::HashMap; + let mut ret = HashMap::new(); + $( + ret.insert($key, $val); + )* + ret } }; } +lazy_static! { + static ref KEYWORDS: HashMap = hash_map![ + String::from("while") => TokenType::While, + String::from("for") => TokenType::For, + String::from("if") => TokenType::If, + String::from("else") => TokenType::Else, + String::from("class") => TokenType::Class, + String::from("func") => TokenType::Func, + String::from("match") => TokenType::Match + ]; +} + +enum NumValue { + Integer(String), + Float(String, String), +} + impl TokenLex<'_> { pub fn new<'a>(compiler_data: &'a mut Compiler) -> TokenLex<'a> { - TokenLex { compiler_data } + TokenLex { + compiler_data, + braces_check: vec![], + unget_token: vec![], + } } - fn lex_symbol(&mut self, c: char) -> Token { - match c { - '.' => Token::new(TokenType::DOT, None), - ',' => Token::new(TokenType::COMMA, None), - '{' => Token::new(TokenType::LEFT_BIG_BRACE, None), - '}' => Token::new(TokenType::RIGHT_BIG_BRACE, None), - '[' => Token::new(TokenType::LEFT_MIDDLE_BRACE, None), - ']' => Token::new(TokenType::RIGHT_MIDDLE_BRACE, None), - '(' => Token::new(TokenType::LEFT_SMALL_BRACE, None), - ')' => Token::new(TokenType::RIGHT_SMALL_BRACE, None), - '+' => self_symbol!(TokenType::ADD, TokenType::SELF_ADD, self), - '-' => self_symbol!(TokenType::SUB, TokenType::SELF_SUB, self), - '*' => { - let c = self.compiler_data.input.read(); - if c == '*' { - return self_symbol!(TokenType::POWER, TokenType::SELF_POWER, self) - } - self.compiler_data.input.unread(c); - return self_symbol!(TokenType::MUL, TokenType::SELF_MUL, self); - }, - '%' => self_symbol!(TokenType::MOD, TokenType::SELF_MOD, self), - '/' => { + fn check_braces_stack(&mut self, c: char) -> Result<(), RuntimeError> { + let top = self.braces_check.pop(); + match top { + None => { + return Err(RuntimeError::new( + Box::new(self.compiler_data.content.clone()), + ErrorInfo::new( + gettext!(error::UNMATCHED_BRACE, c), + gettext(error::SYNTAX_ERROR), + ), + )); + } + Some(cc) => { + check_braces_match!(self, c, cc, + '{' => '}', + '[' => ']', + '(' => ')' + ); + Ok(()) + } + } + } + + fn lex_id(&mut self, c: char) -> error::RunResult { + Ok({ + let mut retname: String = String::from(c); + loop { let c = self.compiler_data.input.read(); - if c == '=' { - return Token::new(TokenType::SELF_DIV, None) + if Self::is_id_char(c) { + retname.push(c); + } else { + self.compiler_data.input.unread(c); + break; } - self.compiler_data.input.unread(c); - Token::new(TokenType::DIV, None) - }, - '=' => binary_symbol!(TokenType::ASSIGN, TokenType::EQUAL, '=', self), - '!' => binary_symbol!(TokenType::NOT, TokenType::UNEQUAL, '=', self), - '>' => binary_symbol!(TokenType::GREATER, TokenType::GREATER_EQUAL, '=', self), - '<' => binary_symbol!(TokenType::LESS, TokenType::LESS_EQUAL, '=', self), - _ => panic!("Not a symbol.Compiler error"), + } + let tmp = KEYWORDS.get(&retname); + match tmp { + Some(val) => Token::new((*val).clone(), None), + None => Token::new( + TokenType::ID, + Some(self.compiler_data.const_pool.add_id(retname)), + ), + } + }) + } + + fn check_whether_symbol(c: char) -> bool { + match c { + '.' | ',' | '{' | '}' | '[' | ']' | '(' | ')' | '+' | '-' | '*' | '%' | '/' | '=' + | '!' | '>' | '<' | '~' | '^' | '|' | ':' | ';' => true, + _ => false, } } - fn lex_num(&mut self, c: char) -> Token { - // to save the int in str - let mut s = String::new(); + fn is_useless_char(c: char) -> bool { + match c { + ' ' | '\n' | '\t' | '\0' => true, + _ => false, + } + } + + fn is_string_begin(c: char) -> bool { + match c { + '"' | '\'' => true, + _ => false, + } + } + + fn is_id_char(c: char) -> bool { + if Self::check_whether_symbol(c) + || c.is_digit(10) + || Self::is_string_begin(c) + || Self::is_useless_char(c) + { + false + } else { + true + } + } + + fn lex_symbol(&mut self, c: char) -> error::RunResult { + Ok(match c { + '.' => Token::new(TokenType::Dot, None), + ',' => Token::new(TokenType::Comma, None), + '{' => { + self.braces_check + .push(BraceRecord::new(c, self.compiler_data.content.get_line())); + Token::new(TokenType::LeftBigBrace, None) + } + '}' => { + self.check_braces_stack(c)?; + Token::new(TokenType::RightBigBrace, None) + } + '[' => { + self.braces_check + .push(BraceRecord::new(c, self.compiler_data.content.get_line())); + Token::new(TokenType::LeftMiddleBrace, None) + } + ']' => { + self.check_braces_stack(c)?; + Token::new(TokenType::RightMiddleBrace, None) + } + '(' => { + self.braces_check + .push(BraceRecord::new(c, self.compiler_data.content.get_line())); + Token::new(TokenType::LeftSmallBrace, None) + } + ')' => { + self.check_braces_stack(c)?; + Token::new(TokenType::RightSmallBrace, None) + } + '+' => self.self_symbol(TokenType::Add, TokenType::SelfAdd), + '-' => self.self_symbol(TokenType::Sub, TokenType::SelfSub), + '*' => self.double_symbol( + TokenType::Mul, + TokenType::SelfMul, + TokenType::Power, + TokenType::SelfPower, + '*', + ), + '%' => self.self_symbol(TokenType::Mod, TokenType::SelfMod), + '/' => self.double_symbol( + TokenType::Div, + TokenType::SelfDiv, + TokenType::ExactDiv, + TokenType::SelfExactDiv, + '/', + ), + '=' => self.binary_symbol(TokenType::Assign, TokenType::Equal, '='), + '!' => self.binary_symbol(TokenType::Not, TokenType::NotEqual, '='), + '>' => self.double_symbol( + TokenType::Greater, + TokenType::GreaterEqual, + TokenType::BitRightShift, + TokenType::SelfBitRightShift, + '>', + ), + '<' => self.double_symbol( + TokenType::Less, + TokenType::LessEqual, + TokenType::BitLeftShift, + TokenType::SelfBitLeftShift, + '<', + ), + '~' => Token::new(TokenType::BitNot, None), + '^' => Token::new(TokenType::Xor, None), + '|' => self.binary_symbol(TokenType::Or, TokenType::BitOr, '|'), + ':' => Token::new(TokenType::Colon, None), + ';' => Token::new(TokenType::Semicolon, None), + _ => { + panic!("Not a symbol.Compiler error") + } + }) + } + + /// lex only an integer + fn lex_num_integer(&mut self, c: char, radix: u32) -> String { + let mut s = String::from(c); + let mut presecnt_lex; + loop { + presecnt_lex = self.compiler_data.input.read(); + if presecnt_lex == '_' { + continue; + } + if presecnt_lex.is_digit(radix) { + s.push(presecnt_lex); + } else { + self.compiler_data.input.unread(presecnt_lex); + break; + } + } + s + } + + fn lex_int_float(&mut self, mut c: char) -> RunResult { // the radix of result let mut radix = 10; - let presecnt_lex; + let mut prefix = String::new(); if c == '0' { - presecnt_lex = self.compiler_data.input.read(); - match presecnt_lex { - '\0' => { - return Token::new(TokenType::INT_VALUE, Some(Data::Ind(INT_VAL_POOL_ZERO))); + // check the radix + c = self.compiler_data.input.read(); + match c { + 'x' | 'X' => { + prefix = String::from("0x"); + radix = 16; } - _ => match presecnt_lex { - 'x' | 'X' => { - s += "0x"; - radix = 16; - } - 'b' | 'B' => { - s += "0b"; - radix = 2; - } - 'o' | 'O' => { - s += "0o"; - radix = 8; - } - _ => {} - }, + 'b' | 'B' => { + prefix = String::from("0b"); + radix = 2; + } + 'o' | 'O' => { + prefix = String::from("0o"); + radix = 8; + } + _ => { + self.compiler_data.input.unread(c); + return Ok(NumValue::Integer(String::from("0"))); + } + } + c = self.compiler_data.input.read(); + } + let intpart = format!("{prefix}{}", self.lex_num_integer(c, radix)); + if c == '.' { + // float can be used with prefix + if !prefix.is_empty() { + return Err(RuntimeError::new( + Box::new(self.compiler_data.content.clone()), + ErrorInfo::new(gettext!(PREFIX_FOR_FLOAT, prefix), gettext(SYNTAX_ERROR)), + )); } + // float mode + c = self.compiler_data.input.read(); + let float_part = self.lex_num_integer(c, radix); + if float_part.len() + intpart.len() > FLOAT_OVER_FLOW_LIMIT { + // overflow + return Err(RuntimeError::new( + Box::new(self.compiler_data.content.clone()), + ErrorInfo::new( + gettext!(FLOAT_OVER_FLOW, format!("{intpart}.{float_part}")), + gettext(NUMBER_OVER_FLOW), + ), + )); + } + return Ok(NumValue::Float(intpart, float_part)); } else { - s = c.to_string(); + self.compiler_data.input.unread(c); } - loop { - match self.compiler_data.input.read() { - '\0' => { - break; + Ok(NumValue::Integer(intpart)) + } + + fn turn_to_token(&mut self, val: NumValue) -> Token { + match val { + NumValue::Float(v1, v2) => Token::new( + TokenType::FloatValue, + Some( + self.compiler_data + .const_pool + .add_float(Float::new(v1.parse().unwrap(), v2.parse().unwrap())), + ), + ), + NumValue::Integer(it) => Token::new( + TokenType::IntValue, + Some(self.compiler_data.const_pool.add_int(it.parse().unwrap())), + ), + } + } + + fn lex_num(&mut self, mut c: char) -> RunResult { + let tmp = self.lex_int_float(c)?; + c = self.compiler_data.input.read(); + if c == 'e' || c == 'E' { + c = self.compiler_data.input.read(); + let mut up: i32 = self.lex_num_integer(c, 10).parse().unwrap(); + match tmp { + NumValue::Integer(mut it) => { + if up >= 0 { + // 保留int身份 + for i in 0..up { + it.push('0'); + } + return Ok(Token::new( + TokenType::IntValue, + Some(self.compiler_data.const_pool.add_int(it.parse().unwrap())), + )); + } else { + // 负数次,升级为float + let mut float_part = String::new(); + up = -up; + for i in 0..up { + let tmp = it.pop(); + match tmp { + None => { + float_part.insert(0, '0'); + } + Some(c) => { + float_part.insert(0, c); + } + } + } + if it.is_empty() { + it = String::from("0"); + } + return Ok(Token::new( + TokenType::FloatValue, + Some(self.compiler_data.const_pool.add_float(Float::new( + it.parse().unwrap(), + float_part.parse().unwrap(), + ))), + )); + } } - c => { - if c.is_digit(radix) { - s.push(c); + NumValue::Float(mut v1, mut v2) => { + if up >= 0 { + for i in 0..up { + if v2.is_empty() { + v1.push('0'); + } else { + let tmp = v2.remove(0); + v1.push(tmp); + } + } + if v2.is_empty() { + v2 = String::from("0"); + } + return Ok(Token::new( + TokenType::FloatValue, + Some( + self.compiler_data.const_pool.add_float(Float::new( + v1.parse().unwrap(), + v2.parse().unwrap(), + )), + ), + )); } else { - self.compiler_data.input.unread(c); - break; + up = -up; + for i in 0..up { + let tmp = v1.pop(); + match tmp { + Some(c) => { + v2.insert(0, c); + } + None => { + v2.insert(0, '0'); + } + } + } + if v1.is_empty() { + v1 = String::from('0'); + } + return Ok(Token::new( + TokenType::FloatValue, + Some( + self.compiler_data.const_pool.add_float(Float::new( + v1.parse().unwrap(), + v2.parse().unwrap(), + )), + ), + )); } } } + } else { + self.compiler_data.input.unread(c); + return Ok(self.turn_to_token(tmp)); } - Token::new( - TokenType::INT_VALUE, - Some(Data::Ind(self.compiler_data.const_pool.add_int(s.parse().expect("wrong string to int")))), - ) } - fn lex_str(&mut self, start_char: char) -> Token { + fn lex_str(&mut self, start_char: char) -> error::RunResult { let mut s = String::new(); let mut c = self.compiler_data.input.read(); while c != start_char { @@ -246,6 +640,7 @@ impl TokenLex<'_> { '\\' => '\\', '"' => '"', '\'' => '\'', + '0' => '\0', _ => { s.push('\\'); c @@ -255,124 +650,373 @@ impl TokenLex<'_> { s.push(c); c = self.compiler_data.input.read(); if c == '\0' { - error::report_error( - &self.compiler_data.content, + error::RuntimeError::new( + Box::new(self.compiler_data.content.clone()), error::ErrorInfo::new( gettext!(error::STRING_WITHOUT_END, start_char), - error::SYNTAX_ERROR, + gettext(error::SYNTAX_ERROR), ), ); } } - Token::new(TokenType::STRING_VALUE, Some(Data::Ind(self.compiler_data.const_pool.add_string(s)))) + Ok(Token::new( + TokenType::StringValue, + Some(self.compiler_data.const_pool.add_string(s)), + )) } - fn next_token(&mut self) -> Option { - let mut presecnt_lex = self.compiler_data.input.read(); + pub fn next_token(&mut self) -> error::RunResult> { + if !self.unget_token.is_empty() { + let tmp = self.unget_token.pop().unwrap(); + if tmp.tp == TokenType::EndOfLine { + self.compiler_data.content.add_line(); + } + return Ok(Some(tmp)); + } + let mut presecnt_lex; loop { + presecnt_lex = self.compiler_data.input.read(); match presecnt_lex { '\0' => { - return None; + return Ok(None); } - c => match c { - '\t' | ' ' => { - continue; - } - '\n' => { - self.compiler_data.content.add_line(); - } - _ => break, - }, + '\t' | ' ' => { + continue; + } + '\n' => { + self.compiler_data.content.add_line(); + } + _ => break, } - presecnt_lex = self.compiler_data.input.read(); } if presecnt_lex.is_digit(10) { - return Some(self.lex_num(presecnt_lex)); + return Ok(Some(self.lex_num(presecnt_lex)?)); + } + if Self::is_string_begin(presecnt_lex) { + return Ok(Some(self.lex_str(presecnt_lex)?)); + } + if Self::check_whether_symbol(presecnt_lex) { + return Ok(Some(self.lex_symbol(presecnt_lex)?)); + } + Ok(Some(self.lex_id(presecnt_lex)?)) + } + + fn next_back(&mut self, t: Token) { + if t.tp == TokenType::EndOfLine { + self.compiler_data.content.del_line(); + } + self.unget_token.push(t); + } + + pub fn check(&mut self) -> Result<(), RuntimeError> { + if !self.braces_check.is_empty() { + let unmatch_char = self.braces_check.pop().unwrap(); + return Err(error::RuntimeError::new( + Box::new(Content::new_line( + &self.compiler_data.content.module_name, + unmatch_char.line, + )), + ErrorInfo::new( + gettext!(error::UNMATCHED_BRACE, unmatch_char.c), + gettext(error::SYNTAX_ERROR), + ), + )); + } + Ok(()) + } + + fn binary_symbol(&mut self, a: TokenType, b: TokenType, binary_sym: char) -> Token { + let c = self.compiler_data.input.read(); + if c == binary_sym { + Token::new(b, None) + } else { + self.compiler_data.input.unread(c); + Token::new(a, None) + } + } + + fn self_symbol(&mut self, sym: TokenType, self_sym: TokenType) -> Token { + self.binary_symbol(sym, self_sym, '=') + } + + fn double_symbol( + &mut self, + before_sym: TokenType, + before_self_sym: TokenType, + matched_sym: TokenType, + matched_self_sym: TokenType, + matched_char: char, + ) -> Token { + let c = self.compiler_data.input.read(); + if c == matched_char { + self.self_symbol(matched_sym, matched_self_sym) + } else { + self.compiler_data.input.unread(c); + self.self_symbol(before_sym, before_self_sym) } - if presecnt_lex == '\'' || presecnt_lex == '"' { - return Some(self.lex_str(presecnt_lex)); + } +} + +impl Drop for TokenLex<'_> { + fn drop(&mut self) { + // check the braces stack + match self.check() { + Err(e) => { + eprintln!("{}", e); + exit(1); + } + _ => {} } - Some(self.lex_symbol(presecnt_lex)) } } #[cfg(test)] mod tests { - use crate::compiler::{InputSource, Option}; + use std::{collections::HashSet, hash::Hash}; use super::*; + use crate::compiler::{Float, InputSource, Option, Pool, INT_VAL_POOL_ONE}; + + macro_rules! gen_test_token_env { + ($test_string:expr, $env_name:ident) => { + let mut env = Compiler::new_string_compiler( + Option::new(false, InputSource::StringInternal), + $test_string, + ); + let mut $env_name = TokenLex::new(&mut env); + }; + } fn check(tokenlex: &mut TokenLex, expected_res: Vec) { for i in expected_res { - assert_eq!(i, tokenlex.next().unwrap()); + assert_eq!(i, tokenlex.next_token().unwrap().unwrap()); + } + assert_eq!(None, tokenlex.next_token().unwrap()); + tokenlex.check().unwrap(); + } + + /// check const pool + fn check_pool(v: Vec, pool_be_checked: &Pool) + where + T: Eq + Hash + Clone, + { + let mut testpool: HashSet = HashSet::new(); + for i in &v { + testpool.insert((*i).clone()); + } + assert_eq!(testpool.len(), pool_be_checked.len()); + for i in &testpool { + assert!(pool_be_checked.contains_key(i)); } - assert_eq!(None, tokenlex.next()); } #[test] fn test_numberlex() { - let mut env = Compiler::new_string_compiler( - Option::new(false, InputSource::StringInternal), + gen_test_token_env!( r#",,.,100 - - + + 123.9 232_304904 0b011 0x2aA4 - 0o2434 0 0"#, + 0o2434 0 0 1e3.8 1e9 1.2e1 8e-1"#, + t + ); + check( + &mut t, + vec![ + Token::new(TokenType::Comma, None), + Token::new(TokenType::Comma, None), + Token::new(TokenType::Dot, None), + Token::new(TokenType::Comma, None), + Token::new(TokenType::FloatValue, Some(0)), + Token::new(TokenType::IntValue, Some(1)), + Token::new(TokenType::IntValue, Some(2)), + Token::new(TokenType::IntValue, Some(3)), + Token::new(TokenType::IntValue, Some(4)), + Token::new(TokenType::IntValue, Some(INT_VAL_POOL_ZERO)), + Token::new(TokenType::IntValue, Some(INT_VAL_POOL_ZERO)), + Token::new(TokenType::FloatValue, Some(1)), + Token::new(TokenType::IntValue, Some(5)), + Token::new(TokenType::FloatValue, Some(2)), + Token::new(TokenType::FloatValue, Some(3)), + ], + ); + check_pool( + vec![100, 232_304904, 0b011, 0x2aA4, 0, 1], + &t.compiler_data.const_pool.const_ints, + ); + check_pool( + vec![ + Float::new(123, 9), + Float::new(1, 2), + Float::new(1000, 8), + Float::new(0, 8), + ], + &t.compiler_data.const_pool.const_floats, ); - let mut t = TokenLex::new(&mut env); - let res = vec![ - Token::new(TokenType::COMMA, None), - Token::new(TokenType::COMMA, None), - Token::new(TokenType::DOT, None), - Token::new(TokenType::COMMA, None), - Token::new(TokenType::FLOAT_VALUE, Some(Data::Ind(0))), - Token::new(TokenType::INT_VALUE, Some(Data::Ind(1))), - Token::new(TokenType::INT_VALUE, Some(Data::Ind(2))), - Token::new(TokenType::INT_VALUE, Some(Data::Ind(3))), - Token::new(TokenType::INT_VALUE, Some(Data::Ind(4))), - Token::new(TokenType::INT_VALUE, Some(Data::Ind(INT_VAL_POOL_ZERO))), - Token::new(TokenType::INT_VALUE, Some(Data::Ind(INT_VAL_POOL_ZERO))), - ]; - check(&mut t, res); } #[test] fn test_symbol_lex() { - let mut env = Compiler::new_string_compiler( - Option::new(false, InputSource::StringInternal), - r#":{}[]()+=%=//= // /=** *=*"#, + gen_test_token_env!( + r#":{}[]()+=%=//= // /=** *=*, + >><< >>="#, + t + ); + check( + &mut t, + vec![ + Token::new(TokenType::Colon, None), + Token::new(TokenType::LeftBigBrace, None), + Token::new(TokenType::RightBigBrace, None), + Token::new(TokenType::LeftMiddleBrace, None), + Token::new(TokenType::RightMiddleBrace, None), + Token::new(TokenType::LeftSmallBrace, None), + Token::new(TokenType::RightSmallBrace, None), + Token::new(TokenType::SelfAdd, None), + Token::new(TokenType::SelfMod, None), + Token::new(TokenType::SelfExactDiv, None), + Token::new(TokenType::ExactDiv, None), + Token::new(TokenType::SelfDiv, None), + Token::new(TokenType::Power, None), + Token::new(TokenType::SelfMul, None), + Token::new(TokenType::Mul, None), + Token::new(TokenType::Comma, None), + Token::new(TokenType::BitRightShift, None), + Token::new(TokenType::BitLeftShift, None), + Token::new(TokenType::SelfBitRightShift, None), + ], ); - let mut t = TokenLex::new(&mut env); - let res = vec![ - Token::new(TokenType::STRING_VALUE, Some(Data::Ind(0))) - ]; - check(&mut t, res); } #[test] fn test_string_lex() { - let mut env = Compiler::new_string_compiler( - Option::new(false, InputSource::StringInternal), - r#""s"'sd''sdscdcdfvf'"depkd"''"\n\t"'ttt\tt'"#, + gen_test_token_env!(r#""s"'sd''sdscdcdfvf'"depkd"''"\n\t"'ttt\tt'"#, t); + check( + &mut t, + vec![ + Token::new(TokenType::StringValue, Some(0)), + Token::new(TokenType::StringValue, Some(1)), + Token::new(TokenType::StringValue, Some(2)), + Token::new(TokenType::StringValue, Some(3)), + Token::new(TokenType::StringValue, Some(4)), + Token::new(TokenType::StringValue, Some(5)), + Token::new(TokenType::StringValue, Some(6)), + ], + ); + check_pool( + vec![ + String::from("s"), + String::from("sd"), + String::from("sdscdcdfvf"), + String::from("depkd"), + String::from(""), + String::from("\n\t"), + String::from("ttt\tt"), + ], + &t.compiler_data.const_pool.const_strings, ); - let res = vec![ - Token::new(TokenType::STRING_VALUE, Some(Data::Ind(0))) - ]; } #[test] fn test_comprehensive_lex() {} + #[test] + fn test_id_lex() { + gen_test_token_env!(r#"id fuck _fuck 天帝abc abc天帝"#, t); + check( + &mut t, + vec![ + Token::new(TokenType::ID, Some(0)), + Token::new(TokenType::ID, Some(1)), + Token::new(TokenType::ID, Some(2)), + Token::new(TokenType::ID, Some(3)), + Token::new(TokenType::ID, Some(4)), + ], + ); + check_pool( + vec![ + String::from("id"), + String::from("fuck"), + String::from("_fuck"), + String::from("天帝abc"), + String::from("abc天帝"), + ], + &t.compiler_data.const_pool.name_pool, + ); + } + + #[test] + fn test_wrong_number1() { + gen_test_token_env!(r#"0b123"#, t); + check( + &mut t, + vec![ + Token::new(TokenType::IntValue, Some(INT_VAL_POOL_ONE)), + Token::new(TokenType::IntValue, Some(2)), + ], + ); + check_pool(vec![0b1, 23, 0], &t.compiler_data.const_pool.const_ints); + } + + #[test] + fn test_wrong_number2() { + gen_test_token_env!(r#"0xabchds"#, t); + check( + &mut t, + vec![ + Token::new(TokenType::IntValue, Some(2)), + Token::new(TokenType::ID, Some(0)), + ], + ); + check_pool(vec![0xabc], &t.compiler_data.const_pool.const_ints); + check_pool( + vec![String::from("hds")], + &t.compiler_data.const_pool.name_pool, + ); + } + + #[test] + fn test_next_back() { + gen_test_token_env!(r#":()"#, t); + let tmp = t.next_token().unwrap().unwrap(); + assert_eq!(tmp.tp, TokenType::Colon); + t.next_back(tmp); + assert_eq!(t.next_token().unwrap().unwrap().tp, TokenType::Colon); + check( + &mut t, + vec![ + Token::new(TokenType::LeftSmallBrace, None), + Token::new(TokenType::RightSmallBrace, None), + ], + ); + } + + #[test] + #[should_panic] + fn test_braces_check2() { + gen_test_token_env!(r#":)|"#, t); + check( + &mut t, + vec![ + Token::new(TokenType::Colon, None), + Token::new(TokenType::LeftSmallBrace, None), + Token::new(TokenType::BitAnd, None), + ], + ); + } + #[test] #[should_panic] - fn test_wrong_number() { - let mut env = Compiler::new_string_compiler( - Option::new(false, InputSource::StringInternal), - r#"0xtghhy 0b231"#, + fn test_braces_check1() { + gen_test_token_env!(r#":("#, t); + check( + &mut t, + vec![ + Token::new(TokenType::Colon, None), + Token::new(TokenType::LeftSmallBrace, None), + ], ); - let t = TokenLex::new(&mut env); - for _ in t {} } } diff --git a/rust/src/lib.rs b/rust/src/lib.rs index df0259ec..5b8370f3 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -18,19 +18,19 @@ struct Args { #[derive(Debug, Subcommand)] enum Commands { - build { optimize: bool }, - tshell {}, + Build { optimize: bool }, + Tshell {}, } pub fn run() { let cli = Args::parse(); match cli.mode { - Commands::build { optimize: opt } => { + Commands::Build { optimize: opt } => { for i in cli.files { tools::compile(compiler::Option::new(opt, compiler::InputSource::File(i))); } } - Commands::tshell {} => { + Commands::Tshell {} => { tools::tshell::tshell(); } }; diff --git a/rust/src/tools/compile_tool.rs b/rust/src/tools/compile_tool.rs index 69ab950c..616c7446 100644 --- a/rust/src/tools/compile_tool.rs +++ b/rust/src/tools/compile_tool.rs @@ -1,3 +1,14 @@ +use std::process::exit; + use crate::compiler; -pub fn compile(opt: compiler::Option) {} +pub fn compile(opt: compiler::Option) { + let mut compiler = compiler::Compiler::new(opt); + match compiler.lex() { + Ok(data) => {} + Err(e) => { + eprintln!("{}", e); + exit(1) + } + } +} diff --git a/rust/src/tvm.rs b/rust/src/tvm.rs index 022a058f..084e0634 100644 --- a/rust/src/tvm.rs +++ b/rust/src/tvm.rs @@ -1,31 +1,20 @@ mod algo; +mod def; mod function; +mod gc; mod types; -mod def; -use clap::error; +use crate::base::codegen::{self, StaticData}; use gettextrs::gettext; use crate::{ - base::error::{ErrorContent, report_error, ErrorInfo, VM_DATA_NUMBER, VM_ERROR}, + base::error::{ + ErrorContent, ErrorInfo, RuntimeError, VM_DATA_NUMBER, VM_ERROR, VM_FRAME_EMPTY, + }, cfg, }; -pub struct ConstPool { - pub intpool: Vec, - pub stringpool: Vec, - pub floatpool: Vec, -} - -impl ConstPool { - pub fn new() -> Self { - Self { - intpool: Vec::new(), - stringpool: Vec::new(), - floatpool: Vec::new(), - } - } -} +use self::types::trcint::TrcInt; pub struct DynaData<'a> { obj_stack: Vec>, @@ -41,20 +30,14 @@ impl<'a> DynaData<'a> { } } -pub struct Inst { - opcode: Opcode, - operand: i32, -} - pub struct Vm<'a> { - constpool: ConstPool, - inst: Vec, - funcs: Vec, run_contnet: Content, dynadata: DynaData<'a>, pc: usize, + static_data: StaticData, } +#[derive(Debug, Clone)] struct Content { module_name: String, line_pos: usize, @@ -87,47 +70,46 @@ impl Content { } } -enum Opcode { - Add, - Sub, - Mul, - Div, - ExtraDiv, - Mod, - Eq, - Ne, - Lt, - Le, - Gt, - Ge, - And, - Or, - Not, - // change the option code index - Goto, - // return from a function - PopFrame, - // create a frame to hold the function - NewFrame, - // Load a int from const pool - LoadInt -} - /// reduce the duplicate code to solve the operator running -macro_rules! OP { +macro_rules! binary_opcode { ($trait_used:ident, $sself:expr) => {{ let t1 = $sself.dynadata.obj_stack.pop(); let t2 = $sself.dynadata.obj_stack.pop(); if t1.is_none() || t2.is_none() { - report_error(&$sself.run_contnet, ErrorInfo::new(gettext!(VM_DATA_NUMBER, 2), VM_ERROR)); + return Err(RuntimeError::new( + Box::new($sself.run_contnet.clone()), + ErrorInfo::new(gettext!(VM_DATA_NUMBER, 2), gettext(VM_ERROR)), + )); } let t1 = t1.unwrap(); let t2 = t2.unwrap(); let ret = t1.$trait_used(t2); match ret { Err(e) => { - report_error(&$sself.run_contnet, e); - }, + return Err(RuntimeError::new(Box::new($sself.run_contnet.clone()), e)); + } + Ok(t) => { + $sself.dynadata.obj_stack.push(t); + } + } + }}; +} + +macro_rules! unary_opcode { + ($trait_used:ident, $sself:expr) => {{ + let t1 = $sself.dynadata.obj_stack.pop(); + if t1.is_none() { + return Err(RuntimeError::new( + Box::new($sself.run_contnet.clone()), + ErrorInfo::new(gettext!(VM_DATA_NUMBER, 1), gettext(VM_ERROR)), + )); + } + let t1 = t1.unwrap(); + let ret = t1.$trait_used(); + match ret { + Err(e) => { + return Err(RuntimeError::new(Box::new($sself.run_contnet.clone()), e)); + } Ok(t) => { $sself.dynadata.obj_stack.push(t); } @@ -138,41 +120,75 @@ macro_rules! OP { impl<'a> Vm<'a> { pub fn new() -> Self { Self { - constpool: ConstPool::new(), - inst: Vec::new(), pc: 0, - funcs: vec![], dynadata: DynaData::new(), run_contnet: Content::new(cfg::MAIN_MODULE_NAME), + static_data: StaticData::new(), } } - pub fn run(&mut self) { - while self.pc < self.inst.len() { - match self.inst[self.pc].opcode { - Opcode::Add => OP!(add, self), - Opcode::Sub => OP!(sub, self), - Opcode::Mul => OP!(mul, self), - Opcode::Div => OP!(div, self), - Opcode::ExtraDiv => OP!(extra_div, self), - Opcode::Mod => OP!(modd, self), - Opcode::Gt => OP!(gt, self), - Opcode::Lt => OP!(lt, self), - Opcode::Ge => OP!(ge, self), - Opcode::Le => OP!(le, self), - Opcode::Eq => OP!(eq, self), - Opcode::Ne => OP!(ne, self), - Opcode::And => OP!(and, self), - Opcode::Or => OP!(or, self), - Opcode::NewFrame => {} - Opcode::PopFrame => { - self.dynadata.frames_stack.pop(); + pub fn new_init(static_data: StaticData) -> Self { + Self { + pc: 0, + + dynadata: DynaData::new(), + run_contnet: Content::new(cfg::MAIN_MODULE_NAME), + static_data, + } + } + + pub fn run(&mut self) -> Result<(), RuntimeError> { + while self.pc < self.static_data.inst.len() { + match self.static_data.inst[self.pc].opcode { + codegen::Opcode::Add => binary_opcode!(add, self), + codegen::Opcode::Sub => binary_opcode!(sub, self), + codegen::Opcode::Mul => binary_opcode!(mul, self), + codegen::Opcode::Div => binary_opcode!(div, self), + codegen::Opcode::ExtraDiv => binary_opcode!(extra_div, self), + codegen::Opcode::Mod => binary_opcode!(modd, self), + codegen::Opcode::Gt => binary_opcode!(gt, self), + codegen::Opcode::Lt => binary_opcode!(lt, self), + codegen::Opcode::Ge => binary_opcode!(ge, self), + codegen::Opcode::Le => binary_opcode!(le, self), + codegen::Opcode::Eq => binary_opcode!(eq, self), + codegen::Opcode::Ne => binary_opcode!(ne, self), + codegen::Opcode::And => binary_opcode!(and, self), + codegen::Opcode::Or => binary_opcode!(or, self), + codegen::Opcode::Power => binary_opcode!(power, self), + codegen::Opcode::Not => unary_opcode!(not, self), + codegen::Opcode::Xor => binary_opcode!(xor, self), + codegen::Opcode::NewFrame => {} + codegen::Opcode::PopFrame => { + let ret = self.dynadata.frames_stack.pop(); + if let None = ret { + return Err(RuntimeError::new( + Box::new(self.run_contnet.clone()), + ErrorInfo::new(gettext(VM_FRAME_EMPTY), gettext(VM_ERROR)), + )); + } + } + codegen::Opcode::Goto => { + self.pc = self.static_data.inst[self.pc].operand; } - _ => { - panic!("unknown opcode"); + codegen::Opcode::LoadInt => { + self.dynadata.obj_stack.push(Box::new(TrcInt::new( + self.static_data.constpool.intpool[self.static_data.inst[self.pc].operand], + ))); } + codegen::Opcode::BitAnd => binary_opcode!(bit_and, self), + codegen::Opcode::BitOr => binary_opcode!(bit_or, self), + codegen::Opcode::BitNot => unary_opcode!(bit_not, self), + codegen::Opcode::BitLeftShift => binary_opcode!(bit_left_shift, self), + codegen::Opcode::BitRightShift => binary_opcode!(bit_right_shift, self), } self.pc += 1; } + Ok(()) } } + +#[cfg(test)] +mod tests { + #[test] + fn test_vm() {} +} diff --git a/rust/src/tvm/algo/string.rs b/rust/src/tvm/algo/string.rs index 75a597d2..f6bf81f2 100644 --- a/rust/src/tvm/algo/string.rs +++ b/rust/src/tvm/algo/string.rs @@ -8,7 +8,7 @@ pub fn kmp(main_string: &str, pattern: &str) -> usize { let pattern: Vec = pattern.chars().collect(); let mut ans = 0; for i in main_string.chars() { - while j != -1 && pattern[(j + 1) as usize] == i { + while j != -1 && pattern[(j + 1) as usize] != i { j = next_arr[j as usize] as i64; } if pattern[(j + 1) as usize] == i { @@ -16,6 +16,7 @@ pub fn kmp(main_string: &str, pattern: &str) -> usize { } if j as usize == pattern.len() - 1 { ans += 1; + j = next_arr[j as usize] as i64; } } ans @@ -38,13 +39,23 @@ pub fn kmp_next(pattern: &str) -> Vec { ret } -pub fn sa(s: &str) { - let mut sa: Vec = Vec::new(); - let mut t: HashMap = HashMap::new(); +pub fn sa(s: &str) -> Vec { + let mut sa: Vec = Vec::new(); + let mut t: HashMap = HashMap::new(); for i in s.chars() { let tmp = t.entry(i).or_insert(0); *tmp += 1; } + for i in &t { + sa.push(*i.0 as usize); + } + let mut rk: Vec = Vec::new(); + rk.resize(sa.len(), 0); + for i in &sa { + let _tmp = t.entry(char::from_u32(*i as u32).unwrap()); + // rk[] + } + sa } #[cfg(test)] @@ -56,6 +67,13 @@ mod tests { let sarray = sa(s); } + #[test] + fn sa_2() { + let s = "ababa"; + let sarray = vec![5, 3, 1, 4, 2]; + assert_eq!(sa(s), sarray); + } + #[test] fn kmp_1() { assert_eq!(kmp("ABABABC", "ABA"), 2); diff --git a/rust/src/tvm/def.rs b/rust/src/tvm/def.rs index e69de29b..8b137891 100644 --- a/rust/src/tvm/def.rs +++ b/rust/src/tvm/def.rs @@ -0,0 +1 @@ + diff --git a/rust/src/tvm/function.rs b/rust/src/tvm/function.rs index cc5ec25b..e8ebba43 100644 --- a/rust/src/tvm/function.rs +++ b/rust/src/tvm/function.rs @@ -1,14 +1,4 @@ -use super::types::TrcObj; - -pub struct Func { - name: String, -} - -impl Func { - fn new(name: String) -> Self { - Self { name } - } -} +use crate::base::func; /// A content structure which hold the running info of the function pub struct Frame<'a> { @@ -16,7 +6,7 @@ pub struct Frame<'a> { } impl<'a> Frame<'a> { - fn new(func: &'a Func) -> Self { + fn new(func: &'a func::Func) -> Self { Self { name: &func.name } } } diff --git a/rust/src/tvm/gc.rs b/rust/src/tvm/gc.rs new file mode 100644 index 00000000..5f1f3016 --- /dev/null +++ b/rust/src/tvm/gc.rs @@ -0,0 +1 @@ +//! provide gc for trc diff --git a/rust/src/tvm/types.rs b/rust/src/tvm/types.rs index 803e4915..ab76e6f6 100644 --- a/rust/src/tvm/types.rs +++ b/rust/src/tvm/types.rs @@ -3,22 +3,27 @@ use downcast_rs::{impl_downcast, Downcast}; use gettextrs::gettext; pub mod data_structure; +pub mod trcbigint; +pub mod trcbool; pub mod trcfloat; pub mod trcint; pub mod trcstr; -pub mod trcbool; macro_rules! unsupported_operator { ($operator_name:expr, $sself:expr) => { Err(error::ErrorInfo::new( - gettext!(error::OPERATOR_IS_NOT_SUPPORT, $operator_name, $sself.get_type_name()), - error::SYNTAX_ERROR, + gettext!( + error::OPERATOR_IS_NOT_SUPPORT, + $operator_name, + $sself.get_type_name() + ), + gettext(error::SYNTAX_ERROR), )) - } + }; } /// help to generate the same error reporter functions -macro_rules! operators { +macro_rules! batch_unsupported_operators { ($($traie_name:ident => $oper_name:expr),*) => { $(fn $traie_name(&self, _ :Box) -> TypeError { unsupported_operator!($oper_name, self) @@ -28,14 +33,28 @@ macro_rules! operators { #[macro_export] macro_rules! impl_oper { - ($trait_oper_fn_name:ident, $oper:tt, $error_oper_name:expr, $self_type:ident) => { + // for unsupported operator in rust + ($trait_oper_fn_name:ident, $oper:ident, $error_oper_name:expr, $self_type:ident, $newtype:ident, $whether_throw_error:tt) => { fn $trait_oper_fn_name(&self, other:Box) -> TypeError { match other.downcast_ref::<$self_type>() { Some(v) => { - Ok(Box::new(TrcInt::new(self.value $oper v.value))) + Ok(Box::new($newtype::new($oper(self.value, v.value)$whether_throw_error))) }, None => { - Err(ErrorInfo::new(gettext!(OPERATOR_IS_NOT_SUPPORT, $error_oper_name, other.get_type_name()), OPERATOR_ERROR)) + Err(ErrorInfo::new(gettext!(OPERATOR_IS_NOT_SUPPORT, $error_oper_name, other.get_type_name()), gettext(OPERATOR_ERROR))) + } + } + } + }; + // for supported operator in rust + ($trait_oper_fn_name:ident, $oper:tt, $error_oper_name:expr, $self_type:ident, $newtype:ident) => { + fn $trait_oper_fn_name(&self, other:Box) -> TypeError { + match other.downcast_ref::<$self_type>() { + Some(v) => { + Ok(Box::new($newtype::new(self.value $oper v.value))) + }, + None => { + Err(ErrorInfo::new(gettext!(OPERATOR_IS_NOT_SUPPORT, $error_oper_name, other.get_type_name()), gettext(OPERATOR_ERROR))) } } } @@ -43,20 +62,33 @@ macro_rules! impl_oper { } #[macro_export] +/// use tvm::types::batch_impl_opers; +/// batch_impl_opers!( +/// add => +, "+", TrcInt, TrcInt, +/// sub => -, "-", TrcInt, TrcInt, +/// mul => *, "*", TrcInt, TrcInt +/// ); macro_rules! batch_impl_opers { - ($($trait_oper_fn_name:ident => $oper:tt, $error_oper_name:expr, $self_type:ident),*) => { + ($($trait_oper_fn_name:ident => $oper:tt, $error_oper_name:expr, $self_type:ident, $newtype:ident),*) => { $( - impl_oper!($trait_oper_fn_name, $oper, $error_oper_name, $self_type); + impl_oper!($trait_oper_fn_name, $oper, $error_oper_name, $self_type, $newtype); )* }; } -type TypeError= Result, error::ErrorInfo>; +#[macro_export] +macro_rules! impl_single_oper { + ($trait_oper_fn_name:ident, $oper:tt, $error_oper_name:expr, $self_type:ident, $newtype:ident) => { + fn $trait_oper_fn_name(&self) -> TypeError { + Ok(Box::new($newtype::new($oper self.value))) + } + }; +} -pub trait TrcObj:Downcast { - fn output(&self) {} +type TypeError = Result, error::ErrorInfo>; - operators!( +pub trait TrcObj: Downcast + std::fmt::Display { + batch_unsupported_operators!( sub => "-", mul => "*", add => "+", @@ -70,9 +102,23 @@ pub trait TrcObj:Downcast { ge => ">=", le => "<=", and => "and", - or => "or" + or => "or", + power => "**", + bit_and => "&", + bit_or => "|", + xor => "~", + bit_left_shift => "<<", + bit_right_shift => ">>" ); + fn not(&self) -> TypeError { + unsupported_operator!("not", self) + } + + fn bit_not(&self) -> TypeError { + unsupported_operator!("xor", self) + } + fn get_type_name(&self) -> &str; } diff --git a/rust/src/tvm/types/data_structure.rs b/rust/src/tvm/types/data_structure.rs index d3cb984d..d830f380 100644 --- a/rust/src/tvm/types/data_structure.rs +++ b/rust/src/tvm/types/data_structure.rs @@ -3,12 +3,14 @@ mod deque; mod fenwick; mod forward_list; mod hash_map; +mod heap; mod list; mod map; mod priority_queue; mod queue; mod sam; mod set; +mod splay; mod st; mod stack; mod trie; diff --git a/rust/src/tvm/types/data_structure/ac.rs b/rust/src/tvm/types/data_structure/ac.rs index 46326d2e..e00ddca9 100644 --- a/rust/src/tvm/types/data_structure/ac.rs +++ b/rust/src/tvm/types/data_structure/ac.rs @@ -72,7 +72,7 @@ impl AcAutomaton { /// ac.search("world", 2); /// let ans = ac.get_ans(); /// ```` - fn search(&self, pattern: &str, id: u32) {} + fn search(&self, _pattern: &str, _id: u32) {} fn get_ans(&self) -> HashMap { HashMap::new() @@ -84,6 +84,6 @@ mod tests { use super::*; #[test] fn ac_automaton1() { - let mut ac = AcAutomaton::new(); + let ac = AcAutomaton::new(); } } diff --git a/rust/src/tvm/types/data_structure/heap.rs b/rust/src/tvm/types/data_structure/heap.rs new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/rust/src/tvm/types/data_structure/heap.rs @@ -0,0 +1 @@ + diff --git a/rust/src/tvm/types/data_structure/splay.rs b/rust/src/tvm/types/data_structure/splay.rs new file mode 100644 index 00000000..353d7438 --- /dev/null +++ b/rust/src/tvm/types/data_structure/splay.rs @@ -0,0 +1,42 @@ +pub struct Node { + sons: [usize; 2], + cnt: usize, + sz: usize, + fa: usize, +} + +impl Node { + fn new() -> Self { + Self { + sons: [0; 2], + sz: 0, + cnt: 0, + fa: 0, + } + } +} + +pub struct Splay { + tree: Vec, +} + +impl Splay { + fn new() -> Self { + Self { tree: vec![] } + } + + fn maintain(&mut self, id: usize) { + self.tree[id].sz = + self.tree[self.tree[id].sons[0]].sz + self.tree[self.tree[id].sons[1]].sz; + } + + fn is_right(&self, id: usize) -> bool { + id == self.tree[self.tree[id].fa].sons[1] + } +} + +#[cfg(test)] +mod tests { + #[test] + fn splay() {} +} diff --git a/rust/src/tvm/types/data_structure/st.rs b/rust/src/tvm/types/data_structure/st.rs index 953ca641..0f3c1d75 100644 --- a/rust/src/tvm/types/data_structure/st.rs +++ b/rust/src/tvm/types/data_structure/st.rs @@ -1 +1,7 @@ pub struct StTable {} + +impl StTable { + pub fn new() -> Self { + Self {} + } +} diff --git a/rust/src/tvm/types/data_structure/trie.rs b/rust/src/tvm/types/data_structure/trie.rs index 7a8f5205..e1fea3b9 100644 --- a/rust/src/tvm/types/data_structure/trie.rs +++ b/rust/src/tvm/types/data_structure/trie.rs @@ -1,3 +1,3 @@ -struct state {} +struct State {} pub struct Trie {} diff --git a/rust/src/tvm/types/trcbigint.rs b/rust/src/tvm/types/trcbigint.rs new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/rust/src/tvm/types/trcbigint.rs @@ -0,0 +1 @@ + diff --git a/rust/src/tvm/types/trcbool.rs b/rust/src/tvm/types/trcbool.rs index 8d10493b..11aac55e 100644 --- a/rust/src/tvm/types/trcbool.rs +++ b/rust/src/tvm/types/trcbool.rs @@ -1,10 +1,37 @@ -use super::TrcObj; -pub struct TrcBool { +use super::{TrcObj, TypeError}; +use crate::base::error::{ErrorInfo, OPERATOR_ERROR, OPERATOR_IS_NOT_SUPPORT}; +use crate::{batch_impl_opers, impl_oper, impl_single_oper}; +use gettextrs::gettext; +use std::fmt::Display; +pub struct TrcBool { + pub value: bool, } impl TrcObj for TrcBool { fn get_type_name(&self) -> &str { "bool" } + + impl_single_oper!(not, !, "not", TrcBool, TrcBool); + batch_impl_opers!( + and => &&, "and", TrcBool, TrcBool, + or => ||, "or", TrcBool, TrcBool + ); +} + +impl Display for TrcBool { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.value { + write!(f, "True") + } else { + write!(f, "False") + } + } +} + +impl TrcBool { + pub fn new(value: bool) -> TrcBool { + Self { value } + } } diff --git a/rust/src/tvm/types/trcfloat.rs b/rust/src/tvm/types/trcfloat.rs index d3f5d719..593a534a 100644 --- a/rust/src/tvm/types/trcfloat.rs +++ b/rust/src/tvm/types/trcfloat.rs @@ -1,7 +1,15 @@ +use std::fmt::Display; + use super::TrcObj; pub struct TrcFloat { + pub value: f64, +} +impl TrcFloat { + pub fn new(value: f64) -> TrcFloat { + Self { value } + } } impl TrcObj for TrcFloat { @@ -9,3 +17,9 @@ impl TrcObj for TrcFloat { "float" } } + +impl Display for TrcFloat { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.value) + } +} diff --git a/rust/src/tvm/types/trcint.rs b/rust/src/tvm/types/trcint.rs index db4f1620..1acc24f6 100644 --- a/rust/src/tvm/types/trcint.rs +++ b/rust/src/tvm/types/trcint.rs @@ -1,6 +1,15 @@ -use gettextrs::gettext; -use crate::{base::error::{ErrorInfo, OPERATOR_IS_NOT_SUPPORT, OPERATOR_ERROR}, impl_oper, batch_impl_opers}; +use std::fmt::Display; + +use super::trcfloat::TrcFloat; use super::{TrcObj, TypeError}; +use crate::impl_single_oper; +use crate::{ + base::error::{ + ErrorInfo, OPERATOR_ERROR, OPERATOR_IS_NOT_SUPPORT, ZERO_DIV, ZERO_DIVSION_ERROR, + }, + batch_impl_opers, impl_oper, +}; +use gettextrs::gettext; pub struct TrcInt { pub value: i64, @@ -12,14 +21,73 @@ impl TrcInt { } } +fn extra_div_int(a: i64, b: i64) -> Result { + if b == 0 { + return Err(ErrorInfo::new( + gettext!(ZERO_DIV, a), + gettext(ZERO_DIVSION_ERROR), + )); + } + Ok(a / b) +} + +fn div_int(a: i64, b: i64) -> Result { + if b == 0 { + return Err(ErrorInfo::new( + gettext!(ZERO_DIV, a), + gettext(ZERO_DIVSION_ERROR), + )); + } + Ok(a as f64 / b as f64) +} + +fn mod_int(a: i64, b: i64) -> Result { + if b == 0 { + return Err(ErrorInfo::new( + gettext!(ZERO_DIV, a), + gettext(ZERO_DIVSION_ERROR), + )); + } + Ok(a % b) +} + +/// won't throw error,although 0^0 is undefined,but to be more convenient to use, so we return 1 +fn power_int(a: i64, b: i64) -> i64 { + if b == 0 { + return 1; + } + let mut t = power_int(a, b / 2); + t *= t; + if b % 2 == 1 { + t *= a; + } + t +} + +impl Display for TrcInt { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.value) + } +} + impl TrcObj for TrcInt { fn get_type_name(&self) -> &str { "int" } - // impl_oper!(add, +, "+", TrcInt); batch_impl_opers!( - add => +, "+", TrcInt, - sub => -, "-", TrcInt + add => +, "+", TrcInt, TrcInt, + sub => -, "-", TrcInt, TrcInt, + mul => *, "*", TrcInt, TrcInt, + bit_and => &, "&", TrcInt, TrcInt, + bit_or => |, "|", TrcInt, TrcInt, + bit_left_shift => <<, "<<", TrcInt, TrcInt, + bit_right_shift => >>, ">>", TrcInt, TrcInt ); + + impl_oper!(div, div_int, "/", TrcInt, TrcFloat, ?); + impl_oper!(extra_div, extra_div_int, "//", TrcInt, TrcInt, ?); + impl_oper!(modd, mod_int, "%", TrcInt, TrcInt, ?); + impl_oper!(power, power_int, "**", TrcInt, TrcInt,,); + impl_single_oper!(bit_not, !, "~", TrcInt, TrcInt); } diff --git a/rust/src/tvm/types/trcstr.rs b/rust/src/tvm/types/trcstr.rs index 0ad7be3d..5d905fc1 100644 --- a/rust/src/tvm/types/trcstr.rs +++ b/rust/src/tvm/types/trcstr.rs @@ -1,7 +1,9 @@ +use std::fmt::Display; + use super::TrcObj; pub struct TrcStr { - + value: String, } impl TrcObj for TrcStr { @@ -9,3 +11,9 @@ impl TrcObj for TrcStr { "str" } } + +impl Display for TrcStr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.value) + } +} diff --git a/rust/tests/testdata/compiler/compiler1.txt b/rust/tests/testdata/compiler/compiler1.txt new file mode 100644 index 00000000..d312c0d5 --- /dev/null +++ b/rust/tests/testdata/compiler/compiler1.txt @@ -0,0 +1,3 @@ +hduefgdisvicvdsiugfuewgfjdfbsdjkfcdsfgsd + +dsefddfdfvfd \ No newline at end of file diff --git a/src/TVM/TVM.cppm b/src/TVM/TVM.cppm index b35d1ca0..954e2f28 100644 --- a/src/TVM/TVM.cppm +++ b/src/TVM/TVM.cppm @@ -75,8 +75,9 @@ public: */ template void error_report(error::error_type error, const P&... argv) { - error::send_error_interal( - error, name, std::to_string(static_data.line_number_table[run_index] + 1), argv...); + error::send_error_interal(error, name, + std::to_string(static_data.line_number_table[run_index] + 1), + argv...); } /** diff --git a/src/base/Error.cppm b/src/base/Error.cppm deleted file mode 100644 index 527e2e3f..00000000 --- a/src/base/Error.cppm +++ /dev/null @@ -1,86 +0,0 @@ -module; -#include -export module Error; -import trcdef; -import language; - -export namespace trc::error { -/** - * 报错设置 - * 系统需要知道当前处于什么模式,以合适的模式应对发生的状况 - */ -namespace error_env { - // 是否终止程序 - bool quit = true; - - class vm_run_error : public std::exception { }; -} - -// 错误,增强可读性 -enum error_type { - NameError, - ValueError, - SyntaxError, - VersionError, - OpenFileError, - ModuleNotFoundError, - ArgumentError, - ZeroDivError, - RunError, - AssertError, - IndexError, - MemoryError, - KeyError, - SystemError, - OperatorError, - RedefinedError -}; - -/** - * @brief 输出报错信息 - * @param error_name 异常名 - * @param ap 可变参数 - */ -template -void output_error_msg(error_type error_name, const argv_t&... ap) { - // 报错的模板字符串 - auto index = 0; - const char* argv_arr[] = { ap... }; - const char* base_string = argv_arr[0]; - for (size_t i = 0; base_string[i]; ++i) { - if (base_string[i] == '%') { - // 输出报错字符串 - fprintf(stderr, "%s", argv_arr[++index]); - } else { - fputc(base_string[i], stderr); - } - } - fputc('\n', stderr); -} - -template -void send_error_interal(error_type name, const std::string& module_name, - const std::string&postion_info, const argv_t&... ap) { - if constexpr (compiling) { - fprintf(stderr, "\n%s%s\n", language::error::error_from, postion_info.c_str()); - } else { - fprintf(stderr, "\n%s%s\n%s%s:\n", language::error::error_from, - module_name.c_str(), language::error::error_in_line, postion_info.c_str()); - } - // 输出错误名 - fprintf(stderr, "%s", language::error::error_map[name]); - output_error_msg(name, ap...); - // 检查设置判断是否报错 - if (error_env::quit) { - // 报错,退出程序 - exit(EXIT_FAILURE); - } - // 跳转到执行的地方 - throw error_env::vm_run_error(); -} - -template -void send_error(error_type error_name, const argv_t&... argv) { - send_error_interal(error_name, "__main__", "0", argv...); -} -} diff --git a/src/base/error.cppm b/src/base/error.cppm new file mode 100644 index 00000000..325fcee6 --- /dev/null +++ b/src/base/error.cppm @@ -0,0 +1,78 @@ +module; +#include +#include +#include +export module error; +import trcdef; + +export namespace trc::error { +class content { +public: + content() { + } + +private: + virtual const char* get_module_name() = 0; + virtual size_t get_line() = 0; +}; + +class error_info { +public: + const char* error_type; + std::string error_msg; +}; + +const char* no_reach + = "The program executed code that should not have been executed.Please " + "report the problem to the Github repository."; + +template +class [[nodiscard("ignore the result")]] Result { + +} + +class vm_run_error : public std::exception { +public: + const content* error_content; + error_info info; + vm_run_error(const content* error_content, error_info error_if) + : error_content(error_content) + , error_info(error_info) { + } + + const char* what() const noexcept override { + return "vm run error"; + } + + std::string error_msg() { + auto ret = std::vformat("\n{}{}\n{}{}:\n"); + fprintf(stderr, , error::error_from, module_name.c_str(), + error::error_in_line, postion_info.c_str()); + // 输出错误名 + fprintf(stderr, "%s", language::error::error_map[name]); + } +}; + +// 错误,增强可读性 +enum error_type { + NameError, + ValueError, + SyntaxError, + VersionError, + OpenFileError, + ModuleNotFoundError, + ArgumentError, + ZeroDivError, + RunError, + AssertError, + IndexError, + MemoryError, + KeyError, + SystemError, + OperatorError, + RedefinedError +}; + +const char* error_from = "Error from {}"; +const char* error_in_line = "Error in line {}"; +} diff --git a/src/base/io.cppm b/src/base/io.cppm index 53032f01..f43052cf 100644 --- a/src/base/io.cppm +++ b/src/base/io.cppm @@ -12,8 +12,7 @@ module; #include #include export module io; -import Error; -import language; +import error; const size_t mem_init_size = 15; const size_t mem_realloc_size = 20; diff --git a/src/base/memory/memory.cppm b/src/base/memory/memory.cppm index 413cd5ce..304d8a30 100644 --- a/src/base/memory/memory.cppm +++ b/src/base/memory/memory.cppm @@ -6,8 +6,7 @@ module; #include export module memory; -import Error; -import language; +import error; namespace trc::memory { /** diff --git a/src/base/unreach.cppm b/src/base/unreach.cppm index e950d978..27cc10d7 100644 --- a/src/base/unreach.cppm +++ b/src/base/unreach.cppm @@ -1,10 +1,11 @@ module; #include #include +#include #include #include export module unreach; -import language; +import error; namespace trc { export void unreach(const std::string& error_msg, @@ -13,7 +14,7 @@ export void unreach(const std::string& error_msg, "%s\nFatal error in function \"%s\" file %s line " "%u\n%s", error_msg.c_str(), source_info.function_name(), source_info.file_name(), - source_info.line(), language::error::noreach); + source_info.line(), gettext(error::noreach)); exit(EXIT_FAILURE); } } diff --git a/src/base/utils/filesys.cppm b/src/base/utils/filesys.cppm index 3cc2427e..15b3aaa6 100644 --- a/src/base/utils/filesys.cppm +++ b/src/base/utils/filesys.cppm @@ -13,9 +13,8 @@ module; #include #include export module filesys; -import Error; +import error; import trcdef; -import language; export namespace fs = std::filesystem; diff --git a/src/compiler/compile_env.cppm b/src/compiler/compile_env.cppm index 5cc77dfa..69c59663 100644 --- a/src/compiler/compile_env.cppm +++ b/src/compiler/compile_env.cppm @@ -4,12 +4,10 @@ module; #include #include -#include "parser.tab.hpp" export module compile_env; import compiler_def; import TVMdef; import Error; -import language; export namespace trc::compiler { // size_t最大值,代表不存在 @@ -96,8 +94,8 @@ size_t module_compile_env::get_index_of_function(size_t name) { return i; } } - compiler_data.send_error(error::NameError, - language::error::nameerror, compiler_data.const_name.ref[name].c_str()); + compiler_data.send_error(error::NameError, language::error::nameerror, + compiler_data.const_name.ref[name].c_str()); return 0; } @@ -121,8 +119,7 @@ size_t basic_compile_env::get_index_of_var(size_t name, bool report_error) { } if (report_error) { // 并不在当前符号表,报错 - compiler_data.send_error(error::NameError, - language::error::nameerror, + compiler_data.send_error(error::NameError, language::error::nameerror, compiler_data.const_name.ref[name].c_str()); } return unsave; diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp deleted file mode 100644 index 18b212bf..00000000 --- a/src/compiler/compiler.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include "compiler.hpp" -#include -import Error; -import compiler_def; - -namespace trc::compiler { -compiler::compiler() = default; - -int compiler::parse(const compiler_option& option, const std::string& f, TVM_space::TVM_static_data* vm) { - location.initialize(&f); - compile_data = new compiler_public_data(option, vm, location); - scan_begin(f); - yy::parser parse(*this); - parse.set_debug_level(trace_parsing); - int res = parse(); - scan_end(); - delete compile_data; - compile_data = nullptr; - return res; -} - -int compiler::parse(const compiler_option& option, const std::string&filename, FILE* f, TVM_space::TVM_static_data* vm) { - location.initialize(&filename); - compile_data = new compiler_public_data(option, *vm, location); - scan_begin(f); - yy::parser parse(*this); - parse.set_debug_level(trace_parsing); - int res = parse(); - scan_end(); - delete compile_data; - compile_data = nullptr; - return res; -} - -FILE* compiler::open_file(const std::string& file) { - auto f = fopen(file.c_str(), "r"); - if (f == nullptr) { - compile_data->send_error( - error::OpenFileError, language::error::openfileerror, file.c_str()); - } - return f; -} -} diff --git a/src/compiler/compiler.hpp b/src/compiler/compiler.cppm similarity index 52% rename from src/compiler/compiler.hpp rename to src/compiler/compiler.cppm index ecb2b25d..99bffd4b 100644 --- a/src/compiler/compiler.hpp +++ b/src/compiler/compiler.cppm @@ -1,23 +1,23 @@ -#pragma once - +module; #include +export module compiler; +import Error; +import compiler_def; namespace trc::TVM_space { - class TVM_static_data; +class TVM_static_data; } namespace trc::compiler { -class compiler_public_data; - -class compiler_option; - class compiler { public: compiler(); // Run the parser on file F. Return 0 on success. - int parse(const compiler_option& option, const std::string& f, TVM_space::TVM_static_data* vm); - int parse(const compiler_option& option, const std::string&filename, FILE* f, TVM_space::TVM_static_data* vm); + int parse(const compiler_option& option, const std::string& f, + TVM_space::TVM_static_data* vm); + int parse(const compiler_option& option, const std::string& filename, + FILE* f, TVM_space::TVM_static_data* vm); // Whether to generate parser debug traces. bool trace_parsing = false; @@ -36,3 +36,17 @@ class compiler { }; } +namespace trc::compiler { +compiler::compiler() = default; + +int compiler::parse(const compiler_option& option, const std::string& f, + TVM_space::TVM_static_data* vm) { +} + +int compiler::parse(const compiler_option& option, const std::string& filename, + FILE* f, TVM_space::TVM_static_data* vm) { +} + +FILE* compiler::open_file(const std::string& file) { +} +} diff --git a/src/compiler/compiler_def.cppm b/src/compiler/compiler_def.cppm index e3a02192..a8d37d6f 100644 --- a/src/compiler/compiler_def.cppm +++ b/src/compiler/compiler_def.cppm @@ -1,11 +1,10 @@ module; #include #include +#include #include #include #include -#include -#include "parser.tab.hpp" export module compiler_def; import TVM; import Error; @@ -81,15 +80,15 @@ public: */ class compiler_public_data { public: - compiler_public_data(const compiler_option& option, TVM_space::TVM_static_data& vm, yy::location& location) + compiler_public_data( + const compiler_option& option, TVM_space::TVM_static_data& vm) : option(option) , vm(vm) , const_int(vm.const_i) , const_float(vm.const_f) , const_string(vm.const_s) , const_long_int(vm.const_long) - , const_name(name_list) - , loc(location) { + , const_name(name_list) { // 添加true,false,null常量 const_int.add(0); const_int.add(1); @@ -101,16 +100,14 @@ public: std::vector name_list; constant_pool_controller const_string, const_long_int, const_name; - yy::location& loc; /** * @brief 报出错误 */ template void send_error(error::error_type errorn, const P&... argv) { - //todo:optimize it + // todo:optimize it std::stringstream ss; - ss << loc; error::send_error_interal(errorn, ss.str(), argv...); } }; diff --git a/src/compiler/token.cppm b/src/compiler/token.cppm new file mode 100644 index 00000000..cad0b9c6 --- /dev/null +++ b/src/compiler/token.cppm @@ -0,0 +1,665 @@ +module; +#include +#include +#include +#include +#include +#include +export module token; +import compiler_def; +import trc_flong; +import trc_long; +import Error; +import language; +import unreach; + +constexpr size_t buffersize = 4028; + +export namespace trc::compiler { +// token的标识 +enum class token_ticks : size_t { + FOR, // for + WHILE, // while + IF, // if + FUNC, // function + CLASS, // class + ADD, // + + SUB, // - + MUL, // * + DIV, // / + ZDIV, // // + MOD, // % + POW, // ** + AND, // and + OR, // or + NOT, // not + EQUAL, // == + UNEQUAL, // != + LESS, // < + GREATER, // > + LESS_EQUAL, // <= + GREATER_EQUAL, // >= + IMPORT, // import + GOTO, // goto + DEL, // del + ASSERT, // assert + BREAK, // break + CONTINUE, // continue + SELFADD, // += + SELFSUB, // -= + SELFMUL, // *= + SELFDIV, // /= + SELFZDIV, // //= + SELFMOD, // %= + SELFPOW, // **= + ASSIGN, // = + STORE, // := + NAME, // 名称 + NULL_, // null + TRUE_, // true + FALSE_, // false + STRING_VALUE, // 字符串值 + LONG_FLOAT_VALUE, // 长浮点型值 + FLOAT_VALUE, // 浮点数值 + LONG_INT_VALUE, // 长整型值 + INT_VALUE, // 整型值 + LEFT_BIG_BRACE, // { + RIGHT_BIG_BRACE, // } + LEFT_SMALL_BRACE, // ( + RIGHT_SMALL_BRACE, // ) + LEFT_MID_BRACE, // [ + RIGHT_MID_BRACE, // ] + POINT, //. + COMMA, // , + UNKNOWN, // unknown value,可以暂时用来占个位 + END_OF_TOKENS, // 解析结束 + END_OF_LINE, // 行结束 +}; + +/** + * @brief token + * @details 一个完整的token包括标识和值两部分,是解析器的基本单元 + */ +struct token { + // 标识 + token_ticks tick {}; + // 值 + size_t data = 0; +}; + +class buffer_ctrl { +private: + /** + * @brief 装载缓冲区并指向它 + * @param buf + */ + void setbuf(std::array& buf) { + char_ptr = buf.data(); + auto readsz = fread(buf.data(), buffersize, + sizeof(std::remove_reference_t::value_type), file); + buf[readsz] = 0; + } + +public: + // char buffer,prevent compiler from reading a large file and storing it in + // memory + std::array buffer1, buffer2; + FILE* file; + bool end = false; + + // 指向当前正在解析的字符 + const char* char_ptr = nullptr; + + buffer_ctrl(FILE* file) + : file(file) { + setbuf(buffer1); + } + + /** + * @brief read next char from buffer + * @return next char + */ + int nextchar() { + if (end) { + return EOF; + } + char ret = *char_ptr; + char_ptr++; + if (*char_ptr == 0) { + if (char_ptr == &buffer1.back()) { + setbuf(buffer2); + } else if (char_ptr == &buffer2.back()) { + setbuf(buffer1); + } else { + end = true; + } + } + return ret; + } + + /** + * @brief get the char now + * @return + */ + int readchar() const { + if (end) { + return EOF; + } + return *char_ptr; + } +}; + +/** + * @brief + * 这是一个将字符串转换成token流按行输出的类 + */ +class token_lex { +public: + explicit token_lex(compiler_public_data& compiler_data, FILE* file); + + ~token_lex(); + + /** + * @brief + * 从当前字符串代码中读取一个token并返回给grammar解析 + * + * @return token 返回一个有意义的token + * @return + * 特殊返回值:由于总是保证返回有意义,所以当token的tick为token_ticks::END_OF_TOKENS时,表示解析结束 + */ + token get_token(); + + /** + * @brief 退回并储存一个token + */ + void unget_token(token t); + + compiler_public_data& compiler_data; + +private: + std::stack tokenback; + + int id = 0; + + buffer_ctrl buf; + + token lexinteral(); + + // 判断是否解析到了终点 + [[nodiscard]] bool end_of_lex() const noexcept; + + // 解析数字(包括浮点数) + token lex_int_float(); + + /** + * @brief 解析一个字符串 + * 注:会略过开头结尾的"和'符号 + */ + token lex_string(); + + /** + * @brief + * 解析英文符号(包含关键字和名称两种可能) + */ + token lex_english(); + + /** + * @brief 解析其他字符,如[],()等 + * @details + * 在这里解析的字符都能被用token_ticks完整表达,所以不需要储存任何信息 + */ + token lex_others(); + + /** + * 解析符号时遇到多种情况,例如读取*后可以为*,*=,**,**=四种情况 + * 使用方法:传入符号后期待的符号,如*期待=,再依次传入期待满足时的标记和期待不满足时的标记 + */ + token_ticks get_binary_ticks( + char expected_char, token_ticks expected, token_ticks unexpected); + + /** + * 只有在有且仅有一个期待字符时使用 + * @brief + * 检查下一个字符是不是期待的字符,如果接下来是不是期待的字符,直接报错 + * + * @param expected_char 接下来唯一期待的字符 + */ + void check_expected_char(char expected_char); + + // 用于检查括号是否正确匹配 + std::stack check_brace; +}; + +token token_lex::lex_string() { + // 略过当前"符号 + char string_begin = buf.readchar(); + buf.nextchar(); + std::string str; + // 使用自定义的函数方便处理换行符,消除字符的移动 + while (true) { + if (end_of_lex()) { + // 读到文件末尾了,说明字符串解析错误 + compiler_data.error.send_error_module( + error::SyntaxError, language::error::syntaxerror_lexstring); + } + char tmp = buf.nextchar(); + if (tmp == string_begin) { + break; + } + if (tmp == '\\') { + // 转义符 + tmp = buf.nextchar(); + // 读出真实符号并匹配转为真实符号 + switch (tmp) { + case 'r': { + str += '\r'; + break; + } + case 'b': { + str += '\b'; + break; + } + case 'n': { + str += '\n'; + break; + } + case '\'': { + str += '\''; + break; + } + case '"': { + str += '"'; + break; + } + case 't': { + str += '\t'; + break; + } + case '\\': { + str += '\\'; + break; + } + case '0': { + str += '\0'; + break; + } + case 'a': { + str += '\a'; + break; + } + case 'f': { + str += '\f'; + break; + } + case 'v': { + str += '\v'; + break; + } + default: { + compiler_data.error.send_error_module(error::SyntaxError, + language::error::syntaxerror_escape_char); + } + } + } else { + str += tmp; + } + } + return token { token_ticks::STRING_VALUE, + compiler_data.const_string.add(str) }; +} + +token token_lex::lex_int_float() { + token_ticks tick_for_res = token_ticks::INT_VALUE; + token result; + std::string str; + size_t res_len = 0; + while (!end_of_lex()) { + char c = buf.nextchar(); + if (c == '.') { + // 小数点,开启调整类型为浮点数 + tick_for_res = token_ticks::FLOAT_VALUE; + } else if (c == '_') { + // 忽略数字中间的下划线,例如123_456 + continue; + } else if (isdigit(c)) { + str += c; + ++res_len; + } else { + break; + } + } + // 尝试纠正为长整型和长浮点型 + switch (tick_for_res) { + case token_ticks::FLOAT_VALUE: { + if (res_len > FLOAT_LONGFLOAT_LINE) { + // todo + tick_for_res = token_ticks::LONG_FLOAT_VALUE; + } else { + result.data + = compiler_data.const_float.add(strtod(str.c_str(), nullptr)); + } + break; + } + case token_ticks::INT_VALUE: { + if (res_len > INT_LONGINT_LINE) { + result.data = compiler_data.const_long_int.add(str); + tick_for_res = token_ticks::LONG_INT_VALUE; + } else { + result.data = compiler_data.const_int.add(stoi(str)); + } + break; + } + default: { + unreach(std::format("Another token tick {}", (size_t)tick_for_res)); + } + } + result.tick = tick_for_res; + return result; +} + +bool token_lex::end_of_lex() const noexcept { + char c = buf.readchar(); + return c == '\n' || c == '\0'; +} + +#define CREATE_KEYWORD(str, tick) \ + { str, tick, sizeof(str) - 1 } + +struct { + const char* str; + token_ticks tick; + size_t len; +} keywords_[] = { CREATE_KEYWORD("for", token_ticks::FOR), + CREATE_KEYWORD("while", token_ticks::WHILE), + CREATE_KEYWORD("import", token_ticks::IMPORT), + CREATE_KEYWORD("goto", token_ticks::GOTO), + CREATE_KEYWORD("del", token_ticks::DEL), + CREATE_KEYWORD("assert", token_ticks::ASSERT), + CREATE_KEYWORD("if", token_ticks::IF), + CREATE_KEYWORD("class", token_ticks::CLASS), + CREATE_KEYWORD("func", token_ticks::FUNC), + CREATE_KEYWORD("and", token_ticks::AND), + CREATE_KEYWORD("or", token_ticks::OR), + CREATE_KEYWORD("not", token_ticks::NOT), + CREATE_KEYWORD("null", token_ticks::NULL_), + CREATE_KEYWORD("true", token_ticks::TRUE_), + CREATE_KEYWORD("false", token_ticks::FALSE_), + CREATE_KEYWORD("break", token_ticks::BREAK), + CREATE_KEYWORD("continue", token_ticks::CONTINUE) }; + +#undef CREATE_KEYWORD + +token token_lex::lex_english() { + std::string tmp; + do { + char c = buf.readchar(); + if ((!is_english(c) && !isdigit(c)) || end_of_lex()) { + break; + } + buf.nextchar(); + tmp += c; + } while (true); + for (const auto& keyword : keywords_) { + if (keyword.len == tmp.length() && tmp == keyword.str) { + // 传入空串是因为能在此被匹配的,都可以用token_ticks表达含义,不需要储存具体信息 + return token { keyword.tick }; + } + } + // 啥关键字都不是,只能是名称了 + return token { token_ticks::NAME, compiler_data.const_name.add(tmp) }; +} + +token_ticks token_lex::get_binary_ticks( + char expected_char, token_ticks expected, token_ticks unexpected) { + ++char_ptr; + if (*char_ptr == expected_char) { + return expected; + } else { + --char_ptr; + return unexpected; + } +} + +void token_lex::check_expected_char(char expected_char) { + ++char_ptr; + if (*char_ptr != expected_char) { + char err_tmp[] = { *char_ptr, '\0' }; + compiler_data.error.send_error_module(error::SyntaxError, + language::error::syntaxerror_no_expect, err_tmp); + } +} + +token token_lex::lex_others() { + token result; + switch (*char_ptr) { + case '<': { + result = token { get_binary_ticks( + '=', token_ticks::LESS_EQUAL, token_ticks::LESS) }; + break; + } + case '>': { + result = token { get_binary_ticks( + '=', token_ticks::GREATER_EQUAL, token_ticks::GREATER) }; + break; + } + case '=': { + result = token { get_binary_ticks( + '=', token_ticks::EQUAL, token_ticks::ASSIGN) }; + break; + } + case '!': { + check_expected_char('='); + result = token { token_ticks::UNEQUAL }; + break; + } + case ':': { + check_expected_char('='); + result = token { token_ticks::STORE }; + break; + } + case '+': { + result = token { get_binary_ticks( + '=', token_ticks::SELFADD, token_ticks::ADD) }; + break; + } + case '-': { + result = token { get_binary_ticks( + '=', token_ticks::SELFSUB, token_ticks::SUB) }; + break; + } + case '*': { + // *比较特殊,有**符号 + if (get_binary_ticks('*', token_ticks::POW, token_ticks::UNKNOWN) + == token_ticks::POW) { + // 确认有两个** + result = token { get_binary_ticks( + '=', token_ticks::SELFPOW, token_ticks::POW) }; + } else { + // 只有一个* + result = token { get_binary_ticks( + '=', token_ticks::SELFMUL, token_ticks::MUL) }; + } + break; + } + case '/': { + // /符号是最特殊的,因为有//符号和/*符号 + if (get_binary_ticks('/', token_ticks::ZDIV, token_ticks::UNKNOWN) + == token_ticks::ZDIV) { + // 确认有两个// + result = token { get_binary_ticks( + '=', token_ticks::SELFZDIV, token_ticks::ZDIV) }; + } else { + // 只有一个/ + if (get_binary_ticks('*', token_ticks::MUL, token_ticks::UNKNOWN) + == token_ticks::MUL) { + // 说明是/*符号,开启注释 + + // 略过当前的*字符 + ++char_ptr; + for (;;) { + if (*char_ptr == '*') { + // 遇到*/的开头,可能可以退出,不是也不用退格,反正都是注释里的,没有实际意义 + ++char_ptr; + if (*char_ptr == '/') { + break; + } else if (*char_ptr == '\n') { + // 跨行注释也需要更新行号 + compiler_data.error.add_line(); + } + } + if (end_of_lex()) { + // 注释未结尾,报错 + compiler_data.error.send_error_module( + error::SyntaxError, + language::error::syntaxerror_lexanno); + } + ++char_ptr; + } + } else { + result = token { get_binary_ticks( + '=', token_ticks::SELFDIV, token_ticks::DIV) }; + } + } + break; + } + case '%': { + result = token { get_binary_ticks( + '=', token_ticks::SELFMOD, token_ticks::MOD) }; + break; + } + /* 以下的这些括号需要进行括号匹配进行验证 */ + case '(': { + result = token { token_ticks::LEFT_SMALL_BRACE }; + check_brace.push('('); + break; + } + case ')': { + result = token { token_ticks::RIGHT_SMALL_BRACE }; + if (check_brace.empty() || check_brace.top() != '(') { + compiler_data.error.send_error_module(error::SyntaxError, + language::error::syntaxerror_no_expect, ")"); + } + check_brace.pop(); + break; + } + case '[': { + result = token { token_ticks::LEFT_MID_BRACE }; + check_brace.push('['); + break; + } + case ']': { + result = token { token_ticks::RIGHT_MID_BRACE }; + if (check_brace.empty() || check_brace.top() != '[') { + compiler_data.error.send_error_module(error::SyntaxError, + language::error::syntaxerror_no_expect, "]"); + } + check_brace.pop(); + break; + } + case '{': { + result = token { token_ticks::LEFT_BIG_BRACE }; + check_brace.push('{'); + break; + } + case '}': { + result = token { token_ticks::RIGHT_BIG_BRACE }; + if (check_brace.empty() || check_brace.top() != '{') { + compiler_data.error.send_error_module(error::SyntaxError, + language::error::syntaxerror_no_expect, "}"); + } + check_brace.pop(); + break; + } + case ',': { + result = token { token_ticks::COMMA }; + break; + } + case '.': { + result = token { token_ticks::POINT }; + break; + } + default: { + // 如果一个字符都没有匹配到,报错 + char error_tmp[2] = { *char_ptr, '\0' }; + compiler_data.error.send_error_module(error::SyntaxError, + language::error::syntaxerror_no_expect, error_tmp); + } + } + // 跳过当前字符 + ++char_ptr; + return result; +} + +void token_lex::unget_token(token t) { + if (t.tick == token_ticks::END_OF_LINE) { + compiler_data.error.sub_line(); + } + tokenback.push(t); +} + +token token_lex::get_token() { + if (!tokenback.empty()) { + auto ret = tokenback.top(); + tokenback.pop(); + return ret; + } + auto t = lexinteral(); + if (t.tick == token_ticks::END_OF_LINE) { + compiler_data.error.add_line(); + } + return t; +} + +token token_lex::lexinteral() { + if (*char_ptr == '#') { + /*忽略注释*/ + while (!end_of_lex()) { + // 只要不读完文件或本行,就往下读 + ++char_ptr; + } + } + if (*char_ptr == '\n') { + // 加一行 + compiler_data.error.add_line(); + ++char_ptr; + return token { token_ticks::END_OF_LINE }; + } + if (*char_ptr == '\0') { + // 解析结束 + return token { token_ticks::END_OF_TOKENS }; + } + while (*char_ptr == ' ' || *char_ptr == '\t') { + /*略过空白符和制表符*/ + ++char_ptr; + } + if (*char_ptr == '\'' || *char_ptr == '"') { + /*解析字符串*/ + return lex_string(); + } + if (isdigit(*char_ptr)) { + /*解析数字*/ + return lex_int_float(); + } + if (is_english(*char_ptr)) { + /*英文字符,有多种可能,累计直到匹配到关键字(关键字)或者不为英文字符(名称)*/ + return lex_english(); + } + // 各种符号的解析,不满足会报错 + return lex_others(); +} + +token_lex::token_lex(compiler_public_data& compiler_data, FILE* file) + : compiler_data(compiler_data) + , buf(file) { +} + +token_lex::~token_lex() { + compiler_data.error.reset_line(); + // 最后判断括号栈是否为空,如果不为空,说明括号未完全匹配,报错 + if (!check_brace.empty()) { + char error_tmp[] = { check_brace.top(), '\0' }; + compiler_data.error.send_error_module(error::SyntaxError, + language::error::syntaxerror_unmatched_char, error_tmp); + } +} +} \ No newline at end of file diff --git a/src/trc/Trc.cpp b/src/trc/Trc.cpp index c89b0a9a..1893f3a6 100644 --- a/src/trc/Trc.cpp +++ b/src/trc/Trc.cpp @@ -4,10 +4,10 @@ * Author : 李沐阳 */ +#include #include #include #include -#include #ifdef UNITTEST #include #endif @@ -23,13 +23,12 @@ import help; import run; import style; import tdb; -import tools.token; import tshell; import basic_def; import data; import color; import help; -import Compiler; +import compiler; import ctree_loader; import filesys; import code_loader; @@ -52,8 +51,7 @@ struct { argv_func_tools tool_func; } cmd_tool[] = { { "tdb", tools::tools_out::tdb }, { "help", tools::tools_out::help }, { "run", tools::tools_out::run }, - { "token", tools::tools_out::out_token }, { "dis", tools::tools_out::dis }, - { "build", tools::tools_out::build }, + { "dis", tools::tools_out::dis }, { "build", tools::tools_out::build }, { "style", tools::tools_out::style } }; /** diff --git a/src/trc/tools/build.cppm b/src/trc/tools/build.cppm index 8669e4d7..e855f688 100644 --- a/src/trc/tools/build.cppm +++ b/src/trc/tools/build.cppm @@ -5,7 +5,6 @@ module; #include #include -#include export module build; import TVM; import ctree_loader; @@ -19,6 +18,7 @@ import unreach; import color; import help; import compile_env; +import compiler; export namespace trc::tools { namespace tools_in { @@ -28,7 +28,8 @@ namespace tools_in { * @param path 文件的路径 */ void _build(TVM_space::TVM* vm, const std::string& path) { - compiler::compiler().parse(tools::compilerOption, path, &vm->static_data); + compiler::compiler().parse( + tools::compilerOption, path, &vm->static_data); loader::save_ctree( vm, fs::path(path).replace_extension(".ctree").string()); } diff --git a/src/trc/tools/dis.cppm b/src/trc/tools/dis.cppm index c0b57989..6d1a52cc 100644 --- a/src/trc/tools/dis.cppm +++ b/src/trc/tools/dis.cppm @@ -5,7 +5,6 @@ module; #include #include -#include export module dis; import code_loader; import ctree_loader; @@ -18,6 +17,7 @@ import compiler_def; import compile_env; import help; import color; +import compiler; namespace trc::tools { namespace tools_in { @@ -78,9 +78,8 @@ namespace tools_in { if (loader::is_magic(file_path)) loader::loader_ctree(vm, file_path); else { - compiler::compiler() - .parse(tools::compilerOption, file_path, - &vm->static_data); + compiler::compiler().parse( + tools::compilerOption, file_path, &vm->static_data); } out(*vm, file_path); } diff --git a/src/trc/tools/generated_params.cppm b/src/trc/tools/generated_params.cppm index c30632bd..85b67a08 100644 --- a/src/trc/tools/generated_params.cppm +++ b/src/trc/tools/generated_params.cppm @@ -5,7 +5,6 @@ module; #include #include -#include export module generated_params; import compile_env; import compiler_def; @@ -18,6 +17,7 @@ import trc_flong; import trc_long; import data; import unreach; +import compiler; namespace trc::tools { bool gen_number_table = true; diff --git a/src/trc/tools/help.cppm b/src/trc/tools/help.cppm index 0cc7223b..3d5a08be 100644 --- a/src/trc/tools/help.cppm +++ b/src/trc/tools/help.cppm @@ -2,12 +2,12 @@ #include #include #include +#include export module help; import color; import trcdef; import cmdparser; import basic_def; -import language; static void output_optimze_msg() { puts(" --optimize,-o:optimize the code."); diff --git a/src/trc/tools/run.cppm b/src/trc/tools/run.cppm index 6f119853..4b4e320c 100644 --- a/src/trc/tools/run.cppm +++ b/src/trc/tools/run.cppm @@ -5,7 +5,6 @@ module; #include #include -#include export module run; import TVM; import ctree_loader; @@ -18,6 +17,7 @@ import compiler_def; import data; import help; import color; +import compiler; export namespace trc::tools { namespace tools_in { @@ -27,7 +27,8 @@ namespace tools_in { loader::loader_ctree(vm, path); } else { /*是源文件*/ - compiler::compiler().parse(tools::compilerOption, path, &vm->static_data); + compiler::compiler().parse( + tools::compilerOption, path, &vm->static_data); } vm->reload_data(); vm->run_all(); diff --git a/src/trc/tools/tdb.cppm b/src/trc/tools/tdb.cppm index 4960c53a..91f58f25 100644 --- a/src/trc/tools/tdb.cppm +++ b/src/trc/tools/tdb.cppm @@ -11,7 +11,6 @@ module; #include #include #include -#include export module tdb; import TVM; import memory; @@ -28,6 +27,7 @@ import cmdparser; import color; import help; import basic_def; +import compiler; namespace trc { namespace tdb { @@ -96,8 +96,8 @@ namespace tdb { static void debug(const std::string& file_path) { char* instruction = nullptr; TVM_space::free_TVM(vm); - compiler::compiler() - .parse(tools::compilerOption, file_path, &vm->static_data); + compiler::compiler().parse( + tools::compilerOption, file_path, &vm->static_data); vm->reload_data(); // 用于输出代码行信息 std::string code; diff --git a/src/trc/tools/tshell.cppm b/src/trc/tools/tshell.cppm index 5b7f7c35..96a6576f 100644 --- a/src/trc/tools/tshell.cppm +++ b/src/trc/tools/tshell.cppm @@ -7,7 +7,6 @@ module; #include #include #include -#include export module tshell; import TVM; import Error; @@ -18,6 +17,7 @@ import compile_env; import compiler_def; import color; import help; +import compiler; /** * @brief 判断是否为新的语句块开始 @@ -60,8 +60,6 @@ static void get_block(std::string& res) { } } - - namespace tools::tools_out { /** * @brief trc的交互式终端界面 @@ -84,7 +82,8 @@ namespace tools::tools_out { rewind(tmpf); vm->static_data.byte_codes.clear(); try { - compiler::compiler().parse(tools::compilerOption, "tshell", tmpf, &vm->static_data); + compiler::compiler().parse( + tools::compilerOption, "tshell", tmpf, &vm->static_data); vm->reload_data(); vm->run_all(); } catch (error::error_env::vm_run_error) { } diff --git a/xmake.lua b/xmake.lua index d8a72ee0..6ee890ab 100644 --- a/xmake.lua +++ b/xmake.lua @@ -9,20 +9,17 @@ add_requires("gtest") set_warnings("all", "error") add_rules("mode.debug", "mode.release") -add_rules("plugin.compile_commands.autoupdate", {outputdir = ".vscode"}) add_rules("plugin.compile_commands.autoupdate") add_cxxflags("-Wno-read-modules-implicitly", "-Wno-unused-but-set-variable") add_includedirs("src/compiler") -add_rules("lex", "yacc") target("Trc") - set_kind("binary") - add_files("src/**.cpp", "src/**.cppm", "language/**.cppm") - add_files("src/**.ll", "src/**.yy") +set_kind("binary") +add_files("src/**.cpp", "src/**.cppm") target("unittest") - set_kind("binary") - set_default(false) - add_defines("UNITTEST") - add_files("src/**.cpp", "src/**.cppm", "tests/unittest/**.cpp", "tests/unittest/**.cppm") - add_packages("gtest") +set_kind("binary") +set_default(false) +add_defines("UNITTEST") +add_files("src/**.cpp", "src/**.cppm", "tests/unittest/**.cpp", "tests/unittest/**.cppm") +add_packages("gtest")