diff --git a/.github/workflows/rust.yml b/.github/workflows/rust_linux.yml similarity index 100% rename from .github/workflows/rust.yml rename to .github/workflows/rust_linux.yml diff --git a/.github/workflows/rust_macos.yml b/.github/workflows/rust_macos.yml new file mode 100644 index 00000000..f82f7646 --- /dev/null +++ b/.github/workflows/rust_macos.yml @@ -0,0 +1,22 @@ +name: Rust + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master", "dev" ] + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + + runs-on: macos-latest + + steps: + - uses: actions/checkout@v3 + - name: Build + run: cd rust&&cargo build + - name: Run tests + run: cd rust&&cargo test diff --git a/.github/workflows/rust_windows.yml b/.github/workflows/rust_windows.yml new file mode 100644 index 00000000..4dcf79d7 --- /dev/null +++ b/.github/workflows/rust_windows.yml @@ -0,0 +1,22 @@ +name: Rust + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master", "dev" ] + +env: + CARGO_TERM_COLOR: always + +jobs: + build: + + runs-on: windows-latest + + steps: + - uses: actions/checkout@v3 + - name: Build + run: cd rust&&cargo build + - name: Run tests + run: cd rust&&cargo test diff --git a/doc/developer/parser.yy b/doc/developer/ebnf.md similarity index 63% rename from doc/developer/parser.yy rename to doc/developer/ebnf.md index 33cee04d..8fa5e37b 100644 --- a/doc/developer/parser.yy +++ b/doc/developer/ebnf.md @@ -1,94 +1,3 @@ -%skeleton "lalr1.cc" // -*- C++ -*- -%require "3.8.2" -%header - -/* %define api.token.raw */ - -%define api.token.constructor -%define api.value.type variant -%define parse.assert - -%code requires { - # include - namespace trc::compiler { - class compiler; - } -} - -// The parsing context. -%param { trc::compiler::compiler& drv } - -%locations - -%define parse.trace -%define parse.error detailed -%define parse.lac full - -%code { -# include "compiler.hpp" -} - -/* %define api.value.type variant */ -%token FOR // for - WHILE // while - IF // if - FUNC // function - CLASS // class - ADD // + - SUB // - - MUL // * - DIV // / - ZDIV // // - MOD // % - POW // ** - AND // and - OR // or - NOT // not - EQUAL // == - UNEQUAL // != - LESS // < - GREATER // > - LESS_EQUAL // <= - GREATER_EQUAL // >= - IMPORT // import - GOTO // goto - DEL // del - ASSERT // assert - BREAK // break - CONTINUE // continue - SELFADD // += - SELFSUB // -= - SELFMUL // *= - SELFDIV // /= - SELFZDIV // //= - SELFMOD // %= - SELFPOW // **= - ASSIGN // = - STORE // := - NAME // 名称 - NULL_ // null - TRUE_ // true - FALSE_ // false - STRING_VALUE // 字符串值 - LONG_FLOAT_VALUE // 长浮点型值 - FLOAT_VALUE // 浮点数值 - LONG_INT_VALUE // 长整型值 - INT_VALUE // 整型值 - LEFT_BIG_BRACE // { - RIGHT_BIG_BRACE // } - LEFT_SMALL_BRACE // ( - RIGHT_SMALL_BRACE // ) - LEFT_MID_BRACE // [ - RIGHT_MID_BRACE // ] - POINT //. - COMMA // , - ELSE - IN - RETURN - PUBLIC - PRIVATE -%start program -%% program : statements statements : statements statement | statement statement : @@ -173,4 +82,4 @@ classdef : CLASS NAME LEFT_SMALL_BRACE NAME RIGHT_SMALL_BRACE LEFT_BIG_BRACE opt_funcdef_valdef RIGHT_BIG_BRACE assert : ASSERT expr del : DEL expr -%% + diff --git a/locales/zh_CN/LC_MESSAGES/trans.mo b/locales/zh_CN/LC_MESSAGES/trans.mo new file mode 100644 index 00000000..000dafd2 Binary files /dev/null and b/locales/zh_CN/LC_MESSAGES/trans.mo differ diff --git a/locales/zh_CN/LC_MESSAGES/trans.po b/locales/zh_CN/LC_MESSAGES/trans.po index 3d21edc8..bb59129d 100644 --- a/locales/zh_CN/LC_MESSAGES/trans.po +++ b/locales/zh_CN/LC_MESSAGES/trans.po @@ -1,4 +1,3 @@ - msgid "NamaError" msgstr "名字错误:" @@ -47,11 +46,11 @@ msgstr "运算符错误:" msgid "RedefinedError" msgstr "重定义错误:" -msgid "Error from " -msgstr "错误来自" +msgid "Error from {}" +msgstr "错误来自{}" -msgid "Error in line" -msgstr "错误发生在行" +msgid "Error in line {}" +msgstr "错误发生在行{}" msgid "Name \"%s\" is not defined." msgstr "名字\"%s\"没有被定义." @@ -68,7 +67,7 @@ msgstr "无法从操作系统中申请内存." msgid "\"%s\" division by zero." msgstr "\"%s\"被零除" -msgid "Could't find \"%\s\" module." +msgid "Could't find \"{}\" module." msgstr "无法找到\"%s\"模块." msgid "Key \"%s\" is not defined." @@ -77,11 +76,11 @@ msgstr "键\"%s\"未定义" msgid "%s is out of %s" msgstr "%超出了%的范围" -msgid "\"%s\" could not be \"%s\""" +msgid "\"%s\" could not be \"%s\"" msgstr "\"%s\"不能被转换为\"%s\"" -msgid "Couldn't use %s for types:%s and %s" -msgstr "不能使用运算符\"%s\"对于类型:%s and %s" +msgid "Couldn't use {} for types:{} and {}" +msgstr "不能使用运算符\"{}\"对于类型:{}和{}" msgid "Function %s is redefined" msgstr "函数%s被重定义" @@ -95,17 +94,17 @@ msgstr "%s需要%s个参数." msgid "Number %s is incorrect." msgstr "数字%s不正确." -msgid "The string isn't end with \" or \'" -msgstr "这个字符串不以\"或\'结尾" +msgid "The string isn't end with \" or '" +msgstr "这个字符串不以\"或'结尾" msgid "%s is not be expected." msgstr "%s是不被期待的" -msgid "%s is excepted." -msgstr "%s是被期待的."; +msgid "{} is excepted." +msgstr "{}是被期待的." msgid "Comments should end with */" -msgstr "多行注释应当以*/结尾"; +msgstr "多行注释应当以*/结尾" msgid "Escape character %s is not defined." msgstr "转义字符%s未定义." @@ -126,37 +125,34 @@ msgid "Dll %s was not found" msgstr "找不到dll\"%s\"" -msgid "The program executed code that should not have been executed.Please -report the problem to the Github repository." -msgstr "这个项目运行了不应该被运行的代码 -.请将这个问题报告给github仓库"; +msgid "The program executed code that should not have been executed.Please report the problem to the Github repository." +msgstr "这个项目运行了不应该被运行的代码.请将这个问题报告给github仓库" -msgid "Trc:\"%s\" is not a ctree file.Because its -magic number is error\n" -msgstr "Trc:\"%s\"不是一个ctree文件.因为它的魔数不正确.\n" +msgid "Trc:\"{}\" is not a ctree file.Because its magic number is error\n" +msgstr "Trc:\"{}\"不是一个ctree文件.因为它的魔数不正确.\n" -msgid "Trc is a stack programming language. This -project implements most of the modern -programming language basics, provides a perfect -tool chain, which is suitable for working -scripts or embedded in your projects, and helps +msgid "Trc is a stack programming language. This \ +project implements most of the modern \ +programming language basics, provides a perfect \ +tool chain, which is suitable for working \ +scripts or embedded in your projects, and helps \ to learn how to compile. " -msgstr "Trc是一门基于栈的编程语言。这个项目实现 -了大部分现代编程语言的功能,提供了一个完 -善的工具链。它很方便被嵌入到你的项目中或 +msgstr "Trc是一门基于栈的编程语言。这个项目实现\ +了大部分现代编程语言的功能,提供了一个完\ +善的工具链。它很方便被嵌入到你的项目中或\ 者作为工作脚本,也可以帮助你去学习编译原理." msgid "mode is not defined.\n" -msgstr "模式没有被定义\n"; +msgstr "模式没有被定义\n" msgid "var" msgstr "变量" msgid " is not defined.\n" -msgstr "没有被定义" +msgstr "没有被定义.\n" -msgid "Trc debugger is running.You can read 'doc/use/TDB.md' to find the help." -msgstr "trc的调试器正在运行.你可以阅读'doc/ +msgid "Trc debugger is running.You can read 'doc/use/TDB.md' to find the help.\n" +msgstr "trc的调试器正在运行.你可以阅读'doc/\ use/TDB.md'以寻找帮助.\n" msgid "instruction" diff --git a/rust/Cargo.lock b/rust/Cargo.lock index dcf2cb70..1a0a5031 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.8" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628a8f9bd1e24b4e0db2b4bc2d000b001e7dd032d54afa60a68836aeec5aa54a" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" dependencies = [ "anstyle", "anstyle-parse", @@ -60,10 +60,10 @@ dependencies = [ ] [[package]] -name = "anyhow" -version = "1.0.79" +name = "autocfg" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "block" @@ -150,9 +150,9 @@ checksum = "9ea835d29036a4087793836fa931b08837ad5e957da9e23886b29586fb9b6650" [[package]] name = "getrandom" -version = "0.2.11" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", "libc", @@ -193,9 +193,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.151" +version = "0.2.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" [[package]] name = "locale_config" @@ -225,6 +225,36 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" +[[package]] +name = "num-bigint" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", +] + [[package]] name = "objc" version = "0.2.7" @@ -262,9 +292,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" -version = "1.0.76" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] @@ -310,9 +340,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.2" +version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", @@ -322,9 +352,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" dependencies = [ "aho-corasick", "memchr", @@ -364,12 +394,12 @@ checksum = "dd16aa9ffe15fe021c6ee3766772132c6e98dfa395a167e16864f61a9cfb71d6" name = "trc" version = "0.1.0" dependencies = [ - "anyhow", "clap", "colored", "downcast-rs", "gettext-rs", "lazy_static", + "num-bigint", "rand", ] diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 9880b42d..827f3745 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -17,4 +17,7 @@ gettext-rs = "0.7.0" colored = "2.1.0" downcast-rs = "1.2.0" lazy_static = "1.4.0" -anyhow = "1.0.79" +num-bigint = "0.4.4" + +[profile.release] +panic = "abort" diff --git a/rust/README.md b/rust/README.md index ca06ea18..4613bcb5 100644 --- a/rust/README.md +++ b/rust/README.md @@ -11,6 +11,7 @@ So this is just an experimental project and I will keep develop c++ version and Like other common rust project.Just use ```cargo build``` Running tests is like other rust project,too.Just ```cargo test``` +But in order to read test data file,please run in the root dir. ## How to use diff --git a/rust/docs/developer/EBNF.md b/rust/docs/developer/EBNF.md new file mode 100644 index 00000000..b0ce1a15 --- /dev/null +++ b/rust/docs/developer/EBNF.md @@ -0,0 +1,15 @@ +# the ebnf of the trc + +help to develop the compiler + +programs : statements +statements : statements statement +statement : + ID := expr + ID(argvs) +opt_argvs: argvs | empty +argvs : argvs , argv | argv +argv : expr +expr : expr + term | expr - term +term : term * factor | term / factor +factor : (expr) | ID diff --git a/rust/docs/usage.md b/rust/docs/usage.md index 161ec6ae..21912d35 100644 --- a/rust/docs/usage.md +++ b/rust/docs/usage.md @@ -45,6 +45,7 @@ There are also many easape char in the string: |\\\\|\| |\'|'| |\"|"| +|\0|the tick of the end of the string| If you add ```r``` or ```R``` in front of the string.Trc will treat it as a raw string. Yes.These rules are from Python.I love its grammar rules @@ -138,6 +139,13 @@ hello world! */ ``` +## the var of trc + +First,we support the UTF-8 with your var name.So you can define your var like this: +```go +你好:=90 +``` + ## Data structures for Trc Std lib provide many kinds of data structures for Trc.Here is the list: diff --git a/rust/locales/zh_CN/LC_MESSAGES/trans.mo b/rust/locales/zh_CN/LC_MESSAGES/trans.mo index 0a8e3d85..4a4fbf1f 100644 Binary files a/rust/locales/zh_CN/LC_MESSAGES/trans.mo and b/rust/locales/zh_CN/LC_MESSAGES/trans.mo differ diff --git a/rust/locales/zh_CN/LC_MESSAGES/trans.po b/rust/locales/zh_CN/LC_MESSAGES/trans.po index 6c792b1e..cf011ce6 100644 --- a/rust/locales/zh_CN/LC_MESSAGES/trans.po +++ b/rust/locales/zh_CN/LC_MESSAGES/trans.po @@ -33,4 +33,23 @@ msgid "{} is divided by zero" msgstr "{}被零除" msgid "frame stack is empty.But running a pop frame opcode" -msgstr "帧栈为空,但运行了pop frame指令" +msgstr "帧栈为空,但运行了pop frame指令" + +msgid "Prefix {} can be used for float" +msgstr "前缀{}不能对浮点数使用" + +msgid "NumberOverFlowError" +msgstr "数值溢出错误" + +msgid "Float {} is too large to store" +msgstr "浮点数{}超过了储存范围" + +msgid "token {} is not expected" +msgstr "token{}不是被期望的" + +msgid "In module {}" +msgstr "在模块{}中" + +msgid "Error in line {}" +msgstr "错误在第{}行" + diff --git a/rust/script/pre-commit b/rust/script/pre-commit new file mode 100755 index 00000000..97d4f1b6 --- /dev/null +++ b/rust/script/pre-commit @@ -0,0 +1,8 @@ +#!/bin/sh + +cd script +python3 gen_locales.py +cd ../rust +cargo fmt +cd script +python3 gen_locales.py diff --git a/rust/src/base.rs b/rust/src/base.rs index a91e7351..2e946440 100644 --- a/rust/src/base.rs +++ b/rust/src/base.rs @@ -1 +1,4 @@ +pub mod codegen; +pub mod ctrc; pub mod error; +pub mod func; diff --git a/rust/src/base/codegen.rs b/rust/src/base/codegen.rs new file mode 100644 index 00000000..ca000937 --- /dev/null +++ b/rust/src/base/codegen.rs @@ -0,0 +1,71 @@ +use super::func; + +pub enum Opcode { + Add, + Sub, + Mul, + Div, + ExtraDiv, + Mod, + Power, + Eq, + Ne, + Lt, + Le, + Gt, + Ge, + And, + Or, + Not, + Xor, + BitNot, + BitAnd, + BitOr, + BitLeftShift, + BitRightShift, + // change the option code index + Goto, + // return from a function + PopFrame, + // create a frame to hold the function + NewFrame, + // Load a int from const pool + LoadInt, +} + +pub struct ConstPool { + pub intpool: Vec, + pub stringpool: Vec, + pub floatpool: Vec, +} + +impl ConstPool { + pub fn new() -> Self { + Self { + intpool: Vec::new(), + stringpool: Vec::new(), + floatpool: Vec::new(), + } + } +} + +pub struct Inst { + pub opcode: Opcode, + pub operand: usize, +} + +pub struct StaticData { + pub constpool: ConstPool, + pub inst: Vec, + pub funcs: Vec, +} + +impl StaticData { + pub fn new() -> StaticData { + Self { + constpool: ConstPool::new(), + inst: vec![], + funcs: vec![], + } + } +} diff --git a/rust/src/base/ctrc.rs b/rust/src/base/ctrc.rs new file mode 100644 index 00000000..7be5b67f --- /dev/null +++ b/rust/src/base/ctrc.rs @@ -0,0 +1,3 @@ +//! generate ctrc file +//! ctrc file is trc's compiled object +//! can be loaded and runned by vm without compiling diff --git a/rust/src/base/error.rs b/rust/src/base/error.rs index 2fd09115..4a6fc8a8 100644 --- a/rust/src/base/error.rs +++ b/rust/src/base/error.rs @@ -6,6 +6,7 @@ pub const SYNTAX_ERROR: &str = "SyntaxError"; pub const OPERATOR_ERROR: &str = "OperatorError"; pub const VM_ERROR: &str = "VmError"; pub const ZERO_DIVSION_ERROR: &str = "ZeroDivisionError"; +pub const NUMBER_OVER_FLOW: &str = "NumberOverFlowError"; pub const STRING_WITHOUT_END: &str = "this string should be ended with {}"; pub const UNMATCHED_BRACE: &str = "{} is unmatched"; @@ -14,18 +15,23 @@ pub const VM_DATA_NUMBER: &str = "The number of data of vm stack is not correct, should have {} data"; pub const VM_FRAME_EMPTY: &str = "frame stack is empty.But running a pop frame opcode"; pub const ZERO_DIV: &str = "{} is divided by zero"; +pub const PREFIX_FOR_FLOAT: &str = "Prefix {} can be used for float"; +pub const FLOAT_OVER_FLOW: &str = "Float {} is too large to store"; +pub const UNEXPECTED_TOKEN: &str = "token {} is not expected"; +pub const ERROR_IN_LINE: &str = "Error in line {}"; +pub const IN_MODULE: &str = "In module {}"; #[derive(Debug)] pub struct ErrorInfo { pub message: String, - errot_type: &'static str, + error_type: String, } impl ErrorInfo { - pub fn new(message: String, error_type: &'static str) -> ErrorInfo { + pub fn new(message: String, error_type: String) -> ErrorInfo { ErrorInfo { message, - errot_type: error_type, + error_type, } } } @@ -50,12 +56,12 @@ impl Display for RuntimeError { /// but you should translate the error messgae by caller fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let s = format!( - r#"Error in line {} -In module {} + r#"{} +{} {}:{}"#, - self.content.get_line(), - self.content.get_module_name(), - gettext(self.info.errot_type), + gettext!(ERROR_IN_LINE, self.content.get_line()), + gettext!(IN_MODULE, self.content.get_module_name()), + gettext(self.info.error_type.clone()), self.info.message ); write!(f, "{}", s) @@ -67,3 +73,5 @@ impl RuntimeError { RuntimeError { content, info } } } + +pub type RunResult = Result; diff --git a/rust/src/base/func.rs b/rust/src/base/func.rs new file mode 100644 index 00000000..4da3b020 --- /dev/null +++ b/rust/src/base/func.rs @@ -0,0 +1,9 @@ +pub struct Func { + pub name: String, +} + +impl Func { + fn new(name: String) -> Self { + Self { name } + } +} diff --git a/rust/src/cfg.rs b/rust/src/cfg.rs index 25a01f3b..bd7a7109 100644 --- a/rust/src/cfg.rs +++ b/rust/src/cfg.rs @@ -1,3 +1,4 @@ //! some constant values and configurations in trc pub const MAIN_MODULE_NAME: &str = "main"; +pub const FLOAT_OVER_FLOW_LIMIT: usize = 18; diff --git a/rust/src/compiler.rs b/rust/src/compiler.rs index 305731a6..bcabd5c5 100644 --- a/rust/src/compiler.rs +++ b/rust/src/compiler.rs @@ -5,10 +5,10 @@ mod ast; mod token; use self::token::TokenLex; -use crate::base::error; +use crate::base::codegen::{ConstPool, StaticData}; +use crate::base::error::{self, RunResult}; use crate::cfg; -use crate::tvm::ConstPool; -use std::collections::hash_map; +use std::collections::{hash_map, HashMap}; use std::io::BufRead; use std::{fs, io, vec}; @@ -76,22 +76,46 @@ impl Option { } } -#[derive(Hash, Eq, PartialEq)] +#[derive(Hash, Eq, PartialEq, Clone)] pub struct Float { - front: i32, - back: i32, + front: u32, + back: u32, } impl Float { - fn new(front: i32, back: i32) -> Self { + fn new(front: u32, back: u32) -> Self { Self { front, back } } + + fn get_len(mut tmp: u32) -> u8 { + if tmp == 0 { + return 1; + } + let ret: u8 = 0; + while tmp != 0 { + tmp /= 10; + } + ret + } + + pub fn to_float(&self) -> f64 { + let len = Self::get_len(self.back); + let mut float_part = self.back as f64; + for _ in 0..len { + float_part /= 10.0; + } + self.front as f64 + float_part + } } +type Pool = hash_map::HashMap; + pub struct ValuePool { - const_ints: hash_map::HashMap, - const_strings: hash_map::HashMap, - const_floats: hash_map::HashMap, + const_ints: Pool, + const_strings: Pool, + const_floats: Pool, + name_pool: Pool, + const_big_int: Pool, } const INT_VAL_POOL_ZERO: usize = 0; @@ -100,9 +124,11 @@ const INT_VAL_POOL_ONE: usize = 1; impl ValuePool { fn new() -> Self { let mut ret = Self { - const_ints: hash_map::HashMap::new(), - const_floats: hash_map::HashMap::new(), - const_strings: hash_map::HashMap::new(), + const_ints: HashMap::new(), + const_floats: HashMap::new(), + const_strings: HashMap::new(), + name_pool: HashMap::new(), + const_big_int: HashMap::new(), }; ret.add_int(0); ret.add_int(1); @@ -114,9 +140,13 @@ impl ValuePool { *self.const_ints.entry(val).or_insert(len_tmp) } + fn string_get(pool: &mut Pool, str: String) -> usize { + let len_tmp = pool.len(); + *pool.entry(str).or_insert(len_tmp) + } + fn add_string(&mut self, val: String) -> usize { - let len_tmp = self.const_strings.len(); - *self.const_strings.entry(val).or_insert(len_tmp) + Self::string_get(&mut self.const_strings, val) } fn add_float(&mut self, val: Float) -> usize { @@ -124,13 +154,16 @@ impl ValuePool { *self.const_floats.entry(val).or_insert(len_tmp) } + fn add_id(&mut self, val: String) -> usize { + Self::string_get(&mut self.name_pool, val) + } + fn store_val_to_vm(&mut self) -> ConstPool { let mut ret = ConstPool::new(); ret.intpool.resize(self.const_ints.len(), 0); for i in &self.const_ints { ret.intpool[*i.1] = *i.0; } - ret } } @@ -152,8 +185,22 @@ impl StringSource { } } +impl Iterator for StringSource { + type Item = char; + + fn next(&mut self) -> std::option::Option { + match self.read() { + '\0' => None, + other => Some(other), + } + } +} + impl TokenIo for StringSource { fn unread(&mut self, c: char) { + if c == '\0' { + return; + } self.pos -= self.prev_size; // check if match the right char if cfg!(debug_assertions) { @@ -175,7 +222,7 @@ impl TokenIo for StringSource { } } -trait TokenIo { +trait TokenIo: Iterator { fn unread(&mut self, c: char); fn read(&mut self) -> char; @@ -192,11 +239,29 @@ impl FileSource { pub fn new(f: fs::File) -> Self { let buf = io::BufReader::new(f); let s = String::new(); - FileSource { + let mut ret = FileSource { back: vec![], buf, input_pos: 0, s, + }; + ret.init_new_line(); + ret + } + + fn init_new_line(&mut self) { + self.s.clear(); + self.buf.read_line(&mut self.s).unwrap(); + self.input_pos = 0; + } +} + +impl Iterator for FileSource { + type Item = char; + fn next(&mut self) -> std::option::Option { + match self.read() { + '\0' => None, + other => Some(other), } } } @@ -214,9 +279,10 @@ impl TokenIo for FileSource { let mut input_pos = self.s[self.input_pos..].chars(); match input_pos.next() { None => { - self.s.clear(); - self.buf.read_line(&mut self.s).unwrap(); - self.input_pos = 0; + self.init_new_line(); + if self.s.is_empty() { + return '\0'; + } } Some(c) => { self.input_pos += c.len_utf8(); @@ -229,7 +295,7 @@ impl TokenIo for FileSource { pub struct Compiler { // to support read from stdin and file - input: Box, + input: Box>, const_pool: ValuePool, option: Option, content: Content, @@ -262,8 +328,62 @@ impl Compiler { } } - pub fn lex(&mut self) { + pub fn lex(&mut self) -> RunResult<()> { let token_lexer = TokenLex::new(self); - let ast_builder = ast::AstBuilder::new(token_lexer); + let mut ast_builder = ast::AstBuilder::new(token_lexer); + ast_builder.generate_code()?; + Ok(()) + } +} + +mod tests { + use super::*; + use std::fs::{read_to_string, File}; + + fn check_read(reader: &mut impl TokenIo, s: &str) { + let mut iter = s.chars(); + for i in reader { + assert_eq!(i, iter.next().unwrap()); + } + assert_eq!(iter.next(), None); + } + + #[test] + fn test_string_literal() { + let source = "source\np"; + let mut t = StringSource::new(String::from(source)); + let mut tmp: Vec = vec![t.read(), t.read()]; + tmp.reverse(); + for i in &tmp { + t.unread(*i); + } + check_read(&mut t, source) + } + + #[test] + fn test_file_read() { + let test_file_path = "tests/testdata/compiler/compiler1.txt"; + let source = read_to_string(test_file_path).expect("please run in root dir"); + let mut t = FileSource::new(File::open(test_file_path).expect("please run in root dir")); + let mut tmp: Vec = vec![t.read(), t.read()]; + tmp.reverse(); + for i in &tmp { + t.unread(*i); + } + check_read(&mut t, &source) + } + + #[test] + fn test_value_pool() { + let mut pool = ValuePool::new(); + assert_eq!(pool.add_int(7), 2); + assert_eq!(pool.add_int(1), INT_VAL_POOL_ONE); + assert_eq!(pool.add_int(0), INT_VAL_POOL_ZERO); + assert_eq!(pool.add_float(Float::new(9, 0)), 0); + assert_eq!(pool.add_float(Float::new(9, 0)), 0); + assert_eq!(pool.add_float(Float::new(9, 5)), 1); + assert_eq!(pool.add_string(String::from("value")), 0); + assert_eq!(pool.add_string(String::from("value")), 0); + assert_eq!(pool.add_string(String::from("vale")), 1); } } diff --git a/rust/src/compiler/ast.rs b/rust/src/compiler/ast.rs index 4ec1347f..ad338891 100644 --- a/rust/src/compiler/ast.rs +++ b/rust/src/compiler/ast.rs @@ -1,11 +1,70 @@ -use super::TokenLex; +use super::{token::Token, Compiler, TokenLex}; +use crate::base::{codegen::StaticData, error::*}; +use clap::error; +use gettextrs::gettext; pub struct AstBuilder<'a> { token_lexer: TokenLex<'a>, + staticdata: StaticData, } impl<'a> AstBuilder<'a> { pub fn new(token_lexer: TokenLex<'a>) -> Self { - AstBuilder { token_lexer } + AstBuilder { + token_lexer, + staticdata: StaticData::new(), + } } + + fn while_lex(&mut self) -> RunResult<()> { + Ok(()) + } + + fn for_lex(&mut self) -> RunResult<()> { + Ok(()) + } + + fn generate_block(&mut self, t: Token) -> RunResult<()> { + Ok(()) + } + + fn statement(&mut self, mut t: Token) -> RunResult<()> { + match t.tp { + super::token::TokenType::ID => {} + _ => { + return Err(RuntimeError::new( + Box::new(self.token_lexer.compiler_data.content.clone()), + ErrorInfo::new( + gettextrs::gettext(SYNTAX_ERROR), + gettext!(UNEXPECTED_TOKEN, t.tp.to_string()), + ), + )) + } + } + Ok(()) + } + + pub fn generate_code(&mut self) -> RunResult<()> { + loop { + let token = self.token_lexer.next_token()?; + match token { + Some(token) => { + self.statement(token)?; + } + None => { + return Ok(()); + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_assign() {} + + #[test] + fn builtin_function_call() {} } diff --git a/rust/src/compiler/token.rs b/rust/src/compiler/token.rs index 7df05d0e..b74c11dd 100644 --- a/rust/src/compiler/token.rs +++ b/rust/src/compiler/token.rs @@ -1,9 +1,17 @@ -use super::{Compiler, Content, INT_VAL_POOL_ZERO}; -use crate::base::error::{self, ErrorContent, ErrorInfo, RuntimeError}; +use super::{Compiler, Content, Float, INT_VAL_POOL_ZERO}; +use crate::{ + base::error::{ + self, ErrorContent, ErrorInfo, RunResult, RuntimeError, FLOAT_OVER_FLOW, NUMBER_OVER_FLOW, + PREFIX_FOR_FLOAT, SYNTAX_ERROR, + }, + cfg::FLOAT_OVER_FLOW_LIMIT, +}; use gettextrs::gettext; +use lazy_static::lazy_static; +use std::{collections::HashMap, fmt::Display, process::exit}; -#[derive(PartialEq, Debug)] -enum TokenType { +#[derive(PartialEq, Debug, Clone)] +pub enum TokenType { // . Dot, // , @@ -55,7 +63,7 @@ enum TokenType { // /= SelfDiv, // //= - SelfExtraDiv, + SelfExactDiv, // %= SelfMod, // **= @@ -76,7 +84,6 @@ enum TokenType { StringValue, FloatValue, LongIntValue, - LongFloatValue, // = Assign, // := @@ -99,19 +106,94 @@ enum TokenType { Or, // && And, + // : + Colon, + // ; + Semicolon, + ID, + While, + For, + If, + Else, + Class, + Match, + Func, EndOfLine, } -#[derive(PartialEq, Debug)] -pub enum Data { - Ind(usize), - NONEDATA, +impl Display for TokenType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let res: String; + match self { + TokenType::Dot => res = ".".to_string(), + TokenType::Comma => res = ",".to_string(), + TokenType::LeftBigBrace => res = "{".to_string(), + TokenType::RightBigBrace => res = "}".to_string(), + TokenType::LeftMiddleBrace => res = "[".to_string(), + TokenType::RightMiddleBrace => res = "]".to_string(), + TokenType::LeftSmallBrace => res = "(".to_string(), + TokenType::RightSmallBrace => res = ")".to_string(), + TokenType::Add => res = "+".to_string(), + TokenType::Sub => res = "-".to_string(), + TokenType::Mul => res = "*".to_string(), + TokenType::Div => res = "/".to_string(), + TokenType::Mod => res = "%".to_string(), + TokenType::ExactDiv => res = "//".to_string(), + TokenType::BitNot => res = "~".to_string(), + TokenType::BitLeftShift => res = "<<".to_string(), + TokenType::BitRightShift => res = ">>".to_string(), + TokenType::BitAnd => res = "&".to_string(), + TokenType::BitOr => res = "|".to_string(), + TokenType::Xor => res = "^".to_string(), + TokenType::Power => res = "**".to_string(), + TokenType::SelfAdd => res = "+=".to_string(), + TokenType::SelfSub => res = "-=".to_string(), + TokenType::SelfMul => res = "*=".to_string(), + TokenType::SelfDiv => res = "/=".to_string(), + TokenType::SelfExactDiv => res = "//=".to_string(), + TokenType::SelfMod => res = "%=".to_string(), + TokenType::SelfPower => res = "**=".to_string(), + TokenType::SelfBitNot => res = "~=".to_string(), + TokenType::SelfBitLeftShift => res = "<<=".to_string(), + TokenType::SelfBitRightShift => res = ">>=".to_string(), + TokenType::SelfBitAnd => res = "&=".to_string(), + TokenType::SelfBitOr => res = "|=".to_string(), + TokenType::SelfXor => res = "^=".to_string(), + TokenType::IntValue => res = "integer".to_string(), + TokenType::StringValue => res = "string".to_string(), + TokenType::FloatValue => res = "float".to_string(), + TokenType::LongIntValue => res = "long integer".to_string(), + TokenType::Assign => res = "=".to_string(), + TokenType::Store => res = ":=".to_string(), + TokenType::Equal => res = "==".to_string(), + TokenType::NotEqual => res = "!=".to_string(), + TokenType::Greater => res = ">".to_string(), + TokenType::Less => res = "<".to_string(), + TokenType::LessEqual => res = "<=".to_string(), + TokenType::GreaterEqual => res = ">=".to_string(), + TokenType::Not => res = "!".to_string(), + TokenType::Or => res = "||".to_string(), + TokenType::And => res = "&&".to_string(), + TokenType::Colon => res = ":".to_string(), + TokenType::Semicolon => res = ";".to_string(), + TokenType::ID => res = "identifier".to_string(), + TokenType::While => res = "while".to_string(), + TokenType::For => res = "for".to_string(), + TokenType::If => res = "if".to_string(), + TokenType::Else => res = "else".to_string(), + TokenType::Class => res = "class".to_string(), + TokenType::Match => res = "match".to_string(), + TokenType::Func => res = "func".to_string(), + TokenType::EndOfLine => res = "end of line".to_string(), + } + write!(f, "{}", res) + } } #[derive(PartialEq, Debug)] pub struct Token { - tp: TokenType, - data: Data, + pub tp: TokenType, + pub data: Option, } struct BraceRecord { @@ -126,64 +208,28 @@ impl BraceRecord { } pub struct TokenLex<'code> { - compiler_data: &'code mut Compiler, + pub compiler_data: &'code mut Compiler, braces_check: Vec, unget_token: Vec, } impl Token { - fn new(tp: TokenType, data: Option) -> Token { - match data { - Some(data) => Token { tp, data }, - None => Token { - tp, - data: Data::NONEDATA, - }, - } + fn new(tp: TokenType, data: Option) -> Token { + Token { tp, data } } } -macro_rules! binary_symbol { - ($a:expr, $b:expr, $binary_sym:expr, $sself:expr) => {{ - let c = $sself.compiler_data.input.read(); - if c == $binary_sym { - Token::new($b, None) - } else { - $sself.compiler_data.input.unread(c); - Token::new($a, None) - } - }}; -} - -macro_rules! self_symbol { - ($sym:expr, $self_sym:expr, $sself:expr) => {{ - binary_symbol!($sym, $self_sym, '=', $sself) - }}; -} - -macro_rules! double_symbol { - ($before_sym:expr, $before_self_sym:expr, $matched_sym:expr, $matched_self_sym:expr, $matched_char:expr, $sself:expr) => {{ - let c = $sself.compiler_data.input.read(); - if c == $matched_char { - self_symbol!($matched_sym, $matched_self_sym, $sself) - } else { - $sself.compiler_data.input.unread(c); - self_symbol!($before_sym, $before_self_sym, $sself) - } - }}; -} - macro_rules! check_braces_match { - ($sself:expr, $brace_record:expr, $($front_brace:expr => $after_brace:expr),*) => {{ + ($sself:expr, $should_be_matched:expr, $brace_record:expr, $($front_brace:expr => $after_brace:expr),*) => {{ match $brace_record.c { $( $front_brace => { - if $brace_record.c != $after_brace { + if $should_be_matched != $after_brace { return Err(error::RuntimeError::new( Box::new(Content::new_line(&$sself.compiler_data.content.module_name, $brace_record.line)), ErrorInfo::new( gettext!(error::UNMATCHED_BRACE, $brace_record.c), - error::SYNTAX_ERROR, + gettext(error::SYNTAX_ERROR), ), )); } @@ -196,6 +242,36 @@ macro_rules! check_braces_match { }} } +macro_rules! hash_map { + ($($key:expr => $val:expr),*) => { + { + use std::collections::hash_map::HashMap; + let mut ret = HashMap::new(); + $( + ret.insert($key, $val); + )* + ret + } + }; +} + +lazy_static! { + static ref KEYWORDS: HashMap = hash_map![ + String::from("while") => TokenType::While, + String::from("for") => TokenType::For, + String::from("if") => TokenType::If, + String::from("else") => TokenType::Else, + String::from("class") => TokenType::Class, + String::from("func") => TokenType::Func, + String::from("match") => TokenType::Match + ]; +} + +enum NumValue { + Integer(String), + Float(String, String), +} + impl TokenLex<'_> { pub fn new<'a>(compiler_data: &'a mut Compiler) -> TokenLex<'a> { TokenLex { @@ -211,11 +287,14 @@ impl TokenLex<'_> { None => { return Err(RuntimeError::new( Box::new(self.compiler_data.content.clone()), - ErrorInfo::new(gettext!(error::UNMATCHED_BRACE, c), error::SYNTAX_ERROR), + ErrorInfo::new( + gettext!(error::UNMATCHED_BRACE, c), + gettext(error::SYNTAX_ERROR), + ), )); } - Some(c) => { - check_braces_match!(self, c, + Some(cc) => { + check_braces_match!(self, c, cc, '{' => '}', '[' => ']', '(' => ')' @@ -225,7 +304,64 @@ impl TokenLex<'_> { } } - fn lex_symbol(&mut self, c: char) -> anyhow::Result { + fn lex_id(&mut self, c: char) -> error::RunResult { + Ok({ + let mut retname: String = String::from(c); + loop { + let c = self.compiler_data.input.read(); + if Self::is_id_char(c) { + retname.push(c); + } else { + self.compiler_data.input.unread(c); + break; + } + } + let tmp = KEYWORDS.get(&retname); + match tmp { + Some(val) => Token::new((*val).clone(), None), + None => Token::new( + TokenType::ID, + Some(self.compiler_data.const_pool.add_id(retname)), + ), + } + }) + } + + fn check_whether_symbol(c: char) -> bool { + match c { + '.' | ',' | '{' | '}' | '[' | ']' | '(' | ')' | '+' | '-' | '*' | '%' | '/' | '=' + | '!' | '>' | '<' | '~' | '^' | '|' | ':' | ';' => true, + _ => false, + } + } + + fn is_useless_char(c: char) -> bool { + match c { + ' ' | '\n' | '\t' | '\0' => true, + _ => false, + } + } + + fn is_string_begin(c: char) -> bool { + match c { + '"' | '\'' => true, + _ => false, + } + } + + fn is_id_char(c: char) -> bool { + if Self::check_whether_symbol(c) + || c.is_digit(10) + || Self::is_string_begin(c) + || Self::is_useless_char(c) + { + false + } else { + true + } + } + + fn lex_symbol(&mut self, c: char) -> error::RunResult { Ok(match c { '.' => Token::new(TokenType::Dot, None), ',' => Token::new(TokenType::Comma, None), @@ -256,95 +392,243 @@ impl TokenLex<'_> { self.check_braces_stack(c)?; Token::new(TokenType::RightSmallBrace, None) } - '+' => self_symbol!(TokenType::Add, TokenType::SelfAdd, self), - '-' => self_symbol!(TokenType::Sub, TokenType::SelfSub, self), - '*' => double_symbol!( + '+' => self.self_symbol(TokenType::Add, TokenType::SelfAdd), + '-' => self.self_symbol(TokenType::Sub, TokenType::SelfSub), + '*' => self.double_symbol( TokenType::Mul, TokenType::SelfMul, TokenType::Power, TokenType::SelfPower, '*', - self ), - '%' => self_symbol!(TokenType::Mod, TokenType::SelfMod, self), - '/' => double_symbol!( + '%' => self.self_symbol(TokenType::Mod, TokenType::SelfMod), + '/' => self.double_symbol( TokenType::Div, TokenType::SelfDiv, TokenType::ExactDiv, - TokenType::SelfExtraDiv, + TokenType::SelfExactDiv, '/', - self ), - '=' => binary_symbol!(TokenType::Assign, TokenType::Equal, '=', self), - '!' => binary_symbol!(TokenType::Not, TokenType::NotEqual, '=', self), - '>' => binary_symbol!(TokenType::Greater, TokenType::GreaterEqual, '=', self), - '<' => binary_symbol!(TokenType::Less, TokenType::LessEqual, '=', self), + '=' => self.binary_symbol(TokenType::Assign, TokenType::Equal, '='), + '!' => self.binary_symbol(TokenType::Not, TokenType::NotEqual, '='), + '>' => self.double_symbol( + TokenType::Greater, + TokenType::GreaterEqual, + TokenType::BitRightShift, + TokenType::SelfBitRightShift, + '>', + ), + '<' => self.double_symbol( + TokenType::Less, + TokenType::LessEqual, + TokenType::BitLeftShift, + TokenType::SelfBitLeftShift, + '<', + ), '~' => Token::new(TokenType::BitNot, None), '^' => Token::new(TokenType::Xor, None), - '|' => { - binary_symbol!(TokenType::Or, TokenType::BitOr, '|', self) + '|' => self.binary_symbol(TokenType::Or, TokenType::BitOr, '|'), + ':' => Token::new(TokenType::Colon, None), + ';' => Token::new(TokenType::Semicolon, None), + _ => { + panic!("Not a symbol.Compiler error") } - _ => panic!("Not a symbol.Compiler error"), }) } - fn lex_num(&mut self, c: char) -> Token { - // to save the int in str - let mut s = String::new(); + /// lex only an integer + fn lex_num_integer(&mut self, c: char, radix: u32) -> String { + let mut s = String::from(c); + let mut presecnt_lex; + loop { + presecnt_lex = self.compiler_data.input.read(); + if presecnt_lex == '_' { + continue; + } + if presecnt_lex.is_digit(radix) { + s.push(presecnt_lex); + } else { + self.compiler_data.input.unread(presecnt_lex); + break; + } + } + s + } + + fn lex_int_float(&mut self, mut c: char) -> RunResult { // the radix of result let mut radix = 10; - let presecnt_lex; + let mut prefix = String::new(); if c == '0' { - presecnt_lex = self.compiler_data.input.read(); - match presecnt_lex { - '\0' => { - return Token::new(TokenType::IntValue, Some(Data::Ind(INT_VAL_POOL_ZERO))); + // check the radix + c = self.compiler_data.input.read(); + match c { + 'x' | 'X' => { + prefix = String::from("0x"); + radix = 16; } - _ => match presecnt_lex { - 'x' | 'X' => { - s += "0x"; - radix = 16; - } - 'b' | 'B' => { - s += "0b"; - radix = 2; - } - 'o' | 'O' => { - s += "0o"; - radix = 8; - } - _ => {} - }, + 'b' | 'B' => { + prefix = String::from("0b"); + radix = 2; + } + 'o' | 'O' => { + prefix = String::from("0o"); + radix = 8; + } + _ => { + self.compiler_data.input.unread(c); + return Ok(NumValue::Integer(String::from("0"))); + } + } + c = self.compiler_data.input.read(); + } + let intpart = format!("{prefix}{}", self.lex_num_integer(c, radix)); + if c == '.' { + // float can be used with prefix + if !prefix.is_empty() { + return Err(RuntimeError::new( + Box::new(self.compiler_data.content.clone()), + ErrorInfo::new(gettext!(PREFIX_FOR_FLOAT, prefix), gettext(SYNTAX_ERROR)), + )); + } + // float mode + c = self.compiler_data.input.read(); + let float_part = self.lex_num_integer(c, radix); + if float_part.len() + intpart.len() > FLOAT_OVER_FLOW_LIMIT { + // overflow + return Err(RuntimeError::new( + Box::new(self.compiler_data.content.clone()), + ErrorInfo::new( + gettext!(FLOAT_OVER_FLOW, format!("{intpart}.{float_part}")), + gettext(NUMBER_OVER_FLOW), + ), + )); } + return Ok(NumValue::Float(intpart, float_part)); } else { - s = c.to_string(); + self.compiler_data.input.unread(c); } - loop { - match self.compiler_data.input.read() { - '\0' => { - break; + Ok(NumValue::Integer(intpart)) + } + + fn turn_to_token(&mut self, val: NumValue) -> Token { + match val { + NumValue::Float(v1, v2) => Token::new( + TokenType::FloatValue, + Some( + self.compiler_data + .const_pool + .add_float(Float::new(v1.parse().unwrap(), v2.parse().unwrap())), + ), + ), + NumValue::Integer(it) => Token::new( + TokenType::IntValue, + Some(self.compiler_data.const_pool.add_int(it.parse().unwrap())), + ), + } + } + + fn lex_num(&mut self, mut c: char) -> RunResult { + let tmp = self.lex_int_float(c)?; + c = self.compiler_data.input.read(); + if c == 'e' || c == 'E' { + c = self.compiler_data.input.read(); + let mut up: i32 = self.lex_num_integer(c, 10).parse().unwrap(); + match tmp { + NumValue::Integer(mut it) => { + if up >= 0 { + // 保留int身份 + for i in 0..up { + it.push('0'); + } + return Ok(Token::new( + TokenType::IntValue, + Some(self.compiler_data.const_pool.add_int(it.parse().unwrap())), + )); + } else { + // 负数次,升级为float + let mut float_part = String::new(); + up = -up; + for i in 0..up { + let tmp = it.pop(); + match tmp { + None => { + float_part.insert(0, '0'); + } + Some(c) => { + float_part.insert(0, c); + } + } + } + if it.is_empty() { + it = String::from("0"); + } + return Ok(Token::new( + TokenType::FloatValue, + Some(self.compiler_data.const_pool.add_float(Float::new( + it.parse().unwrap(), + float_part.parse().unwrap(), + ))), + )); + } } - c => { - if c.is_digit(radix) { - s.push(c); + NumValue::Float(mut v1, mut v2) => { + if up >= 0 { + for i in 0..up { + if v2.is_empty() { + v1.push('0'); + } else { + let tmp = v2.remove(0); + v1.push(tmp); + } + } + if v2.is_empty() { + v2 = String::from("0"); + } + return Ok(Token::new( + TokenType::FloatValue, + Some( + self.compiler_data.const_pool.add_float(Float::new( + v1.parse().unwrap(), + v2.parse().unwrap(), + )), + ), + )); } else { - self.compiler_data.input.unread(c); - break; + up = -up; + for i in 0..up { + let tmp = v1.pop(); + match tmp { + Some(c) => { + v2.insert(0, c); + } + None => { + v2.insert(0, '0'); + } + } + } + if v1.is_empty() { + v1 = String::from('0'); + } + return Ok(Token::new( + TokenType::FloatValue, + Some( + self.compiler_data.const_pool.add_float(Float::new( + v1.parse().unwrap(), + v2.parse().unwrap(), + )), + ), + )); } } } + } else { + self.compiler_data.input.unread(c); + return Ok(self.turn_to_token(tmp)); } - Token::new( - TokenType::IntValue, - Some(Data::Ind( - self.compiler_data - .const_pool - .add_int(s.parse().expect("wrong string to int")), - )), - ) } - fn lex_str(&mut self, start_char: char) -> anyhow::Result { + fn lex_str(&mut self, start_char: char) -> error::RunResult { let mut s = String::new(); let mut c = self.compiler_data.input.read(); while c != start_char { @@ -356,6 +640,7 @@ impl TokenLex<'_> { '\\' => '\\', '"' => '"', '\'' => '\'', + '0' => '\0', _ => { s.push('\\'); c @@ -369,18 +654,18 @@ impl TokenLex<'_> { Box::new(self.compiler_data.content.clone()), error::ErrorInfo::new( gettext!(error::STRING_WITHOUT_END, start_char), - error::SYNTAX_ERROR, + gettext(error::SYNTAX_ERROR), ), ); } } Ok(Token::new( TokenType::StringValue, - Some(Data::Ind(self.compiler_data.const_pool.add_string(s))), + Some(self.compiler_data.const_pool.add_string(s)), )) } - fn next_token(&mut self) -> anyhow::Result> { + pub fn next_token(&mut self) -> error::RunResult> { if !self.unget_token.is_empty() { let tmp = self.unget_token.pop().unwrap(); if tmp.tp == TokenType::EndOfLine { @@ -388,31 +673,32 @@ impl TokenLex<'_> { } return Ok(Some(tmp)); } - let mut presecnt_lex = self.compiler_data.input.read(); + let mut presecnt_lex; loop { + presecnt_lex = self.compiler_data.input.read(); match presecnt_lex { '\0' => { return Ok(None); } - c => match c { - '\t' | ' ' => { - continue; - } - '\n' => { - self.compiler_data.content.add_line(); - } - _ => break, - }, + '\t' | ' ' => { + continue; + } + '\n' => { + self.compiler_data.content.add_line(); + } + _ => break, } - presecnt_lex = self.compiler_data.input.read(); } if presecnt_lex.is_digit(10) { - return Ok(Some(self.lex_num(presecnt_lex))); + return Ok(Some(self.lex_num(presecnt_lex)?)); } - if presecnt_lex == '\'' || presecnt_lex == '"' { + if Self::is_string_begin(presecnt_lex) { return Ok(Some(self.lex_str(presecnt_lex)?)); } - Ok(Some(self.lex_symbol(presecnt_lex)?)) + if Self::check_whether_symbol(presecnt_lex) { + return Ok(Some(self.lex_symbol(presecnt_lex)?)); + } + Ok(Some(self.lex_id(presecnt_lex)?)) } fn next_back(&mut self, t: Token) { @@ -432,93 +718,305 @@ impl TokenLex<'_> { )), ErrorInfo::new( gettext!(error::UNMATCHED_BRACE, unmatch_char.c), - error::SYNTAX_ERROR, + gettext(error::SYNTAX_ERROR), ), )); } Ok(()) } + + fn binary_symbol(&mut self, a: TokenType, b: TokenType, binary_sym: char) -> Token { + let c = self.compiler_data.input.read(); + if c == binary_sym { + Token::new(b, None) + } else { + self.compiler_data.input.unread(c); + Token::new(a, None) + } + } + + fn self_symbol(&mut self, sym: TokenType, self_sym: TokenType) -> Token { + self.binary_symbol(sym, self_sym, '=') + } + + fn double_symbol( + &mut self, + before_sym: TokenType, + before_self_sym: TokenType, + matched_sym: TokenType, + matched_self_sym: TokenType, + matched_char: char, + ) -> Token { + let c = self.compiler_data.input.read(); + if c == matched_char { + self.self_symbol(matched_sym, matched_self_sym) + } else { + self.compiler_data.input.unread(c); + self.self_symbol(before_sym, before_self_sym) + } + } } impl Drop for TokenLex<'_> { fn drop(&mut self) { // check the braces stack + match self.check() { + Err(e) => { + eprintln!("{}", e); + exit(1); + } + _ => {} + } } } #[cfg(test)] mod tests { - use crate::compiler::{InputSource, Option}; + use std::{collections::HashSet, hash::Hash}; use super::*; + use crate::compiler::{Float, InputSource, Option, Pool, INT_VAL_POOL_ONE}; + + macro_rules! gen_test_token_env { + ($test_string:expr, $env_name:ident) => { + let mut env = Compiler::new_string_compiler( + Option::new(false, InputSource::StringInternal), + $test_string, + ); + let mut $env_name = TokenLex::new(&mut env); + }; + } fn check(tokenlex: &mut TokenLex, expected_res: Vec) { for i in expected_res { assert_eq!(i, tokenlex.next_token().unwrap().unwrap()); } assert_eq!(None, tokenlex.next_token().unwrap()); + tokenlex.check().unwrap(); + } + + /// check const pool + fn check_pool(v: Vec, pool_be_checked: &Pool) + where + T: Eq + Hash + Clone, + { + let mut testpool: HashSet = HashSet::new(); + for i in &v { + testpool.insert((*i).clone()); + } + assert_eq!(testpool.len(), pool_be_checked.len()); + for i in &testpool { + assert!(pool_be_checked.contains_key(i)); + } } #[test] fn test_numberlex() { - let mut env = Compiler::new_string_compiler( - Option::new(false, InputSource::StringInternal), + gen_test_token_env!( r#",,.,100 - 123.9 232_304904 - 0b011 - 0x2aA4 - 0o2434 0 0"#, + 123.9 232_304904 + 0b011 + 0x2aA4 + 0o2434 0 0 1e3.8 1e9 1.2e1 8e-1"#, + t + ); + check( + &mut t, + vec![ + Token::new(TokenType::Comma, None), + Token::new(TokenType::Comma, None), + Token::new(TokenType::Dot, None), + Token::new(TokenType::Comma, None), + Token::new(TokenType::FloatValue, Some(0)), + Token::new(TokenType::IntValue, Some(1)), + Token::new(TokenType::IntValue, Some(2)), + Token::new(TokenType::IntValue, Some(3)), + Token::new(TokenType::IntValue, Some(4)), + Token::new(TokenType::IntValue, Some(INT_VAL_POOL_ZERO)), + Token::new(TokenType::IntValue, Some(INT_VAL_POOL_ZERO)), + Token::new(TokenType::FloatValue, Some(1)), + Token::new(TokenType::IntValue, Some(5)), + Token::new(TokenType::FloatValue, Some(2)), + Token::new(TokenType::FloatValue, Some(3)), + ], + ); + check_pool( + vec![100, 232_304904, 0b011, 0x2aA4, 0, 1], + &t.compiler_data.const_pool.const_ints, + ); + check_pool( + vec![ + Float::new(123, 9), + Float::new(1, 2), + Float::new(1000, 8), + Float::new(0, 8), + ], + &t.compiler_data.const_pool.const_floats, ); - let mut t = TokenLex::new(&mut env); - let res = vec![ - Token::new(TokenType::Comma, None), - Token::new(TokenType::Comma, None), - Token::new(TokenType::Dot, None), - Token::new(TokenType::Comma, None), - Token::new(TokenType::FloatValue, Some(Data::Ind(0))), - Token::new(TokenType::IntValue, Some(Data::Ind(1))), - Token::new(TokenType::IntValue, Some(Data::Ind(2))), - Token::new(TokenType::IntValue, Some(Data::Ind(3))), - Token::new(TokenType::IntValue, Some(Data::Ind(4))), - Token::new(TokenType::IntValue, Some(Data::Ind(INT_VAL_POOL_ZERO))), - Token::new(TokenType::IntValue, Some(Data::Ind(INT_VAL_POOL_ZERO))), - ]; - check(&mut t, res); } #[test] fn test_symbol_lex() { - let mut env = Compiler::new_string_compiler( - Option::new(false, InputSource::StringInternal), - r#":{}[]()+=%=//= // /=** *=*"#, + gen_test_token_env!( + r#":{}[]()+=%=//= // /=** *=*, + >><< >>="#, + t + ); + check( + &mut t, + vec![ + Token::new(TokenType::Colon, None), + Token::new(TokenType::LeftBigBrace, None), + Token::new(TokenType::RightBigBrace, None), + Token::new(TokenType::LeftMiddleBrace, None), + Token::new(TokenType::RightMiddleBrace, None), + Token::new(TokenType::LeftSmallBrace, None), + Token::new(TokenType::RightSmallBrace, None), + Token::new(TokenType::SelfAdd, None), + Token::new(TokenType::SelfMod, None), + Token::new(TokenType::SelfExactDiv, None), + Token::new(TokenType::ExactDiv, None), + Token::new(TokenType::SelfDiv, None), + Token::new(TokenType::Power, None), + Token::new(TokenType::SelfMul, None), + Token::new(TokenType::Mul, None), + Token::new(TokenType::Comma, None), + Token::new(TokenType::BitRightShift, None), + Token::new(TokenType::BitLeftShift, None), + Token::new(TokenType::SelfBitRightShift, None), + ], ); - let mut t = TokenLex::new(&mut env); - let res = vec![Token::new(TokenType::StringValue, Some(Data::Ind(0)))]; - check(&mut t, res); } + #[test] fn test_string_lex() { - let mut env = Compiler::new_string_compiler( - Option::new(false, InputSource::StringInternal), - r#""s"'sd''sdscdcdfvf'"depkd"''"\n\t"'ttt\tt'"#, + gen_test_token_env!(r#""s"'sd''sdscdcdfvf'"depkd"''"\n\t"'ttt\tt'"#, t); + check( + &mut t, + vec![ + Token::new(TokenType::StringValue, Some(0)), + Token::new(TokenType::StringValue, Some(1)), + Token::new(TokenType::StringValue, Some(2)), + Token::new(TokenType::StringValue, Some(3)), + Token::new(TokenType::StringValue, Some(4)), + Token::new(TokenType::StringValue, Some(5)), + Token::new(TokenType::StringValue, Some(6)), + ], + ); + check_pool( + vec![ + String::from("s"), + String::from("sd"), + String::from("sdscdcdfvf"), + String::from("depkd"), + String::from(""), + String::from("\n\t"), + String::from("ttt\tt"), + ], + &t.compiler_data.const_pool.const_strings, ); - let res = vec![Token::new(TokenType::StringValue, Some(Data::Ind(0)))]; } #[test] fn test_comprehensive_lex() {} + #[test] + fn test_id_lex() { + gen_test_token_env!(r#"id fuck _fuck 天帝abc abc天帝"#, t); + check( + &mut t, + vec![ + Token::new(TokenType::ID, Some(0)), + Token::new(TokenType::ID, Some(1)), + Token::new(TokenType::ID, Some(2)), + Token::new(TokenType::ID, Some(3)), + Token::new(TokenType::ID, Some(4)), + ], + ); + check_pool( + vec![ + String::from("id"), + String::from("fuck"), + String::from("_fuck"), + String::from("天帝abc"), + String::from("abc天帝"), + ], + &t.compiler_data.const_pool.name_pool, + ); + } + + #[test] + fn test_wrong_number1() { + gen_test_token_env!(r#"0b123"#, t); + check( + &mut t, + vec![ + Token::new(TokenType::IntValue, Some(INT_VAL_POOL_ONE)), + Token::new(TokenType::IntValue, Some(2)), + ], + ); + check_pool(vec![0b1, 23, 0], &t.compiler_data.const_pool.const_ints); + } + + #[test] + fn test_wrong_number2() { + gen_test_token_env!(r#"0xabchds"#, t); + check( + &mut t, + vec![ + Token::new(TokenType::IntValue, Some(2)), + Token::new(TokenType::ID, Some(0)), + ], + ); + check_pool(vec![0xabc], &t.compiler_data.const_pool.const_ints); + check_pool( + vec![String::from("hds")], + &t.compiler_data.const_pool.name_pool, + ); + } + + #[test] + fn test_next_back() { + gen_test_token_env!(r#":()"#, t); + let tmp = t.next_token().unwrap().unwrap(); + assert_eq!(tmp.tp, TokenType::Colon); + t.next_back(tmp); + assert_eq!(t.next_token().unwrap().unwrap().tp, TokenType::Colon); + check( + &mut t, + vec![ + Token::new(TokenType::LeftSmallBrace, None), + Token::new(TokenType::RightSmallBrace, None), + ], + ); + } + + #[test] + #[should_panic] + fn test_braces_check2() { + gen_test_token_env!(r#":)|"#, t); + check( + &mut t, + vec![ + Token::new(TokenType::Colon, None), + Token::new(TokenType::LeftSmallBrace, None), + Token::new(TokenType::BitAnd, None), + ], + ); + } + #[test] #[should_panic] - fn test_wrong_number() { - let mut env = Compiler::new_string_compiler( - Option::new(false, InputSource::StringInternal), - r#"0xtghhy 0b231"#, + fn test_braces_check1() { + gen_test_token_env!(r#":("#, t); + check( + &mut t, + vec![ + Token::new(TokenType::Colon, None), + Token::new(TokenType::LeftSmallBrace, None), + ], ); - let mut t = TokenLex::new(&mut env); - t.next_token().unwrap(); - t.next_token().unwrap(); } } diff --git a/rust/src/lib.rs b/rust/src/lib.rs index df0259ec..5b8370f3 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -18,19 +18,19 @@ struct Args { #[derive(Debug, Subcommand)] enum Commands { - build { optimize: bool }, - tshell {}, + Build { optimize: bool }, + Tshell {}, } pub fn run() { let cli = Args::parse(); match cli.mode { - Commands::build { optimize: opt } => { + Commands::Build { optimize: opt } => { for i in cli.files { tools::compile(compiler::Option::new(opt, compiler::InputSource::File(i))); } } - Commands::tshell {} => { + Commands::Tshell {} => { tools::tshell::tshell(); } }; diff --git a/rust/src/tools/compile_tool.rs b/rust/src/tools/compile_tool.rs index 0aa71d23..616c7446 100644 --- a/rust/src/tools/compile_tool.rs +++ b/rust/src/tools/compile_tool.rs @@ -1,6 +1,14 @@ +use std::process::exit; + use crate::compiler; pub fn compile(opt: compiler::Option) { let mut compiler = compiler::Compiler::new(opt); - compiler.lex(); + match compiler.lex() { + Ok(data) => {} + Err(e) => { + eprintln!("{}", e); + exit(1) + } + } } diff --git a/rust/src/tvm.rs b/rust/src/tvm.rs index 931ab841..084e0634 100644 --- a/rust/src/tvm.rs +++ b/rust/src/tvm.rs @@ -4,6 +4,7 @@ mod function; mod gc; mod types; +use crate::base::codegen::{self, StaticData}; use gettextrs::gettext; use crate::{ @@ -15,22 +16,6 @@ use crate::{ use self::types::trcint::TrcInt; -pub struct ConstPool { - pub intpool: Vec, - pub stringpool: Vec, - pub floatpool: Vec, -} - -impl ConstPool { - pub fn new() -> Self { - Self { - intpool: Vec::new(), - stringpool: Vec::new(), - floatpool: Vec::new(), - } - } -} - pub struct DynaData<'a> { obj_stack: Vec>, frames_stack: Vec>, @@ -45,18 +30,11 @@ impl<'a> DynaData<'a> { } } -pub struct Inst { - opcode: Opcode, - operand: usize, -} - pub struct Vm<'a> { - constpool: ConstPool, - inst: Vec, - funcs: Vec, run_contnet: Content, dynadata: DynaData<'a>, pc: usize, + static_data: StaticData, } #[derive(Debug, Clone)] @@ -92,39 +70,6 @@ impl Content { } } -enum Opcode { - Add, - Sub, - Mul, - Div, - ExtraDiv, - Mod, - Power, - Eq, - Ne, - Lt, - Le, - Gt, - Ge, - And, - Or, - Not, - Xor, - BitNot, - BitAnd, - BitOr, - BitLeftShift, - BitRightShift, - // change the option code index - Goto, - // return from a function - PopFrame, - // create a frame to hold the function - NewFrame, - // Load a int from const pool - LoadInt, -} - /// reduce the duplicate code to solve the operator running macro_rules! binary_opcode { ($trait_used:ident, $sself:expr) => {{ @@ -133,7 +78,7 @@ macro_rules! binary_opcode { if t1.is_none() || t2.is_none() { return Err(RuntimeError::new( Box::new($sself.run_contnet.clone()), - ErrorInfo::new(gettext!(VM_DATA_NUMBER, 2), VM_ERROR), + ErrorInfo::new(gettext!(VM_DATA_NUMBER, 2), gettext(VM_ERROR)), )); } let t1 = t1.unwrap(); @@ -156,7 +101,7 @@ macro_rules! unary_opcode { if t1.is_none() { return Err(RuntimeError::new( Box::new($sself.run_contnet.clone()), - ErrorInfo::new(gettext!(VM_DATA_NUMBER, 1), VM_ERROR), + ErrorInfo::new(gettext!(VM_DATA_NUMBER, 1), gettext(VM_ERROR)), )); } let t1 = t1.unwrap(); @@ -175,61 +120,75 @@ macro_rules! unary_opcode { impl<'a> Vm<'a> { pub fn new() -> Self { Self { - constpool: ConstPool::new(), - inst: Vec::new(), pc: 0, - funcs: vec![], dynadata: DynaData::new(), run_contnet: Content::new(cfg::MAIN_MODULE_NAME), + static_data: StaticData::new(), + } + } + + pub fn new_init(static_data: StaticData) -> Self { + Self { + pc: 0, + + dynadata: DynaData::new(), + run_contnet: Content::new(cfg::MAIN_MODULE_NAME), + static_data, } } pub fn run(&mut self) -> Result<(), RuntimeError> { - while self.pc < self.inst.len() { - match self.inst[self.pc].opcode { - Opcode::Add => binary_opcode!(add, self), - Opcode::Sub => binary_opcode!(sub, self), - Opcode::Mul => binary_opcode!(mul, self), - Opcode::Div => binary_opcode!(div, self), - Opcode::ExtraDiv => binary_opcode!(extra_div, self), - Opcode::Mod => binary_opcode!(modd, self), - Opcode::Gt => binary_opcode!(gt, self), - Opcode::Lt => binary_opcode!(lt, self), - Opcode::Ge => binary_opcode!(ge, self), - Opcode::Le => binary_opcode!(le, self), - Opcode::Eq => binary_opcode!(eq, self), - Opcode::Ne => binary_opcode!(ne, self), - Opcode::And => binary_opcode!(and, self), - Opcode::Or => binary_opcode!(or, self), - Opcode::Power => binary_opcode!(power, self), - Opcode::Not => unary_opcode!(not, self), - Opcode::Xor => binary_opcode!(xor, self), - Opcode::NewFrame => {} - Opcode::PopFrame => { + while self.pc < self.static_data.inst.len() { + match self.static_data.inst[self.pc].opcode { + codegen::Opcode::Add => binary_opcode!(add, self), + codegen::Opcode::Sub => binary_opcode!(sub, self), + codegen::Opcode::Mul => binary_opcode!(mul, self), + codegen::Opcode::Div => binary_opcode!(div, self), + codegen::Opcode::ExtraDiv => binary_opcode!(extra_div, self), + codegen::Opcode::Mod => binary_opcode!(modd, self), + codegen::Opcode::Gt => binary_opcode!(gt, self), + codegen::Opcode::Lt => binary_opcode!(lt, self), + codegen::Opcode::Ge => binary_opcode!(ge, self), + codegen::Opcode::Le => binary_opcode!(le, self), + codegen::Opcode::Eq => binary_opcode!(eq, self), + codegen::Opcode::Ne => binary_opcode!(ne, self), + codegen::Opcode::And => binary_opcode!(and, self), + codegen::Opcode::Or => binary_opcode!(or, self), + codegen::Opcode::Power => binary_opcode!(power, self), + codegen::Opcode::Not => unary_opcode!(not, self), + codegen::Opcode::Xor => binary_opcode!(xor, self), + codegen::Opcode::NewFrame => {} + codegen::Opcode::PopFrame => { let ret = self.dynadata.frames_stack.pop(); if let None = ret { return Err(RuntimeError::new( Box::new(self.run_contnet.clone()), - ErrorInfo::new(gettext(VM_FRAME_EMPTY), VM_ERROR), + ErrorInfo::new(gettext(VM_FRAME_EMPTY), gettext(VM_ERROR)), )); } } - Opcode::Goto => { - self.pc = self.inst[self.pc].operand; + codegen::Opcode::Goto => { + self.pc = self.static_data.inst[self.pc].operand; } - Opcode::LoadInt => { + codegen::Opcode::LoadInt => { self.dynadata.obj_stack.push(Box::new(TrcInt::new( - self.constpool.intpool[self.inst[self.pc].operand], + self.static_data.constpool.intpool[self.static_data.inst[self.pc].operand], ))); } - Opcode::BitAnd => binary_opcode!(bit_and, self), - Opcode::BitOr => binary_opcode!(bit_or, self), - Opcode::BitNot => unary_opcode!(bit_not, self), - Opcode::BitLeftShift => binary_opcode!(bit_left_shift, self), - Opcode::BitRightShift => binary_opcode!(bit_right_shift, self), + codegen::Opcode::BitAnd => binary_opcode!(bit_and, self), + codegen::Opcode::BitOr => binary_opcode!(bit_or, self), + codegen::Opcode::BitNot => unary_opcode!(bit_not, self), + codegen::Opcode::BitLeftShift => binary_opcode!(bit_left_shift, self), + codegen::Opcode::BitRightShift => binary_opcode!(bit_right_shift, self), } self.pc += 1; } Ok(()) } } + +#[cfg(test)] +mod tests { + #[test] + fn test_vm() {} +} diff --git a/rust/src/tvm/algo/string.rs b/rust/src/tvm/algo/string.rs index 75a597d2..f6bf81f2 100644 --- a/rust/src/tvm/algo/string.rs +++ b/rust/src/tvm/algo/string.rs @@ -8,7 +8,7 @@ pub fn kmp(main_string: &str, pattern: &str) -> usize { let pattern: Vec = pattern.chars().collect(); let mut ans = 0; for i in main_string.chars() { - while j != -1 && pattern[(j + 1) as usize] == i { + while j != -1 && pattern[(j + 1) as usize] != i { j = next_arr[j as usize] as i64; } if pattern[(j + 1) as usize] == i { @@ -16,6 +16,7 @@ pub fn kmp(main_string: &str, pattern: &str) -> usize { } if j as usize == pattern.len() - 1 { ans += 1; + j = next_arr[j as usize] as i64; } } ans @@ -38,13 +39,23 @@ pub fn kmp_next(pattern: &str) -> Vec { ret } -pub fn sa(s: &str) { - let mut sa: Vec = Vec::new(); - let mut t: HashMap = HashMap::new(); +pub fn sa(s: &str) -> Vec { + let mut sa: Vec = Vec::new(); + let mut t: HashMap = HashMap::new(); for i in s.chars() { let tmp = t.entry(i).or_insert(0); *tmp += 1; } + for i in &t { + sa.push(*i.0 as usize); + } + let mut rk: Vec = Vec::new(); + rk.resize(sa.len(), 0); + for i in &sa { + let _tmp = t.entry(char::from_u32(*i as u32).unwrap()); + // rk[] + } + sa } #[cfg(test)] @@ -56,6 +67,13 @@ mod tests { let sarray = sa(s); } + #[test] + fn sa_2() { + let s = "ababa"; + let sarray = vec![5, 3, 1, 4, 2]; + assert_eq!(sa(s), sarray); + } + #[test] fn kmp_1() { assert_eq!(kmp("ABABABC", "ABA"), 2); diff --git a/rust/src/tvm/function.rs b/rust/src/tvm/function.rs index cc5ec25b..e8ebba43 100644 --- a/rust/src/tvm/function.rs +++ b/rust/src/tvm/function.rs @@ -1,14 +1,4 @@ -use super::types::TrcObj; - -pub struct Func { - name: String, -} - -impl Func { - fn new(name: String) -> Self { - Self { name } - } -} +use crate::base::func; /// A content structure which hold the running info of the function pub struct Frame<'a> { @@ -16,7 +6,7 @@ pub struct Frame<'a> { } impl<'a> Frame<'a> { - fn new(func: &'a Func) -> Self { + fn new(func: &'a func::Func) -> Self { Self { name: &func.name } } } diff --git a/rust/src/tvm/types.rs b/rust/src/tvm/types.rs index 144c06d2..ab76e6f6 100644 --- a/rust/src/tvm/types.rs +++ b/rust/src/tvm/types.rs @@ -3,6 +3,7 @@ use downcast_rs::{impl_downcast, Downcast}; use gettextrs::gettext; pub mod data_structure; +pub mod trcbigint; pub mod trcbool; pub mod trcfloat; pub mod trcint; @@ -16,7 +17,7 @@ macro_rules! unsupported_operator { $operator_name, $sself.get_type_name() ), - error::SYNTAX_ERROR, + gettext(error::SYNTAX_ERROR), )) }; } @@ -40,7 +41,7 @@ macro_rules! impl_oper { Ok(Box::new($newtype::new($oper(self.value, v.value)$whether_throw_error))) }, None => { - Err(ErrorInfo::new(gettext!(OPERATOR_IS_NOT_SUPPORT, $error_oper_name, other.get_type_name()), OPERATOR_ERROR)) + Err(ErrorInfo::new(gettext!(OPERATOR_IS_NOT_SUPPORT, $error_oper_name, other.get_type_name()), gettext(OPERATOR_ERROR))) } } } @@ -53,7 +54,7 @@ macro_rules! impl_oper { Ok(Box::new($newtype::new(self.value $oper v.value))) }, None => { - Err(ErrorInfo::new(gettext!(OPERATOR_IS_NOT_SUPPORT, $error_oper_name, other.get_type_name()), OPERATOR_ERROR)) + Err(ErrorInfo::new(gettext!(OPERATOR_IS_NOT_SUPPORT, $error_oper_name, other.get_type_name()), gettext(OPERATOR_ERROR))) } } } diff --git a/rust/src/tvm/types/data_structure.rs b/rust/src/tvm/types/data_structure.rs index d3cb984d..d830f380 100644 --- a/rust/src/tvm/types/data_structure.rs +++ b/rust/src/tvm/types/data_structure.rs @@ -3,12 +3,14 @@ mod deque; mod fenwick; mod forward_list; mod hash_map; +mod heap; mod list; mod map; mod priority_queue; mod queue; mod sam; mod set; +mod splay; mod st; mod stack; mod trie; diff --git a/rust/src/tvm/types/data_structure/ac.rs b/rust/src/tvm/types/data_structure/ac.rs index 46326d2e..e00ddca9 100644 --- a/rust/src/tvm/types/data_structure/ac.rs +++ b/rust/src/tvm/types/data_structure/ac.rs @@ -72,7 +72,7 @@ impl AcAutomaton { /// ac.search("world", 2); /// let ans = ac.get_ans(); /// ```` - fn search(&self, pattern: &str, id: u32) {} + fn search(&self, _pattern: &str, _id: u32) {} fn get_ans(&self) -> HashMap { HashMap::new() @@ -84,6 +84,6 @@ mod tests { use super::*; #[test] fn ac_automaton1() { - let mut ac = AcAutomaton::new(); + let ac = AcAutomaton::new(); } } diff --git a/rust/src/tvm/types/data_structure/heap.rs b/rust/src/tvm/types/data_structure/heap.rs new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/rust/src/tvm/types/data_structure/heap.rs @@ -0,0 +1 @@ + diff --git a/rust/src/tvm/types/data_structure/splay.rs b/rust/src/tvm/types/data_structure/splay.rs new file mode 100644 index 00000000..353d7438 --- /dev/null +++ b/rust/src/tvm/types/data_structure/splay.rs @@ -0,0 +1,42 @@ +pub struct Node { + sons: [usize; 2], + cnt: usize, + sz: usize, + fa: usize, +} + +impl Node { + fn new() -> Self { + Self { + sons: [0; 2], + sz: 0, + cnt: 0, + fa: 0, + } + } +} + +pub struct Splay { + tree: Vec, +} + +impl Splay { + fn new() -> Self { + Self { tree: vec![] } + } + + fn maintain(&mut self, id: usize) { + self.tree[id].sz = + self.tree[self.tree[id].sons[0]].sz + self.tree[self.tree[id].sons[1]].sz; + } + + fn is_right(&self, id: usize) -> bool { + id == self.tree[self.tree[id].fa].sons[1] + } +} + +#[cfg(test)] +mod tests { + #[test] + fn splay() {} +} diff --git a/rust/src/tvm/types/data_structure/trie.rs b/rust/src/tvm/types/data_structure/trie.rs index 7a8f5205..e1fea3b9 100644 --- a/rust/src/tvm/types/data_structure/trie.rs +++ b/rust/src/tvm/types/data_structure/trie.rs @@ -1,3 +1,3 @@ -struct state {} +struct State {} pub struct Trie {} diff --git a/rust/src/tvm/types/trcbigint.rs b/rust/src/tvm/types/trcbigint.rs new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/rust/src/tvm/types/trcbigint.rs @@ -0,0 +1 @@ + diff --git a/rust/src/tvm/types/trcint.rs b/rust/src/tvm/types/trcint.rs index 1f94b4dd..1acc24f6 100644 --- a/rust/src/tvm/types/trcint.rs +++ b/rust/src/tvm/types/trcint.rs @@ -23,21 +23,30 @@ impl TrcInt { fn extra_div_int(a: i64, b: i64) -> Result { if b == 0 { - return Err(ErrorInfo::new(gettext!(ZERO_DIV, a), ZERO_DIVSION_ERROR)); + return Err(ErrorInfo::new( + gettext!(ZERO_DIV, a), + gettext(ZERO_DIVSION_ERROR), + )); } Ok(a / b) } fn div_int(a: i64, b: i64) -> Result { if b == 0 { - return Err(ErrorInfo::new(gettext!(ZERO_DIV, a), ZERO_DIVSION_ERROR)); + return Err(ErrorInfo::new( + gettext!(ZERO_DIV, a), + gettext(ZERO_DIVSION_ERROR), + )); } Ok(a as f64 / b as f64) } fn mod_int(a: i64, b: i64) -> Result { if b == 0 { - return Err(ErrorInfo::new(gettext!(ZERO_DIV, a), ZERO_DIVSION_ERROR)); + return Err(ErrorInfo::new( + gettext!(ZERO_DIV, a), + gettext(ZERO_DIVSION_ERROR), + )); } Ok(a % b) } diff --git a/rust/tests/testdata/compiler/compiler1.txt b/rust/tests/testdata/compiler/compiler1.txt new file mode 100644 index 00000000..d312c0d5 --- /dev/null +++ b/rust/tests/testdata/compiler/compiler1.txt @@ -0,0 +1,3 @@ +hduefgdisvicvdsiugfuewgfjdfbsdjkfcdsfgsd + +dsefddfdfvfd \ No newline at end of file diff --git a/src/TVM/TVM.cppm b/src/TVM/TVM.cppm index b35d1ca0..954e2f28 100644 --- a/src/TVM/TVM.cppm +++ b/src/TVM/TVM.cppm @@ -75,8 +75,9 @@ public: */ template void error_report(error::error_type error, const P&... argv) { - error::send_error_interal( - error, name, std::to_string(static_data.line_number_table[run_index] + 1), argv...); + error::send_error_interal(error, name, + std::to_string(static_data.line_number_table[run_index] + 1), + argv...); } /** diff --git a/src/base/Error.cppm b/src/base/Error.cppm deleted file mode 100644 index 527e2e3f..00000000 --- a/src/base/Error.cppm +++ /dev/null @@ -1,86 +0,0 @@ -module; -#include -export module Error; -import trcdef; -import language; - -export namespace trc::error { -/** - * 报错设置 - * 系统需要知道当前处于什么模式,以合适的模式应对发生的状况 - */ -namespace error_env { - // 是否终止程序 - bool quit = true; - - class vm_run_error : public std::exception { }; -} - -// 错误,增强可读性 -enum error_type { - NameError, - ValueError, - SyntaxError, - VersionError, - OpenFileError, - ModuleNotFoundError, - ArgumentError, - ZeroDivError, - RunError, - AssertError, - IndexError, - MemoryError, - KeyError, - SystemError, - OperatorError, - RedefinedError -}; - -/** - * @brief 输出报错信息 - * @param error_name 异常名 - * @param ap 可变参数 - */ -template -void output_error_msg(error_type error_name, const argv_t&... ap) { - // 报错的模板字符串 - auto index = 0; - const char* argv_arr[] = { ap... }; - const char* base_string = argv_arr[0]; - for (size_t i = 0; base_string[i]; ++i) { - if (base_string[i] == '%') { - // 输出报错字符串 - fprintf(stderr, "%s", argv_arr[++index]); - } else { - fputc(base_string[i], stderr); - } - } - fputc('\n', stderr); -} - -template -void send_error_interal(error_type name, const std::string& module_name, - const std::string&postion_info, const argv_t&... ap) { - if constexpr (compiling) { - fprintf(stderr, "\n%s%s\n", language::error::error_from, postion_info.c_str()); - } else { - fprintf(stderr, "\n%s%s\n%s%s:\n", language::error::error_from, - module_name.c_str(), language::error::error_in_line, postion_info.c_str()); - } - // 输出错误名 - fprintf(stderr, "%s", language::error::error_map[name]); - output_error_msg(name, ap...); - // 检查设置判断是否报错 - if (error_env::quit) { - // 报错,退出程序 - exit(EXIT_FAILURE); - } - // 跳转到执行的地方 - throw error_env::vm_run_error(); -} - -template -void send_error(error_type error_name, const argv_t&... argv) { - send_error_interal(error_name, "__main__", "0", argv...); -} -} diff --git a/src/base/error.cppm b/src/base/error.cppm new file mode 100644 index 00000000..325fcee6 --- /dev/null +++ b/src/base/error.cppm @@ -0,0 +1,78 @@ +module; +#include +#include +#include +export module error; +import trcdef; + +export namespace trc::error { +class content { +public: + content() { + } + +private: + virtual const char* get_module_name() = 0; + virtual size_t get_line() = 0; +}; + +class error_info { +public: + const char* error_type; + std::string error_msg; +}; + +const char* no_reach + = "The program executed code that should not have been executed.Please " + "report the problem to the Github repository."; + +template +class [[nodiscard("ignore the result")]] Result { + +} + +class vm_run_error : public std::exception { +public: + const content* error_content; + error_info info; + vm_run_error(const content* error_content, error_info error_if) + : error_content(error_content) + , error_info(error_info) { + } + + const char* what() const noexcept override { + return "vm run error"; + } + + std::string error_msg() { + auto ret = std::vformat("\n{}{}\n{}{}:\n"); + fprintf(stderr, , error::error_from, module_name.c_str(), + error::error_in_line, postion_info.c_str()); + // 输出错误名 + fprintf(stderr, "%s", language::error::error_map[name]); + } +}; + +// 错误,增强可读性 +enum error_type { + NameError, + ValueError, + SyntaxError, + VersionError, + OpenFileError, + ModuleNotFoundError, + ArgumentError, + ZeroDivError, + RunError, + AssertError, + IndexError, + MemoryError, + KeyError, + SystemError, + OperatorError, + RedefinedError +}; + +const char* error_from = "Error from {}"; +const char* error_in_line = "Error in line {}"; +} diff --git a/src/base/io.cppm b/src/base/io.cppm index 53032f01..f43052cf 100644 --- a/src/base/io.cppm +++ b/src/base/io.cppm @@ -12,8 +12,7 @@ module; #include #include export module io; -import Error; -import language; +import error; const size_t mem_init_size = 15; const size_t mem_realloc_size = 20; diff --git a/src/base/memory/memory.cppm b/src/base/memory/memory.cppm index 413cd5ce..304d8a30 100644 --- a/src/base/memory/memory.cppm +++ b/src/base/memory/memory.cppm @@ -6,8 +6,7 @@ module; #include export module memory; -import Error; -import language; +import error; namespace trc::memory { /** diff --git a/src/base/unreach.cppm b/src/base/unreach.cppm index e950d978..27cc10d7 100644 --- a/src/base/unreach.cppm +++ b/src/base/unreach.cppm @@ -1,10 +1,11 @@ module; #include #include +#include #include #include export module unreach; -import language; +import error; namespace trc { export void unreach(const std::string& error_msg, @@ -13,7 +14,7 @@ export void unreach(const std::string& error_msg, "%s\nFatal error in function \"%s\" file %s line " "%u\n%s", error_msg.c_str(), source_info.function_name(), source_info.file_name(), - source_info.line(), language::error::noreach); + source_info.line(), gettext(error::noreach)); exit(EXIT_FAILURE); } } diff --git a/src/base/utils/filesys.cppm b/src/base/utils/filesys.cppm index 3cc2427e..15b3aaa6 100644 --- a/src/base/utils/filesys.cppm +++ b/src/base/utils/filesys.cppm @@ -13,9 +13,8 @@ module; #include #include export module filesys; -import Error; +import error; import trcdef; -import language; export namespace fs = std::filesystem; diff --git a/src/compiler/compile_env.cppm b/src/compiler/compile_env.cppm index 6dd72ed4..69c59663 100644 --- a/src/compiler/compile_env.cppm +++ b/src/compiler/compile_env.cppm @@ -94,8 +94,8 @@ size_t module_compile_env::get_index_of_function(size_t name) { return i; } } - compiler_data.send_error(error::NameError, - language::error::nameerror, compiler_data.const_name.ref[name].c_str()); + compiler_data.send_error(error::NameError, language::error::nameerror, + compiler_data.const_name.ref[name].c_str()); return 0; } @@ -119,8 +119,7 @@ size_t basic_compile_env::get_index_of_var(size_t name, bool report_error) { } if (report_error) { // 并不在当前符号表,报错 - compiler_data.send_error(error::NameError, - language::error::nameerror, + compiler_data.send_error(error::NameError, language::error::nameerror, compiler_data.const_name.ref[name].c_str()); } return unsave; diff --git a/src/compiler/compiler.cppm b/src/compiler/compiler.cppm index 5d89cb43..99bffd4b 100644 --- a/src/compiler/compiler.cppm +++ b/src/compiler/compiler.cppm @@ -14,8 +14,10 @@ public: compiler(); // Run the parser on file F. Return 0 on success. - int parse(const compiler_option& option, const std::string& f, TVM_space::TVM_static_data* vm); - int parse(const compiler_option& option, const std::string&filename, FILE* f, TVM_space::TVM_static_data* vm); + int parse(const compiler_option& option, const std::string& f, + TVM_space::TVM_static_data* vm); + int parse(const compiler_option& option, const std::string& filename, + FILE* f, TVM_space::TVM_static_data* vm); // Whether to generate parser debug traces. bool trace_parsing = false; @@ -37,15 +39,14 @@ private: namespace trc::compiler { compiler::compiler() = default; -int compiler::parse(const compiler_option& option, const std::string& f, TVM_space::TVM_static_data* vm) { - +int compiler::parse(const compiler_option& option, const std::string& f, + TVM_space::TVM_static_data* vm) { } -int compiler::parse(const compiler_option& option, const std::string&filename, FILE* f, TVM_space::TVM_static_data* vm) { - +int compiler::parse(const compiler_option& option, const std::string& filename, + FILE* f, TVM_space::TVM_static_data* vm) { } FILE* compiler::open_file(const std::string& file) { - } } diff --git a/src/compiler/compiler_def.cppm b/src/compiler/compiler_def.cppm index c45403aa..a8d37d6f 100644 --- a/src/compiler/compiler_def.cppm +++ b/src/compiler/compiler_def.cppm @@ -1,10 +1,10 @@ module; #include #include +#include #include #include #include -#include export module compiler_def; import TVM; import Error; @@ -80,7 +80,8 @@ public: */ class compiler_public_data { public: - compiler_public_data(const compiler_option& option, TVM_space::TVM_static_data& vm) + compiler_public_data( + const compiler_option& option, TVM_space::TVM_static_data& vm) : option(option) , vm(vm) , const_int(vm.const_i) @@ -105,7 +106,7 @@ public: */ template void send_error(error::error_type errorn, const P&... argv) { - //todo:optimize it + // todo:optimize it std::stringstream ss; error::send_error_interal(errorn, ss.str(), argv...); } diff --git a/src/compiler/token.cppm b/src/compiler/token.cppm new file mode 100644 index 00000000..cad0b9c6 --- /dev/null +++ b/src/compiler/token.cppm @@ -0,0 +1,665 @@ +module; +#include +#include +#include +#include +#include +#include +export module token; +import compiler_def; +import trc_flong; +import trc_long; +import Error; +import language; +import unreach; + +constexpr size_t buffersize = 4028; + +export namespace trc::compiler { +// token的标识 +enum class token_ticks : size_t { + FOR, // for + WHILE, // while + IF, // if + FUNC, // function + CLASS, // class + ADD, // + + SUB, // - + MUL, // * + DIV, // / + ZDIV, // // + MOD, // % + POW, // ** + AND, // and + OR, // or + NOT, // not + EQUAL, // == + UNEQUAL, // != + LESS, // < + GREATER, // > + LESS_EQUAL, // <= + GREATER_EQUAL, // >= + IMPORT, // import + GOTO, // goto + DEL, // del + ASSERT, // assert + BREAK, // break + CONTINUE, // continue + SELFADD, // += + SELFSUB, // -= + SELFMUL, // *= + SELFDIV, // /= + SELFZDIV, // //= + SELFMOD, // %= + SELFPOW, // **= + ASSIGN, // = + STORE, // := + NAME, // 名称 + NULL_, // null + TRUE_, // true + FALSE_, // false + STRING_VALUE, // 字符串值 + LONG_FLOAT_VALUE, // 长浮点型值 + FLOAT_VALUE, // 浮点数值 + LONG_INT_VALUE, // 长整型值 + INT_VALUE, // 整型值 + LEFT_BIG_BRACE, // { + RIGHT_BIG_BRACE, // } + LEFT_SMALL_BRACE, // ( + RIGHT_SMALL_BRACE, // ) + LEFT_MID_BRACE, // [ + RIGHT_MID_BRACE, // ] + POINT, //. + COMMA, // , + UNKNOWN, // unknown value,可以暂时用来占个位 + END_OF_TOKENS, // 解析结束 + END_OF_LINE, // 行结束 +}; + +/** + * @brief token + * @details 一个完整的token包括标识和值两部分,是解析器的基本单元 + */ +struct token { + // 标识 + token_ticks tick {}; + // 值 + size_t data = 0; +}; + +class buffer_ctrl { +private: + /** + * @brief 装载缓冲区并指向它 + * @param buf + */ + void setbuf(std::array& buf) { + char_ptr = buf.data(); + auto readsz = fread(buf.data(), buffersize, + sizeof(std::remove_reference_t::value_type), file); + buf[readsz] = 0; + } + +public: + // char buffer,prevent compiler from reading a large file and storing it in + // memory + std::array buffer1, buffer2; + FILE* file; + bool end = false; + + // 指向当前正在解析的字符 + const char* char_ptr = nullptr; + + buffer_ctrl(FILE* file) + : file(file) { + setbuf(buffer1); + } + + /** + * @brief read next char from buffer + * @return next char + */ + int nextchar() { + if (end) { + return EOF; + } + char ret = *char_ptr; + char_ptr++; + if (*char_ptr == 0) { + if (char_ptr == &buffer1.back()) { + setbuf(buffer2); + } else if (char_ptr == &buffer2.back()) { + setbuf(buffer1); + } else { + end = true; + } + } + return ret; + } + + /** + * @brief get the char now + * @return + */ + int readchar() const { + if (end) { + return EOF; + } + return *char_ptr; + } +}; + +/** + * @brief + * 这是一个将字符串转换成token流按行输出的类 + */ +class token_lex { +public: + explicit token_lex(compiler_public_data& compiler_data, FILE* file); + + ~token_lex(); + + /** + * @brief + * 从当前字符串代码中读取一个token并返回给grammar解析 + * + * @return token 返回一个有意义的token + * @return + * 特殊返回值:由于总是保证返回有意义,所以当token的tick为token_ticks::END_OF_TOKENS时,表示解析结束 + */ + token get_token(); + + /** + * @brief 退回并储存一个token + */ + void unget_token(token t); + + compiler_public_data& compiler_data; + +private: + std::stack tokenback; + + int id = 0; + + buffer_ctrl buf; + + token lexinteral(); + + // 判断是否解析到了终点 + [[nodiscard]] bool end_of_lex() const noexcept; + + // 解析数字(包括浮点数) + token lex_int_float(); + + /** + * @brief 解析一个字符串 + * 注:会略过开头结尾的"和'符号 + */ + token lex_string(); + + /** + * @brief + * 解析英文符号(包含关键字和名称两种可能) + */ + token lex_english(); + + /** + * @brief 解析其他字符,如[],()等 + * @details + * 在这里解析的字符都能被用token_ticks完整表达,所以不需要储存任何信息 + */ + token lex_others(); + + /** + * 解析符号时遇到多种情况,例如读取*后可以为*,*=,**,**=四种情况 + * 使用方法:传入符号后期待的符号,如*期待=,再依次传入期待满足时的标记和期待不满足时的标记 + */ + token_ticks get_binary_ticks( + char expected_char, token_ticks expected, token_ticks unexpected); + + /** + * 只有在有且仅有一个期待字符时使用 + * @brief + * 检查下一个字符是不是期待的字符,如果接下来是不是期待的字符,直接报错 + * + * @param expected_char 接下来唯一期待的字符 + */ + void check_expected_char(char expected_char); + + // 用于检查括号是否正确匹配 + std::stack check_brace; +}; + +token token_lex::lex_string() { + // 略过当前"符号 + char string_begin = buf.readchar(); + buf.nextchar(); + std::string str; + // 使用自定义的函数方便处理换行符,消除字符的移动 + while (true) { + if (end_of_lex()) { + // 读到文件末尾了,说明字符串解析错误 + compiler_data.error.send_error_module( + error::SyntaxError, language::error::syntaxerror_lexstring); + } + char tmp = buf.nextchar(); + if (tmp == string_begin) { + break; + } + if (tmp == '\\') { + // 转义符 + tmp = buf.nextchar(); + // 读出真实符号并匹配转为真实符号 + switch (tmp) { + case 'r': { + str += '\r'; + break; + } + case 'b': { + str += '\b'; + break; + } + case 'n': { + str += '\n'; + break; + } + case '\'': { + str += '\''; + break; + } + case '"': { + str += '"'; + break; + } + case 't': { + str += '\t'; + break; + } + case '\\': { + str += '\\'; + break; + } + case '0': { + str += '\0'; + break; + } + case 'a': { + str += '\a'; + break; + } + case 'f': { + str += '\f'; + break; + } + case 'v': { + str += '\v'; + break; + } + default: { + compiler_data.error.send_error_module(error::SyntaxError, + language::error::syntaxerror_escape_char); + } + } + } else { + str += tmp; + } + } + return token { token_ticks::STRING_VALUE, + compiler_data.const_string.add(str) }; +} + +token token_lex::lex_int_float() { + token_ticks tick_for_res = token_ticks::INT_VALUE; + token result; + std::string str; + size_t res_len = 0; + while (!end_of_lex()) { + char c = buf.nextchar(); + if (c == '.') { + // 小数点,开启调整类型为浮点数 + tick_for_res = token_ticks::FLOAT_VALUE; + } else if (c == '_') { + // 忽略数字中间的下划线,例如123_456 + continue; + } else if (isdigit(c)) { + str += c; + ++res_len; + } else { + break; + } + } + // 尝试纠正为长整型和长浮点型 + switch (tick_for_res) { + case token_ticks::FLOAT_VALUE: { + if (res_len > FLOAT_LONGFLOAT_LINE) { + // todo + tick_for_res = token_ticks::LONG_FLOAT_VALUE; + } else { + result.data + = compiler_data.const_float.add(strtod(str.c_str(), nullptr)); + } + break; + } + case token_ticks::INT_VALUE: { + if (res_len > INT_LONGINT_LINE) { + result.data = compiler_data.const_long_int.add(str); + tick_for_res = token_ticks::LONG_INT_VALUE; + } else { + result.data = compiler_data.const_int.add(stoi(str)); + } + break; + } + default: { + unreach(std::format("Another token tick {}", (size_t)tick_for_res)); + } + } + result.tick = tick_for_res; + return result; +} + +bool token_lex::end_of_lex() const noexcept { + char c = buf.readchar(); + return c == '\n' || c == '\0'; +} + +#define CREATE_KEYWORD(str, tick) \ + { str, tick, sizeof(str) - 1 } + +struct { + const char* str; + token_ticks tick; + size_t len; +} keywords_[] = { CREATE_KEYWORD("for", token_ticks::FOR), + CREATE_KEYWORD("while", token_ticks::WHILE), + CREATE_KEYWORD("import", token_ticks::IMPORT), + CREATE_KEYWORD("goto", token_ticks::GOTO), + CREATE_KEYWORD("del", token_ticks::DEL), + CREATE_KEYWORD("assert", token_ticks::ASSERT), + CREATE_KEYWORD("if", token_ticks::IF), + CREATE_KEYWORD("class", token_ticks::CLASS), + CREATE_KEYWORD("func", token_ticks::FUNC), + CREATE_KEYWORD("and", token_ticks::AND), + CREATE_KEYWORD("or", token_ticks::OR), + CREATE_KEYWORD("not", token_ticks::NOT), + CREATE_KEYWORD("null", token_ticks::NULL_), + CREATE_KEYWORD("true", token_ticks::TRUE_), + CREATE_KEYWORD("false", token_ticks::FALSE_), + CREATE_KEYWORD("break", token_ticks::BREAK), + CREATE_KEYWORD("continue", token_ticks::CONTINUE) }; + +#undef CREATE_KEYWORD + +token token_lex::lex_english() { + std::string tmp; + do { + char c = buf.readchar(); + if ((!is_english(c) && !isdigit(c)) || end_of_lex()) { + break; + } + buf.nextchar(); + tmp += c; + } while (true); + for (const auto& keyword : keywords_) { + if (keyword.len == tmp.length() && tmp == keyword.str) { + // 传入空串是因为能在此被匹配的,都可以用token_ticks表达含义,不需要储存具体信息 + return token { keyword.tick }; + } + } + // 啥关键字都不是,只能是名称了 + return token { token_ticks::NAME, compiler_data.const_name.add(tmp) }; +} + +token_ticks token_lex::get_binary_ticks( + char expected_char, token_ticks expected, token_ticks unexpected) { + ++char_ptr; + if (*char_ptr == expected_char) { + return expected; + } else { + --char_ptr; + return unexpected; + } +} + +void token_lex::check_expected_char(char expected_char) { + ++char_ptr; + if (*char_ptr != expected_char) { + char err_tmp[] = { *char_ptr, '\0' }; + compiler_data.error.send_error_module(error::SyntaxError, + language::error::syntaxerror_no_expect, err_tmp); + } +} + +token token_lex::lex_others() { + token result; + switch (*char_ptr) { + case '<': { + result = token { get_binary_ticks( + '=', token_ticks::LESS_EQUAL, token_ticks::LESS) }; + break; + } + case '>': { + result = token { get_binary_ticks( + '=', token_ticks::GREATER_EQUAL, token_ticks::GREATER) }; + break; + } + case '=': { + result = token { get_binary_ticks( + '=', token_ticks::EQUAL, token_ticks::ASSIGN) }; + break; + } + case '!': { + check_expected_char('='); + result = token { token_ticks::UNEQUAL }; + break; + } + case ':': { + check_expected_char('='); + result = token { token_ticks::STORE }; + break; + } + case '+': { + result = token { get_binary_ticks( + '=', token_ticks::SELFADD, token_ticks::ADD) }; + break; + } + case '-': { + result = token { get_binary_ticks( + '=', token_ticks::SELFSUB, token_ticks::SUB) }; + break; + } + case '*': { + // *比较特殊,有**符号 + if (get_binary_ticks('*', token_ticks::POW, token_ticks::UNKNOWN) + == token_ticks::POW) { + // 确认有两个** + result = token { get_binary_ticks( + '=', token_ticks::SELFPOW, token_ticks::POW) }; + } else { + // 只有一个* + result = token { get_binary_ticks( + '=', token_ticks::SELFMUL, token_ticks::MUL) }; + } + break; + } + case '/': { + // /符号是最特殊的,因为有//符号和/*符号 + if (get_binary_ticks('/', token_ticks::ZDIV, token_ticks::UNKNOWN) + == token_ticks::ZDIV) { + // 确认有两个// + result = token { get_binary_ticks( + '=', token_ticks::SELFZDIV, token_ticks::ZDIV) }; + } else { + // 只有一个/ + if (get_binary_ticks('*', token_ticks::MUL, token_ticks::UNKNOWN) + == token_ticks::MUL) { + // 说明是/*符号,开启注释 + + // 略过当前的*字符 + ++char_ptr; + for (;;) { + if (*char_ptr == '*') { + // 遇到*/的开头,可能可以退出,不是也不用退格,反正都是注释里的,没有实际意义 + ++char_ptr; + if (*char_ptr == '/') { + break; + } else if (*char_ptr == '\n') { + // 跨行注释也需要更新行号 + compiler_data.error.add_line(); + } + } + if (end_of_lex()) { + // 注释未结尾,报错 + compiler_data.error.send_error_module( + error::SyntaxError, + language::error::syntaxerror_lexanno); + } + ++char_ptr; + } + } else { + result = token { get_binary_ticks( + '=', token_ticks::SELFDIV, token_ticks::DIV) }; + } + } + break; + } + case '%': { + result = token { get_binary_ticks( + '=', token_ticks::SELFMOD, token_ticks::MOD) }; + break; + } + /* 以下的这些括号需要进行括号匹配进行验证 */ + case '(': { + result = token { token_ticks::LEFT_SMALL_BRACE }; + check_brace.push('('); + break; + } + case ')': { + result = token { token_ticks::RIGHT_SMALL_BRACE }; + if (check_brace.empty() || check_brace.top() != '(') { + compiler_data.error.send_error_module(error::SyntaxError, + language::error::syntaxerror_no_expect, ")"); + } + check_brace.pop(); + break; + } + case '[': { + result = token { token_ticks::LEFT_MID_BRACE }; + check_brace.push('['); + break; + } + case ']': { + result = token { token_ticks::RIGHT_MID_BRACE }; + if (check_brace.empty() || check_brace.top() != '[') { + compiler_data.error.send_error_module(error::SyntaxError, + language::error::syntaxerror_no_expect, "]"); + } + check_brace.pop(); + break; + } + case '{': { + result = token { token_ticks::LEFT_BIG_BRACE }; + check_brace.push('{'); + break; + } + case '}': { + result = token { token_ticks::RIGHT_BIG_BRACE }; + if (check_brace.empty() || check_brace.top() != '{') { + compiler_data.error.send_error_module(error::SyntaxError, + language::error::syntaxerror_no_expect, "}"); + } + check_brace.pop(); + break; + } + case ',': { + result = token { token_ticks::COMMA }; + break; + } + case '.': { + result = token { token_ticks::POINT }; + break; + } + default: { + // 如果一个字符都没有匹配到,报错 + char error_tmp[2] = { *char_ptr, '\0' }; + compiler_data.error.send_error_module(error::SyntaxError, + language::error::syntaxerror_no_expect, error_tmp); + } + } + // 跳过当前字符 + ++char_ptr; + return result; +} + +void token_lex::unget_token(token t) { + if (t.tick == token_ticks::END_OF_LINE) { + compiler_data.error.sub_line(); + } + tokenback.push(t); +} + +token token_lex::get_token() { + if (!tokenback.empty()) { + auto ret = tokenback.top(); + tokenback.pop(); + return ret; + } + auto t = lexinteral(); + if (t.tick == token_ticks::END_OF_LINE) { + compiler_data.error.add_line(); + } + return t; +} + +token token_lex::lexinteral() { + if (*char_ptr == '#') { + /*忽略注释*/ + while (!end_of_lex()) { + // 只要不读完文件或本行,就往下读 + ++char_ptr; + } + } + if (*char_ptr == '\n') { + // 加一行 + compiler_data.error.add_line(); + ++char_ptr; + return token { token_ticks::END_OF_LINE }; + } + if (*char_ptr == '\0') { + // 解析结束 + return token { token_ticks::END_OF_TOKENS }; + } + while (*char_ptr == ' ' || *char_ptr == '\t') { + /*略过空白符和制表符*/ + ++char_ptr; + } + if (*char_ptr == '\'' || *char_ptr == '"') { + /*解析字符串*/ + return lex_string(); + } + if (isdigit(*char_ptr)) { + /*解析数字*/ + return lex_int_float(); + } + if (is_english(*char_ptr)) { + /*英文字符,有多种可能,累计直到匹配到关键字(关键字)或者不为英文字符(名称)*/ + return lex_english(); + } + // 各种符号的解析,不满足会报错 + return lex_others(); +} + +token_lex::token_lex(compiler_public_data& compiler_data, FILE* file) + : compiler_data(compiler_data) + , buf(file) { +} + +token_lex::~token_lex() { + compiler_data.error.reset_line(); + // 最后判断括号栈是否为空,如果不为空,说明括号未完全匹配,报错 + if (!check_brace.empty()) { + char error_tmp[] = { check_brace.top(), '\0' }; + compiler_data.error.send_error_module(error::SyntaxError, + language::error::syntaxerror_unmatched_char, error_tmp); + } +} +} \ No newline at end of file diff --git a/src/trc/Trc.cpp b/src/trc/Trc.cpp index f0586923..1893f3a6 100644 --- a/src/trc/Trc.cpp +++ b/src/trc/Trc.cpp @@ -4,10 +4,10 @@ * Author : 李沐阳 */ +#include #include #include #include -#include #ifdef UNITTEST #include #endif @@ -50,9 +50,8 @@ struct { const char* name; argv_func_tools tool_func; } cmd_tool[] = { { "tdb", tools::tools_out::tdb }, - { "help", tools::tools_out::help }, { "run", tools::tools_out::run } - , { "dis", tools::tools_out::dis }, - { "build", tools::tools_out::build }, + { "help", tools::tools_out::help }, { "run", tools::tools_out::run }, + { "dis", tools::tools_out::dis }, { "build", tools::tools_out::build }, { "style", tools::tools_out::style } }; /** diff --git a/src/trc/tools/build.cppm b/src/trc/tools/build.cppm index 8669e4d7..e855f688 100644 --- a/src/trc/tools/build.cppm +++ b/src/trc/tools/build.cppm @@ -5,7 +5,6 @@ module; #include #include -#include export module build; import TVM; import ctree_loader; @@ -19,6 +18,7 @@ import unreach; import color; import help; import compile_env; +import compiler; export namespace trc::tools { namespace tools_in { @@ -28,7 +28,8 @@ namespace tools_in { * @param path 文件的路径 */ void _build(TVM_space::TVM* vm, const std::string& path) { - compiler::compiler().parse(tools::compilerOption, path, &vm->static_data); + compiler::compiler().parse( + tools::compilerOption, path, &vm->static_data); loader::save_ctree( vm, fs::path(path).replace_extension(".ctree").string()); } diff --git a/src/trc/tools/dis.cppm b/src/trc/tools/dis.cppm index c0b57989..6d1a52cc 100644 --- a/src/trc/tools/dis.cppm +++ b/src/trc/tools/dis.cppm @@ -5,7 +5,6 @@ module; #include #include -#include export module dis; import code_loader; import ctree_loader; @@ -18,6 +17,7 @@ import compiler_def; import compile_env; import help; import color; +import compiler; namespace trc::tools { namespace tools_in { @@ -78,9 +78,8 @@ namespace tools_in { if (loader::is_magic(file_path)) loader::loader_ctree(vm, file_path); else { - compiler::compiler() - .parse(tools::compilerOption, file_path, - &vm->static_data); + compiler::compiler().parse( + tools::compilerOption, file_path, &vm->static_data); } out(*vm, file_path); } diff --git a/src/trc/tools/generated_params.cppm b/src/trc/tools/generated_params.cppm index c30632bd..85b67a08 100644 --- a/src/trc/tools/generated_params.cppm +++ b/src/trc/tools/generated_params.cppm @@ -5,7 +5,6 @@ module; #include #include -#include export module generated_params; import compile_env; import compiler_def; @@ -18,6 +17,7 @@ import trc_flong; import trc_long; import data; import unreach; +import compiler; namespace trc::tools { bool gen_number_table = true; diff --git a/src/trc/tools/help.cppm b/src/trc/tools/help.cppm index 0cc7223b..3d5a08be 100644 --- a/src/trc/tools/help.cppm +++ b/src/trc/tools/help.cppm @@ -2,12 +2,12 @@ #include #include #include +#include export module help; import color; import trcdef; import cmdparser; import basic_def; -import language; static void output_optimze_msg() { puts(" --optimize,-o:optimize the code."); diff --git a/src/trc/tools/run.cppm b/src/trc/tools/run.cppm index 6f119853..4b4e320c 100644 --- a/src/trc/tools/run.cppm +++ b/src/trc/tools/run.cppm @@ -5,7 +5,6 @@ module; #include #include -#include export module run; import TVM; import ctree_loader; @@ -18,6 +17,7 @@ import compiler_def; import data; import help; import color; +import compiler; export namespace trc::tools { namespace tools_in { @@ -27,7 +27,8 @@ namespace tools_in { loader::loader_ctree(vm, path); } else { /*是源文件*/ - compiler::compiler().parse(tools::compilerOption, path, &vm->static_data); + compiler::compiler().parse( + tools::compilerOption, path, &vm->static_data); } vm->reload_data(); vm->run_all(); diff --git a/src/trc/tools/tdb.cppm b/src/trc/tools/tdb.cppm index 4960c53a..91f58f25 100644 --- a/src/trc/tools/tdb.cppm +++ b/src/trc/tools/tdb.cppm @@ -11,7 +11,6 @@ module; #include #include #include -#include export module tdb; import TVM; import memory; @@ -28,6 +27,7 @@ import cmdparser; import color; import help; import basic_def; +import compiler; namespace trc { namespace tdb { @@ -96,8 +96,8 @@ namespace tdb { static void debug(const std::string& file_path) { char* instruction = nullptr; TVM_space::free_TVM(vm); - compiler::compiler() - .parse(tools::compilerOption, file_path, &vm->static_data); + compiler::compiler().parse( + tools::compilerOption, file_path, &vm->static_data); vm->reload_data(); // 用于输出代码行信息 std::string code; diff --git a/src/trc/tools/tshell.cppm b/src/trc/tools/tshell.cppm index 5b7f7c35..96a6576f 100644 --- a/src/trc/tools/tshell.cppm +++ b/src/trc/tools/tshell.cppm @@ -7,7 +7,6 @@ module; #include #include #include -#include export module tshell; import TVM; import Error; @@ -18,6 +17,7 @@ import compile_env; import compiler_def; import color; import help; +import compiler; /** * @brief 判断是否为新的语句块开始 @@ -60,8 +60,6 @@ static void get_block(std::string& res) { } } - - namespace tools::tools_out { /** * @brief trc的交互式终端界面 @@ -84,7 +82,8 @@ namespace tools::tools_out { rewind(tmpf); vm->static_data.byte_codes.clear(); try { - compiler::compiler().parse(tools::compilerOption, "tshell", tmpf, &vm->static_data); + compiler::compiler().parse( + tools::compilerOption, "tshell", tmpf, &vm->static_data); vm->reload_data(); vm->run_all(); } catch (error::error_env::vm_run_error) { } diff --git a/xmake.lua b/xmake.lua index d8a72ee0..6ee890ab 100644 --- a/xmake.lua +++ b/xmake.lua @@ -9,20 +9,17 @@ add_requires("gtest") set_warnings("all", "error") add_rules("mode.debug", "mode.release") -add_rules("plugin.compile_commands.autoupdate", {outputdir = ".vscode"}) add_rules("plugin.compile_commands.autoupdate") add_cxxflags("-Wno-read-modules-implicitly", "-Wno-unused-but-set-variable") add_includedirs("src/compiler") -add_rules("lex", "yacc") target("Trc") - set_kind("binary") - add_files("src/**.cpp", "src/**.cppm", "language/**.cppm") - add_files("src/**.ll", "src/**.yy") +set_kind("binary") +add_files("src/**.cpp", "src/**.cppm") target("unittest") - set_kind("binary") - set_default(false) - add_defines("UNITTEST") - add_files("src/**.cpp", "src/**.cppm", "tests/unittest/**.cpp", "tests/unittest/**.cppm") - add_packages("gtest") +set_kind("binary") +set_default(false) +add_defines("UNITTEST") +add_files("src/**.cpp", "src/**.cppm", "tests/unittest/**.cpp", "tests/unittest/**.cppm") +add_packages("gtest")