diff --git a/rust/Cargo.lock b/rust/Cargo.lock index c81ad9d0..fa287836 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.5" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d664a92ecae85fd0a7392615844904654d1d5f5514837f471ddef4a057aba1b6" +checksum = "628a8f9bd1e24b4e0db2b4bc2d000b001e7dd032d54afa60a68836aeec5aa54a" dependencies = [ "anstyle", "anstyle-parse", @@ -46,7 +46,7 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -56,7 +56,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" dependencies = [ "anstyle", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -82,9 +82,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "4.4.13" +version = "4.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52bdc885e4cacc7f7c9eedc1ef6da641603180c783c41a15c264944deeaab642" +checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" dependencies = [ "clap_builder", "clap_derive", @@ -92,9 +92,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.12" +version = "4.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb7fb5e4e979aec3be7791562fcba452f94ad85e954da024396433e0e25a79e9" +checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" dependencies = [ "anstream", "anstyle", @@ -126,6 +126,16 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = 
"colored" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbf2150cce219b664a8a70df7a1f933836724b503f8a413af9365b4dcc4d90b8" +dependencies = [ + "lazy_static", + "windows-sys 0.48.0", +] + [[package]] name = "getrandom" version = "0.2.11" @@ -343,6 +353,7 @@ name = "trc" version = "0.1.0" dependencies = [ "clap", + "colored", "gettext-rs", "rand", ] @@ -387,13 +398,37 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows-sys" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets", + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", ] [[package]] @@ -402,51 +437,93 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.52.0", + 
"windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.0" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 696dc43b..eeed9e23 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -12,5 +12,6 @@ repository = "https://github.com/limuy2022/trc" [dependencies] rand = "0.8.5" -clap = { version = "4.4.12", features = ["derive"] } +clap = { version = "4.4.18", features = ["derive"] } gettext-rs = "0.7.0" +colored = "2.1.0" diff --git a/rust/docs/usage.md b/rust/docs/usage.md index e0535945..cdaad5fe 100644 --- a/rust/docs/usage.md +++ b/rust/docs/usage.md @@ -36,6 +36,16 @@ The next is the string value. To make you to write strings more easily: |'hello_world'|another method that equals to "hello world"| |"""hello world"""|this method is for code across multiple lines| +There are also several escape characters available in strings: + +|escape char|meaning| +|:---|:---| +|\t|tab| +|\n|new line| +|\\\\|\| +|\'|'| +|\"|"| + If you add ```r``` or ```R``` in front of the string.Trc will treat it as a raw string. 
Yes.These rules are from Python.I love its grammar rules @@ -100,3 +110,17 @@ if 1 == 1 { } ``` + +## the comments of Trc + +Trc supports two kinds of comments. + +The first uses ```#```: everything from ```#``` to the end of the line is a comment. + +The second uses ```/**/```; this kind can span multiple lines, like: + +```cpp +/* +hello world! +*/ +``` diff --git a/rust/locales/en/LC_MESSAGES/trans.mo b/rust/locales/en/LC_MESSAGES/trans.mo deleted file mode 100644 index 65938fab..00000000 Binary files a/rust/locales/en/LC_MESSAGES/trans.mo and /dev/null differ diff --git a/rust/locales/en/LC_MESSAGES/trans.po b/rust/locales/en/LC_MESSAGES/trans.po deleted file mode 100644 index 9db271c0..00000000 --- a/rust/locales/en/LC_MESSAGES/trans.po +++ /dev/null @@ -1,2 +0,0 @@ -msgid "test" -msgstr "fuck" \ No newline at end of file diff --git a/rust/locales/zh_CN/LC_MESSAGES/trans.mo b/rust/locales/zh_CN/LC_MESSAGES/trans.mo index 82f52db8..d0511b6a 100644 Binary files a/rust/locales/zh_CN/LC_MESSAGES/trans.mo and b/rust/locales/zh_CN/LC_MESSAGES/trans.mo differ diff --git a/rust/locales/zh_CN/LC_MESSAGES/trans.po b/rust/locales/zh_CN/LC_MESSAGES/trans.po index 4a765abd..c761e241 100644 --- a/rust/locales/zh_CN/LC_MESSAGES/trans.po +++ b/rust/locales/zh_CN/LC_MESSAGES/trans.po @@ -1,2 +1,18 @@ -msgid "test" -msgstr "草" \ No newline at end of file +msgid "" +msgstr "" +"Content-Type: text/plain; charset=UTF-8\n" + +msgid "Welcome to tshell.Type help() to get more infomation" +msgstr "欢迎使用tshell。输入help()获取更多信息" + +msgid "SyntaxError" +msgstr "语法错误" + +msgid "OperatorError" +msgstr "操作符错误" + +msgid "this string should be ended with {}" +msgstr "这个字符串应当以{}结束" + +msgid "operator {} is not supported for type {}" +msgstr "操作符{}不支持类型{}" diff --git a/rust/src/base/error.rs b/rust/src/base/error.rs index a7c6d0bf..b14f7765 100644 --- a/rust/src/base/error.rs +++ b/rust/src/base/error.rs @@ -1,17 +1,36 @@ -enum ErrorType { - SyntaxError, -} +use gettextrs::gettext; +use 
std::process::exit; + +const EXIT_FAILURE: i32 = 1; + +pub const SYNTAX_ERROR: &str = "SyntaxError"; +pub const OPERATOR_ERROR: &str = "OperatorError"; +pub const STRING_WITHOUT_END: &str = "this string should be ended with {}"; +pub const OPERATOR_IS_NOT_SUPPORT: &str = "operator {} is not supported for type {}"; pub struct ErrorInfo { pub message: String, - errot_type: ErrorType, + errot_type: &'static str, } impl ErrorInfo { - pub fn new(message: String) -> ErrorInfo { + pub fn new(message: String, error_type: &'static str) -> ErrorInfo { ErrorInfo { message, - errot_type: ErrorType::SyntaxError, + errot_type: error_type, } } } + +pub trait ErrorContent { + fn get_module_name(&self) -> &str; + + fn get_line(&self) -> usize; +} + +pub fn report_error(content: &impl ErrorContent, info: ErrorInfo) { + eprintln!("Error in line {}", content.get_line()); + eprintln!("In module {}", content.get_module_name()); + eprintln!("{}:{}", gettext(info.errot_type), info.message); + exit(EXIT_FAILURE); +} diff --git a/rust/src/cfg.rs b/rust/src/cfg.rs new file mode 100644 index 00000000..25a01f3b --- /dev/null +++ b/rust/src/cfg.rs @@ -0,0 +1,3 @@ +//! some constant values and configurations in trc + +pub const MAIN_MODULE_NAME: &str = "main"; diff --git a/rust/src/compiler.rs b/rust/src/compiler.rs index adca39e7..0274f029 100644 --- a/rust/src/compiler.rs +++ b/rust/src/compiler.rs @@ -1,32 +1,257 @@ +//! reference iterator:https://stackoverflow.com/questions/43952104/how-can-i-store-a-chars-iterator-in-the-same-struct-as-the-string-it-is-iteratin +//! 
reference float hash map:https://www.soinside.com/question/tUJxYmevbVSHZYe2C2AK5o + mod token; -use std::collections::hash_set; -use std::io; +use self::token::TokenLex; +use crate::base::error; +use crate::cfg; +use crate::tvm::ConstPool; +use std::collections::hash_map; +use std::io::BufRead; +use std::{fs, io, vec}; + +#[derive(Debug)] +pub enum InputSource { + File(String), + StringInternal, +} + +pub struct Option { + optimize: bool, + inputsource: InputSource, +} + +pub struct Content { + module_name: String, + line: usize, +} + +impl error::ErrorContent for Content { + fn get_module_name(&self) -> &str { + &self.module_name + } + + fn get_line(&self) -> usize { + self.line + } +} + +impl Content { + pub fn new(module_name: &str) -> Self { + Self { + module_name: String::from(module_name), + line: 1, + } + } + + pub fn add_line(&mut self) { + self.line += 1; + } + + pub fn del_line(&mut self) { + self.line -= 1; + } +} + +impl Option { + pub fn new(optimize: bool, source: InputSource) -> Self { + Option { + optimize, + inputsource: source, + } + } +} + +#[derive(Hash, Eq, PartialEq)] +pub struct Float { + front:i32, + back:i32 +} + +impl Float { + fn new(front:i32, back:i32) -> Self { + Self { + front, + back + } + } +} pub struct ValuePool { - const_ints: hash_set::HashSet, + const_ints: hash_map::HashMap, + const_strings: hash_map::HashMap, + const_floats: hash_map::HashMap } +const INT_VAL_POOL_ZERO:usize = 0; +const INT_VAL_POOL_ONE:usize = 1; + impl ValuePool { fn new() -> Self { + let mut ret = Self { + const_ints: hash_map::HashMap::new(), + const_floats: hash_map::HashMap::new(), + const_strings: hash_map::HashMap::new() + }; + ret.add_int(0); + ret.add_int(1); + ret + } + + fn add_int(&mut self, val:i64) -> usize { + let len_tmp = self.const_ints.len(); + *self.const_ints.entry(val).or_insert(len_tmp) + } + + fn add_string(&mut self, val:String) -> usize { + let len_tmp = self.const_strings.len(); + *self.const_strings.entry(val).or_insert(len_tmp) + 
} + + fn add_float(&mut self, val:Float) -> usize { + let len_tmp = self.const_floats.len(); + *self.const_floats.entry(val).or_insert(len_tmp) + } + + fn store_val_to_vm(&mut self) -> ConstPool { + let mut ret = ConstPool::new(); + ret.intpool.resize(self.const_ints.len(), 0); + for i in &self.const_ints { + ret.intpool[*i.1] = *i.0; + } + + ret + } +} + +#[derive(Debug)] +pub struct StringSource { + text: String, + pos: usize, + prev_size: usize, +} + +impl StringSource { + fn new(source: String) -> Self { Self { - const_ints: hash_set::HashSet::new(), + text: source, + pos: 0, + prev_size: 0, + } + } +} + +impl TokenIo for StringSource { + fn unread(&mut self, c: char) { + self.pos -= self.prev_size; + } + + fn read(&mut self) -> char { + let mut opt = self.text[self.pos..].chars(); + match opt.next() { + None => '\0', + Some(c) => { + let sz = c.len_utf8(); + self.prev_size = sz; + self.pos += sz; + c + } + } + } +} + +trait TokenIo { + fn unread(&mut self, c: char); + + fn read(&mut self) -> char; +} + +pub struct FileSource { + back: Vec, + buf: io::BufReader, + s: String, + input_pos: usize, +} + +impl FileSource { + pub fn new(f: fs::File) -> Self { + let buf = io::BufReader::new(f); + let s = String::new(); + FileSource { + back: vec![], + buf, + input_pos: 0, + s, } } } -pub struct Compiler { - input: T, +impl TokenIo for FileSource { + fn unread(&mut self, c: char) { + self.back.push(c); + } + + fn read(&mut self) -> char { + if !self.back.is_empty() { + return self.back.pop().unwrap(); + } + loop { + let mut input_pos = self.s[self.input_pos..].chars(); + match input_pos.next() { + None => { + self.s.clear(); + self.buf.read_line(&mut self.s).unwrap(); + self.input_pos = 0; + } + Some(c) => { + self.input_pos += c.len_utf8(); + return c; + } + } + } + } +} + +pub struct Compiler { + // to support read from stdin and file + input: Box, line: usize, const_pool: ValuePool, + option: Option, + content: Content, } -impl Compiler { - fn new(f: T) -> Self { 
+impl Compiler { + fn new(option: Option) -> Self { + match option.inputsource { + InputSource::File(ref filename) => { + let f = std::fs::File::open(filename); + Compiler { + input: Box::new(FileSource::new(f.unwrap())), + line: 1, + const_pool: ValuePool::new(), + option, + content: Content::new(cfg::MAIN_MODULE_NAME), + } + } + _ => { + panic!("Compiler construct Error, used {:?}", option.inputsource); + } + } + } + + fn new_string_compiler(option: Option, source: &str) -> Self { Compiler { - input: f, + input: Box::new(StringSource::new(String::from(source))), line: 1, const_pool: ValuePool::new(), + option, + content: Content::new(cfg::MAIN_MODULE_NAME), } } + + fn lex(&mut self) { + let token_lexer = TokenLex::new(self); + } } diff --git a/rust/src/compiler/token.rs b/rust/src/compiler/token.rs index d1e738ac..833ce350 100644 --- a/rust/src/compiler/token.rs +++ b/rust/src/compiler/token.rs @@ -1,12 +1,38 @@ -use std::str::CharIndices; +use gettextrs::gettext; -use super::Compiler; -use std::io; +use crate::base::error; +use super::{Compiler, INT_VAL_POOL_ZERO}; #[derive(PartialEq, Debug)] enum TokenType { + // . 
DOT, + // , COMMA, + // { + LEFT_BIG_BRACE, + // } + RIGHT_BIG_BRACE, + // [ + LEFT_MIDDLE_BRACE, + // ] + RIGHT_MIDDLE_BRACE, + // ( + LEFT_SMALL_BRACE, + // ) + RIGHT_SMALL_BRACE, + // + + ADD, + // - + SUB, + // * + MUL, + // / + DIV, + // % + MOD, + // // + EXACT_DIVISION, INT_VALUE, STRING_VALUE, FLOAT_VALUE, @@ -16,22 +42,18 @@ enum TokenType { #[derive(PartialEq, Debug)] pub enum Data { - Int(i32), - Str(String), - FLOAT(f64), + Ind(usize), NONEDATA, } #[derive(PartialEq, Debug)] -struct Token { +pub struct Token { tp: TokenType, data: Data, } -struct TokenLex<'code, T: io::Read> { - code: &'code str, - pos: CharIndices<'code>, - compiler_data: &'code mut Compiler, +pub struct TokenLex<'code> { + compiler_data: &'code mut Compiler, } impl Token { @@ -46,26 +68,28 @@ impl Token { } } -impl Iterator for TokenLex<'_, T> { +impl Iterator for TokenLex<'_> { type Item = Token; fn next(&mut self) -> Option { self.next_token() } } -impl TokenLex<'_, T> { - fn new<'a>(code: &'a str, compiler_data: &'a mut Compiler) -> TokenLex<'a, T> { - TokenLex { - code, - pos: code.char_indices(), - compiler_data, - } +impl TokenLex<'_> { + pub fn new<'a>(compiler_data: &'a mut Compiler) -> TokenLex<'a> { + TokenLex { compiler_data } } fn lex_symbol(&mut self, c: char) -> Token { match c { '.' 
=> Token::new(TokenType::DOT, None), ',' => Token::new(TokenType::COMMA, None), + '{' => Token::new(TokenType::LEFT_BIG_BRACE, None), + '}' => Token::new(TokenType::RIGHT_BIG_BRACE, None), + '[' => Token::new(TokenType::LEFT_MIDDLE_BRACE, None), + ']' => Token::new(TokenType::RIGHT_MIDDLE_BRACE, None), + '(' => Token::new(TokenType::LEFT_SMALL_BRACE, None), + ')' => Token::new(TokenType::RIGHT_SMALL_BRACE, None), _ => panic!("Not a symbol.Compiler error"), } } @@ -77,44 +101,40 @@ impl TokenLex<'_, T> { let mut radix = 10; let presecnt_lex; if c == '0' { - presecnt_lex = self.pos.next(); + presecnt_lex = self.compiler_data.input.read(); match presecnt_lex { - Some(c) => { - let c = c.1; - match c { - 'x' | 'X' => { - s += "0x"; - radix = 16; - } - 'b' | 'B' => { - s += "0b"; - radix = 2; - } - 'o' | 'O' => { - s += "0o"; - radix = 8; - } - _ => {} - } - } - None => { - return Token::new(TokenType::INT_VALUE, Some(Data::Int(0))); + '\0' => { + return Token::new(TokenType::INT_VALUE, Some(Data::Ind(INT_VAL_POOL_ZERO))); } + _ => match presecnt_lex { + 'x' | 'X' => { + s += "0x"; + radix = 16; + } + 'b' | 'B' => { + s += "0b"; + radix = 2; + } + 'o' | 'O' => { + s += "0o"; + radix = 8; + } + _ => {} + }, } } else { s = c.to_string(); } loop { - match self.pos.next() { - None => { + match self.compiler_data.input.read() { + '\0' => { break; } - Some(c) => { - let c = c.1; + c => { if c.is_digit(radix) { s.push(c); } else { - self.pos.next_back(); + self.compiler_data.input.unread(c); break; } } @@ -122,43 +142,67 @@ impl TokenLex<'_, T> { } Token::new( TokenType::INT_VALUE, - Some(Data::Int(s.parse().expect("wrong string to int"))), + Some(Data::Ind(self.compiler_data.const_pool.add_int(s.parse().expect("wrong string to int")))), ) } - fn lex_str(&mut self) -> Token { + fn lex_str(&mut self, start_char: char) -> Token { let mut s = String::new(); - loop {} + let mut c = self.compiler_data.input.read(); + while c != start_char { + if c == '\\' { + c = 
self.compiler_data.input.read(); + c = match c { + 't' => '\t', + 'n' => '\n', + '\\' => '\\', + '"' => '"', + '\'' => '\'', + _ => { + s.push('\\'); + c + } + } + } + s.push(c); + c = self.compiler_data.input.read(); + if c == '\0' { + error::report_error( + &self.compiler_data.content, + error::ErrorInfo::new( + gettext!(error::STRING_WITHOUT_END, start_char), + error::SYNTAX_ERROR, + ), + ); + } + } + Token::new(TokenType::STRING_VALUE, Some(Data::Ind(self.compiler_data.const_pool.add_string(s)))) } fn next_token(&mut self) -> Option { - let mut presecnt_lex = self.pos.next(); + let mut presecnt_lex = self.compiler_data.input.read(); loop { - presecnt_lex = self.pos.next(); match presecnt_lex { - Some(c) => { - let c = c.1; - match c { - '\t' | ' ' => { - continue; - } - '\n' => { - self.compiler_data.line += 1; - } - _ => break, - } - } - None => { + '\0' => { return None; } + c => match c { + '\t' | ' ' => { + continue; + } + '\n' => { + self.compiler_data.line += 1; + } + _ => break, + }, } + presecnt_lex = self.compiler_data.input.read(); } - let presecnt_lex = presecnt_lex.unwrap().1; if presecnt_lex.is_digit(10) { return Some(self.lex_num(presecnt_lex)); } if presecnt_lex == '\'' || presecnt_lex == '"' { - return Some(self.lex_str()); + return Some(self.lex_str(presecnt_lex)); } Some(self.lex_symbol(presecnt_lex)) } @@ -166,9 +210,11 @@ impl TokenLex<'_, T> { #[cfg(test)] mod tests { + use crate::compiler::{InputSource, Option}; + use super::*; - fn check(tokenlex: &mut TokenLex, expected_res: Vec) { + fn check(tokenlex: &mut TokenLex, expected_res: Vec) { for i in expected_res { assert_eq!(i, tokenlex.next().unwrap()); } @@ -177,8 +223,8 @@ mod tests { #[test] fn test_numberlex() { - let mut env = Compiler::new(io::stdin()); - let mut t = TokenLex::new( + let mut env = Compiler::new_string_compiler( + Option::new(false, InputSource::StringInternal), r#",,.,100 @@ -186,20 +232,20 @@ mod tests { 0b011 0x2aA4 0o2434 0 0"#, - &mut env, ); + let mut t = 
TokenLex::new(&mut env); let res = vec![ Token::new(TokenType::COMMA, None), Token::new(TokenType::COMMA, None), Token::new(TokenType::DOT, None), Token::new(TokenType::COMMA, None), - Token::new(TokenType::FLOAT_VALUE, Some(Data::FLOAT(123.9))), - Token::new(TokenType::INT_VALUE, Some(Data::Int(232_304904))), - Token::new(TokenType::INT_VALUE, Some(Data::Int(0b011))), - Token::new(TokenType::INT_VALUE, Some(Data::Int(0x2aA4))), - Token::new(TokenType::INT_VALUE, Some(Data::Int(0o2434))), - Token::new(TokenType::INT_VALUE, Some(Data::Int(0))), - Token::new(TokenType::INT_VALUE, Some(Data::Int(0))), + Token::new(TokenType::FLOAT_VALUE, Some(Data::Ind(0))), + Token::new(TokenType::INT_VALUE, Some(Data::Ind(1))), + Token::new(TokenType::INT_VALUE, Some(Data::Ind(2))), + Token::new(TokenType::INT_VALUE, Some(Data::Ind(3))), + Token::new(TokenType::INT_VALUE, Some(Data::Ind(4))), + Token::new(TokenType::INT_VALUE, Some(Data::Ind(INT_VAL_POOL_ZERO))), + Token::new(TokenType::INT_VALUE, Some(Data::Ind(INT_VAL_POOL_ZERO))), ]; check(&mut t, res); } @@ -207,11 +253,28 @@ mod tests { #[test] fn test_symbol_lex() {} + #[test] + fn test_string_lex() { + let mut env = Compiler::new_string_compiler( + Option::new(false, InputSource::StringInternal), + r#""s"'sd''sdscdcdfvf'"depkd"''"\n\t"'ttt\tt'"#, + ); + let res = vec![ + Token::new(TokenType::STRING_VALUE, Some(Data::Ind(0))) + ]; + } + + #[test] + fn test_comprehensive_lex() {} + #[test] #[should_panic] fn test_wrong_number() { - let mut env = Compiler::new(io::stdin()); - let t = TokenLex::new(r#"0xtghhy 0b231"#, &mut env); + let mut env = Compiler::new_string_compiler( + Option::new(false, InputSource::StringInternal), + r#"0xtghhy 0b231"#, + ); + let t = TokenLex::new(&mut env); for _ in t {} } } diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 8b137891..df0259ec 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -1 +1,37 @@ +pub mod base; +pub mod cfg; +pub mod compiler; +pub mod tools; +pub mod tvm; +use 
clap::{self, Parser, Subcommand}; + +#[derive(Debug, Parser)] +#[command(author="limuy", version="0.1", about, long_about = None)] +#[command(propagate_version = true)] +struct Args { + #[command(subcommand)] + mode: Commands, + #[arg()] + files: Vec, +} + +#[derive(Debug, Subcommand)] +enum Commands { + build { optimize: bool }, + tshell {}, +} + +pub fn run() { + let cli = Args::parse(); + match cli.mode { + Commands::build { optimize: opt } => { + for i in cli.files { + tools::compile(compiler::Option::new(opt, compiler::InputSource::File(i))); + } + } + Commands::tshell {} => { + tools::tshell::tshell(); + } + }; +} diff --git a/rust/src/main.rs b/rust/src/main.rs index 9bf1fc74..a3bd3f7f 100644 --- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -1,25 +1,9 @@ -pub mod base; -pub mod compiler; -pub mod tools; -pub mod tvm; - -use std::env::args; -use std::process::exit; +use trc::run; fn main() -> Result<(), Box> { gettextrs::setlocale(gettextrs::LocaleCategory::LcAll, ""); gettextrs::bindtextdomain("trans", "locales")?; gettextrs::textdomain("trans")?; - let mode: String = match args().nth(1) { - Some(tr) => tr, - _ => { - println!("A mode isn't given."); - exit(0); - } - }; - if mode == "build" { - tools::compile(); - } else if mode == "run" { - } + run(); Ok(()) } diff --git a/rust/src/tools.rs b/rust/src/tools.rs index 1d39d6b0..4e7db415 100644 --- a/rust/src/tools.rs +++ b/rust/src/tools.rs @@ -1,4 +1,5 @@ pub mod compile_tool; +pub mod tshell; pub use compile_tool::compile; pub fn run() {} diff --git a/rust/src/tools/compile_tool.rs b/rust/src/tools/compile_tool.rs index 61c54ec9..69ab950c 100644 --- a/rust/src/tools/compile_tool.rs +++ b/rust/src/tools/compile_tool.rs @@ -1,10 +1,3 @@ -use clap::Parser; +use crate::compiler; -#[derive(Parser)] -pub struct ParamsCil { - pattern: String, -} - -pub fn compile() { - let args = ParamsCil::parse(); -} +pub fn compile(opt: compiler::Option) {} diff --git a/rust/src/tools/tshell.rs b/rust/src/tools/tshell.rs new 
file mode 100644 index 00000000..61bdbcab --- /dev/null +++ b/rust/src/tools/tshell.rs @@ -0,0 +1,20 @@ +//! the read execute print loop for trc + +use colored::*; +use gettextrs::gettext; +use std::io::{self, Write}; + +fn get_block() {} + +pub fn tshell() { + println!( + "{}\n\n", + gettext("Welcome to tshell.Type help() to get more infomation").bold() + ); + loop { + print!("tshell>"); + io::stdout().flush().unwrap(); + let mut line = String::new(); + io::stdin().read_line(&mut line).unwrap(); + } +} diff --git a/rust/src/tvm.rs b/rust/src/tvm.rs index 11dc002a..ac754715 100644 --- a/rust/src/tvm.rs +++ b/rust/src/tvm.rs @@ -1,10 +1,16 @@ -mod types; mod algo; +mod function; +mod types; + +use crate::{ + base::error::ErrorContent, + cfg, +}; pub struct ConstPool { - intpool: Vec, - stringpool: Vec, - floatpool: Vec, + pub intpool: Vec, + pub stringpool: Vec, + pub floatpool: Vec, } impl ConstPool { @@ -17,14 +23,64 @@ impl ConstPool { } } +pub struct DynaData<'a> { + obj_stack: Vec>, + frames_stack: Vec>, +} + +impl<'a> DynaData<'a> { + pub fn new() -> Self { + Self { + obj_stack: Vec::new(), + frames_stack: vec![], + } + } +} + pub struct Inst { opcode: Opcode, operand: i32, } -pub struct Vm { +pub struct Vm<'a> { constpool: ConstPool, inst: Vec, + funcs: Vec, + run_contnet: Content, + dynadata: DynaData<'a>, + pc: usize, +} + +struct Content { + module_name: String, + line_pos: usize, +} + +impl ErrorContent for Content { + fn get_module_name(&self) -> &str { + &self.module_name + } + + fn get_line(&self) -> usize { + self.line_pos + } +} + +impl Content { + fn new(module_name: &str) -> Self { + Content { + module_name: String::from(module_name), + line_pos: 0, + } + } + + fn add_line(&mut self) { + self.line_pos += 1; + } + + fn del_line(&mut self) { + self.line_pos -= 1; + } } enum Opcode { @@ -42,16 +98,45 @@ enum Opcode { And, Or, Not, + // change the option code index Goto, + // return from a function + PopFrame, + // create a frame to hold the function + 
NewFrame, } -impl Vm { +impl<'a> Vm<'a> { pub fn new() -> Self { Self { constpool: ConstPool::new(), inst: Vec::new(), + pc: 0, + funcs: vec![], + dynadata: DynaData::new(), + run_contnet: Content::new(cfg::MAIN_MODULE_NAME), } } - pub fn run(&mut self) {} + pub fn run(&mut self) { + while self.pc < self.inst.len() { + match self.inst[self.pc].opcode { + Opcode::Add => { + let t1 = self.dynadata.obj_stack.pop(); + let t2 = self.dynadata.obj_stack.pop(); + if t1.is_none() || t2.is_none() {} + } + Opcode::Div => {} + Opcode::Gt => {} + Opcode::NewFrame => {} + Opcode::PopFrame => { + self.dynadata.frames_stack.pop(); + } + _ => { + panic!("unknown opcode"); + } + } + self.pc += 1; + } + } } diff --git a/rust/src/tvm/algo.rs b/rust/src/tvm/algo.rs index 59032bee..09da2971 100644 --- a/rust/src/tvm/algo.rs +++ b/rust/src/tvm/algo.rs @@ -1 +1 @@ -mod string; \ No newline at end of file +mod string; diff --git a/rust/src/tvm/algo/string.rs b/rust/src/tvm/algo/string.rs index f76c40b5..75a597d2 100644 --- a/rust/src/tvm/algo/string.rs +++ b/rust/src/tvm/algo/string.rs @@ -1,10 +1,11 @@ +use std::collections::HashMap; -pub fn kmp(main_string:&str, pattern:&str) -> usize { +pub fn kmp(main_string: &str, pattern: &str) -> usize { // 首先对模式串构建next数组 let next_arr = kmp_next(pattern); - let mut j:i64 = -1; + let mut j: i64 = -1; // 我也不想把字符串先转换出来,但是必须要这么做 - let pattern:Vec = pattern.chars().collect(); + let pattern: Vec = pattern.chars().collect(); let mut ans = 0; for i in main_string.chars() { while j != -1 && pattern[(j + 1) as usize] == i { @@ -20,21 +21,16 @@ pub fn kmp(main_string:&str, pattern:&str) -> usize { ans } -/// build kmp's next array -/// # Example -/// ``` -/// let next = kmp_next("ababab"); -/// ``` -pub fn kmp_next(pattern:&str) -> Vec { - let mut j:i64 = -1; +pub fn kmp_next(pattern: &str) -> Vec { + let mut j: i64 = -1; let mut ret: Vec = vec![-1]; - let pattern:Vec = pattern.chars().collect(); + let pattern: Vec = pattern.chars().collect(); // 
从1开始匹配是因为第零个不需要匹配 for i in 1..pattern.len() { - while j != -1 && pattern[(j + 1)as usize] != pattern[i] { + while j != -1 && pattern[(j + 1) as usize] != pattern[i] { j = ret[j as usize] as i64; } - if pattern[(j + 1) as usize ] == pattern[i] { + if pattern[(j + 1) as usize] == pattern[i] { j += 1; } ret.push(j); @@ -42,14 +38,13 @@ pub fn kmp_next(pattern:&str) -> Vec { ret } -/// Suffix Array -/// # Example -/// ``` -/// let s="dkodkoe"; -/// let sarray = sa(s) -/// ``` -pub fn sa(s:&str) { - +pub fn sa(s: &str) { + let mut sa: Vec = Vec::new(); + let mut t: HashMap = HashMap::new(); + for i in s.chars() { + let tmp = t.entry(i).or_insert(0); + *tmp += 1; + } } #[cfg(test)] @@ -64,10 +59,24 @@ mod tests { #[test] fn kmp_1() { assert_eq!(kmp("ABABABC", "ABA"), 2); + assert_eq!( + kmp( + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + ), + 16 + ); + assert_eq!( + kmp( + "asdfasfababdababaadfasababdababagsdgababdababa", + "ababdababa" + ), + 3 + ); } #[test] fn kmp_next_1() { - assert_eq!(kmp_next("ABA"), vec![0, 0, 1]); + assert_eq!(kmp_next("ABA"), vec![-1, -1, 0]); } } diff --git a/rust/src/tvm/function.rs b/rust/src/tvm/function.rs new file mode 100644 index 00000000..3301be75 --- /dev/null +++ b/rust/src/tvm/function.rs @@ -0,0 +1,20 @@ +pub struct Func { + name: String, +} + +impl Func { + fn new(name: String) -> Self { + Self { name } + } +} + +/// A content structure which hold the running info of the function +pub struct Frame<'a> { + name: &'a str, +} + +impl<'a> Frame<'a> { + fn new(func: &'a Func) -> Self { + Self { name: &func.name } + } +} diff --git a/rust/src/tvm/types.rs b/rust/src/tvm/types.rs index 3de2dd25..e1e0ae62 100644 --- a/rust/src/tvm/types.rs +++ b/rust/src/tvm/types.rs @@ -1,5 +1,20 @@ +use crate::base::error; +use gettextrs::gettext; + mod data_structure; mod trcfloat; mod trcint; -mod trcobj; mod trcstr; + +pub trait TrcObj { + fn output(&self) {} + + fn add(&self, _: Box) -> Result, 
error::ErrorInfo> { + Err(error::ErrorInfo::new( + gettext!(error::OPERATOR_IS_NOT_SUPPORT, "+", self.get_type_name()), + error::SYNTAX_ERROR, + )) + } + + fn get_type_name(&self) -> &str; +} diff --git a/rust/src/tvm/types/data_structure.rs b/rust/src/tvm/types/data_structure.rs index 07faa8c8..d3cb984d 100644 --- a/rust/src/tvm/types/data_structure.rs +++ b/rust/src/tvm/types/data_structure.rs @@ -1,6 +1,7 @@ mod ac; mod deque; mod fenwick; +mod forward_list; mod hash_map; mod list; mod map; diff --git a/rust/src/tvm/types/data_structure/ac.rs b/rust/src/tvm/types/data_structure/ac.rs index e4400999..46326d2e 100644 --- a/rust/src/tvm/types/data_structure/ac.rs +++ b/rust/src/tvm/types/data_structure/ac.rs @@ -55,7 +55,7 @@ impl AcAutomaton { // 正常的ac自动机会将剩余的失配部分也指向失配指针 // 但是这个字符集被设计为无限大,可以容纳unicode的ac自动机,所以不能这么做 // 会在匹配时顺着向上找失配指针 - self.states[*val].fail = if self.states[self.states[u].fail].next.contains_key(&c) { + self.states[*val].fail = if self.states[self.states[u].fail].next.contains_key(&c) { self.states[self.states[u].fail].next[&c] } else { 0 @@ -72,9 +72,7 @@ impl AcAutomaton { /// ac.search("world", 2); /// let ans = ac.get_ans(); /// ```` - fn search(&self, pattern: &str, id:u32) { - - } + fn search(&self, pattern: &str, id: u32) {} fn get_ans(&self) -> HashMap { HashMap::new() @@ -87,6 +85,5 @@ mod tests { #[test] fn ac_automaton1() { let mut ac = AcAutomaton::new(); - } } diff --git a/rust/src/tvm/types/data_structure/forward_list.rs b/rust/src/tvm/types/data_structure/forward_list.rs new file mode 100644 index 00000000..04c00b7f --- /dev/null +++ b/rust/src/tvm/types/data_structure/forward_list.rs @@ -0,0 +1,17 @@ +pub struct ForwardList { + sz: usize, +} + +impl ForwardList { + pub fn new() -> Self { + Self { sz: 0 } + } + + pub fn size(&self) -> usize { + self.sz + } + + pub fn empty(&self) -> bool { + self.size() == 0 + } +} diff --git a/rust/src/tvm/types/data_structure/hash_map.rs b/rust/src/tvm/types/data_structure/hash_map.rs index 
7b9fc666..428acb6a 100644 --- a/rust/src/tvm/types/data_structure/hash_map.rs +++ b/rust/src/tvm/types/data_structure/hash_map.rs @@ -1,6 +1,4 @@ -pub struct HashMap { - -} +pub struct HashMap {} impl HashMap { pub fn new() -> HashMap { diff --git a/rust/src/tvm/types/data_structure/st.rs b/rust/src/tvm/types/data_structure/st.rs index 8b137891..953ca641 100644 --- a/rust/src/tvm/types/data_structure/st.rs +++ b/rust/src/tvm/types/data_structure/st.rs @@ -1 +1 @@ - +pub struct StTable {} diff --git a/rust/src/tvm/types/trcobj.rs b/rust/src/tvm/types/trcobj.rs deleted file mode 100644 index 8b137891..00000000 --- a/rust/src/tvm/types/trcobj.rs +++ /dev/null @@ -1 +0,0 @@ -