diff --git a/README.md b/README.md index 7d76a7d7..27241af7 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,12 @@ there are two ways to use it: They have their own adventages.You can choose by your preference. +The c++ version is the first version of trc.But after I uograded it to cpp 20 stardand.it bacame hard to compile and develop.And there is a lot of trouble codes and bugs in it.So I want to stop to develop it until the cpp 20 standard is more stable. + +The rust version is the second version of trc.It is under development now.It is designed better. + +I don't know which will be the main version.Maybe both? + ## International Trc supports many different kinds of languages.We use GNU gettext.So it dont't need change the language by hand. diff --git a/cpp/doc/developer/dirs.md b/cpp/doc/developer/dirs.md deleted file mode 100644 index f5fd7f4a..00000000 --- a/cpp/doc/developer/dirs.md +++ /dev/null @@ -1,29 +0,0 @@ -# each dir's usage - -doc:help - - doc/developer:developer doc,include infomation that is used to help to develop - - doc/use:trc usage - -tests:test cases,also good for studying - - tests/black_test:black tests - - tests/unittest:unit test - -bin:trc bin - -language:the language supports of trc - -src:source code - - src/include:header files - - src/Compiler:compiler - - src/base:basic tools system - - src/trc:cmd tools and main function - - src/TVM:virtaul machine that is used for running trc code diff --git a/cpp/doc/developer/system/Compiler/BNF.txt b/cpp/doc/developer/system/Compiler/BNF.txt deleted file mode 100644 index 7d2d58a2..00000000 --- a/cpp/doc/developer/system/Compiler/BNF.txt +++ /dev/null @@ -1,24 +0,0 @@ -item -> expr | func_call | value -left_value -> id | func_call -sentence -> id = item -sentence -> id := item -func_call -> id(argv) -argv -> argv,value | value - -expr -> expr + term | expr - term | term -term -> term * factor | term / factor | term ** factor | term % factor | factor -factor -> item | (expr) - -const_value -> string | int | float -value -> const_value | id - -ifsentence -> if item { - sentence -} -whilesentence -> while item { - sentence -} -sentnece -> ifsentence | whilesentence -sentence -> assert argv -sentence -> goto int -sentence -> func_call diff --git a/cpp/doc/developer/system/Compiler/Compiler.md b/cpp/doc/developer/system/Compiler/Compiler.md deleted file mode 100644 index 43dd80b4..00000000 --- a/cpp/doc/developer/system/Compiler/Compiler.md +++ /dev/null @@ -1,9 +0,0 @@ -# Compilation - -raw code ---> - -token ---> - -grammar(don't genarate actually) ---> - -genarate opcode diff --git a/cpp/doc/developer/system/TVM.md b/cpp/doc/developer/system/TVM.md deleted file mode 100644 index a8086632..00000000 --- a/cpp/doc/developer/system/TVM.md +++ /dev/null @@ -1,17 +0,0 @@ -# TVM structure - -## memory model: - -### first:TVM(virtual machine) -### second:GLOBAL_OBJS_POOL(global objects poll) -### third:MEMORY_POOL(global memory pool) - -## relation - -------------------------------------------------- - TVM -------------------------------------------------- - GLOBAL_OBJS_POOL -------------------------------------------------- - MEMORY_POOL -------------------------------------------------- diff --git a/cpp/doc/developer/system/Trc.md b/cpp/doc/developer/system/Trc.md deleted file mode 100644 index 590b4883..00000000 --- a/cpp/doc/developer/system/Trc.md +++ /dev/null @@ -1,24 +0,0 @@ -# Trc sysytem structure - -### enter:trc - -### Virtual Machine:TVM - -### Compiler - -### basic tools:base - -### language modules:chinese,english and so on - -### relation - ------------------------------------ - trc ------------------------------------ - Compiler | TVM ------------------------------------ - base ------------------------------------ - langauge ------------------------------------ - diff --git a/cpp/doc/use/ESC.md b/cpp/doc/use/ESC.md deleted file mode 100644 index 5e9971b8..00000000 --- a/cpp/doc/use/ESC.md +++ /dev/null @@ -1,9 +0,0 @@ -# Trc支持的转义字符 - -| 写法 | 含义 | -|:-----|----:| -| \n | 换行符 | -| \t | 制表符 | -| \' | 单引号 | -| \" | 双引号 | -| \\\\ | \符号 | diff --git a/cpp/doc/use/questions.md b/cpp/doc/use/questions.md deleted file mode 100644 index 6b8d4056..00000000 --- a/cpp/doc/use/questions.md +++ /dev/null @@ -1,5 +0,0 @@ -# Common Questions - -## Q:Windows 7/Windows XP and platforms that is lower than Windows 10 cannot print info with colors and even print wrong characters - -## A:Color info is printed by using ANSI feature,ANSI on Windows is open by enabling virtual terminal(windows 10 added),so os that is lower than Windows 10 cannot use this feature.We don't have a good solution for this,because trc won't support os this is ended support.Bur expect this feature,other core features won't be affected. diff --git a/cpp/doc/use/style tool.md b/cpp/doc/use/style tool.md deleted file mode 100644 index c3e7abfb..00000000 --- a/cpp/doc/use/style tool.md +++ /dev/null @@ -1,8 +0,0 @@ -# style follows the rules - -| style rules | -|:--------------------------------------------| -| space after comma | -| space between operators | -| don't wrap the text around after big braces | -| use four space to retract | diff --git a/rust/README.md b/rust/README.md index 4613bcb5..1afe4ea7 100644 --- a/rust/README.md +++ b/rust/README.md @@ -1,10 +1,8 @@ -# A new instance of Trc language +# The rust version of Trc language ## using language is 100% rust -Rust's modern and safe features make me very intesested in it and decided to learn it by developing this project. - -So this is just an experimental project and I will keep develop c++ version and rust version(maybe more kinds of compilers developing by other languages will be added in future) +As a modern language,rust provide us a lot of useful features and I enjoy myself in it.So this project is developed by pure rust. ## Build diff --git a/rust/docs/developer/EBNF.md b/rust/docs/developer/EBNF.md index b0ce1a15..4976dd04 100644 --- a/rust/docs/developer/EBNF.md +++ b/rust/docs/developer/EBNF.md @@ -7,9 +7,10 @@ statements : statements statement statement : ID := expr ID(argvs) +item : ID | int | float | string opt_argvs: argvs | empty argvs : argvs , argv | argv argv : expr expr : expr + term | expr - term term : term * factor | term / factor -factor : (expr) | ID +factor : (expr) | item diff --git a/rust/docs/usage.md b/rust/docs/usage.md index 21912d35..7f4cd829 100644 --- a/rust/docs/usage.md +++ b/rust/docs/usage.md @@ -142,9 +142,10 @@ hello world! ## the var of trc First,we support the UTF-8 with your var name.So you can define your var like this: + ```go 你好:=90 -``` +``` ## Data structures for Trc diff --git a/rust/locales/zh_CN/LC_MESSAGES/trans.mo b/rust/locales/zh_CN/LC_MESSAGES/trans.mo index 4a4fbf1f..836eac08 100644 Binary files a/rust/locales/zh_CN/LC_MESSAGES/trans.mo and b/rust/locales/zh_CN/LC_MESSAGES/trans.mo differ diff --git a/rust/locales/zh_CN/LC_MESSAGES/trans.po b/rust/locales/zh_CN/LC_MESSAGES/trans.po index cf011ce6..3a49af05 100644 --- a/rust/locales/zh_CN/LC_MESSAGES/trans.po +++ b/rust/locales/zh_CN/LC_MESSAGES/trans.po @@ -53,3 +53,13 @@ msgstr "在模块{}中" msgid "Error in line {}" msgstr "错误在第{}行" +msgid "SymbolError" +msgstr "符号错误" + +msgid "Symbol {} not found" +msgstr "未找到符号{}" + +msgid "Symbol {} redefined" +msgstr "符号{}重定义" + + diff --git a/rust/src/base/codegen.rs b/rust/src/base/codegen.rs index ca000937..b8f7e1fe 100644 --- a/rust/src/base/codegen.rs +++ b/rust/src/base/codegen.rs @@ -1,5 +1,8 @@ use super::func; +use core::cmp::max; +use std::fmt::Display; +#[derive(Debug)] pub enum Opcode { Add, Sub, @@ -23,7 +26,7 @@ pub enum Opcode { BitOr, BitLeftShift, BitRightShift, - // change the option code index + // change pc counter Goto, // return from a function PopFrame, @@ -31,6 +34,22 @@ pub enum Opcode { NewFrame, // Load a int from const pool LoadInt, + // Load a float from const pool + LoadFloat, + // Load a string from const pool + LoadString, + // Load a bigint from const pool + LoadBigInt, + // Load a local var to the stack + LoadLocal, + // Store a local var + StoreLocal, +} + +impl Display for Opcode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self) + } } pub struct ConstPool { @@ -54,10 +73,17 @@ pub struct Inst { pub operand: usize, } +impl Inst { + pub fn new(opcode: Opcode, operand: usize) -> Self { + Self { opcode, operand } + } +} + pub struct StaticData { pub constpool: ConstPool, pub inst: Vec, pub funcs: Vec, + pub sym_table_sz: usize, } impl StaticData { @@ -66,6 +92,11 @@ impl StaticData { constpool: ConstPool::new(), inst: vec![], funcs: vec![], + sym_table_sz: 0, } } + + pub fn update_sym_table_sz(&mut self, newsz: usize) { + self.sym_table_sz = max(self.sym_table_sz, newsz); + } } diff --git a/rust/src/base/error.rs b/rust/src/base/error.rs index 4a6fc8a8..ea988594 100644 --- a/rust/src/base/error.rs +++ b/rust/src/base/error.rs @@ -7,6 +7,7 @@ pub const OPERATOR_ERROR: &str = "OperatorError"; pub const VM_ERROR: &str = "VmError"; pub const ZERO_DIVSION_ERROR: &str = "ZeroDivisionError"; pub const NUMBER_OVER_FLOW: &str = "NumberOverFlowError"; +pub const SYMBOL_ERROR: &str = "SymbolError"; pub const STRING_WITHOUT_END: &str = "this string should be ended with {}"; pub const UNMATCHED_BRACE: &str = "{} is unmatched"; @@ -20,6 +21,8 @@ pub const FLOAT_OVER_FLOW: &str = "Float {} is too large to store"; pub const UNEXPECTED_TOKEN: &str = "token {} is not expected"; pub const ERROR_IN_LINE: &str = "Error in line {}"; pub const IN_MODULE: &str = "In module {}"; +pub const SYMBOL_NOT_FOUND: &str = "Symbol {} not found"; +pub const SYMBOL_REDEFINED: &str = "Symbol {} redefined"; #[derive(Debug)] pub struct ErrorInfo { diff --git a/rust/src/compiler.rs b/rust/src/compiler.rs index bcabd5c5..4918b75a 100644 --- a/rust/src/compiler.rs +++ b/rust/src/compiler.rs @@ -1,7 +1,8 @@ -//! reference iterator:https://stackoverflow.com/questions/43952104/how-can-i-store-a-chars-iterator-in-the-same-struct-as-the-string-it-is-iteratin -//! reference float hash map:https://www.soinside.com/question/tUJxYmevbVSHZYe2C2AK5o +//! reference iterator: +//! reference float hash map: mod ast; +pub mod scope; mod token; use self::token::TokenLex; @@ -76,7 +77,7 @@ impl Option { } } -#[derive(Hash, Eq, PartialEq, Clone)] +#[derive(Hash, Eq, PartialEq, Clone, Debug)] pub struct Float { front: u32, back: u32, @@ -116,11 +117,30 @@ pub struct ValuePool { const_floats: Pool, name_pool: Pool, const_big_int: Pool, + id_int: Vec, + id_float: Vec, + id_str: Vec, + id_name: Vec, } const INT_VAL_POOL_ZERO: usize = 0; const INT_VAL_POOL_ONE: usize = 1; +macro_rules! gen_add_funcs { + ($($func_name:ident => ($const_pool:ident, $id_pool:ident, $type:ty)),*) => { + $( + fn $func_name(&mut self, val: $type) -> usize { + let len_tmp = self.$const_pool.len(); + let ret = *self.$const_pool.entry(val.clone()).or_insert(len_tmp); + if len_tmp != self.$const_pool.len() { + self.$id_pool.push(val); + } + ret + } + )* + }; +} + impl ValuePool { fn new() -> Self { let mut ret = Self { @@ -129,34 +149,22 @@ impl ValuePool { const_strings: HashMap::new(), name_pool: HashMap::new(), const_big_int: HashMap::new(), + id_int: vec![], + id_float: vec![], + id_str: vec![], + id_name: vec![], }; ret.add_int(0); ret.add_int(1); ret } - fn add_int(&mut self, val: i64) -> usize { - let len_tmp = self.const_ints.len(); - *self.const_ints.entry(val).or_insert(len_tmp) - } - - fn string_get(pool: &mut Pool, str: String) -> usize { - let len_tmp = pool.len(); - *pool.entry(str).or_insert(len_tmp) - } - - fn add_string(&mut self, val: String) -> usize { - Self::string_get(&mut self.const_strings, val) - } - - fn add_float(&mut self, val: Float) -> usize { - let len_tmp = self.const_floats.len(); - *self.const_floats.entry(val).or_insert(len_tmp) - } - - fn add_id(&mut self, val: String) -> usize { - Self::string_get(&mut self.name_pool, val) - } + gen_add_funcs!( + add_int => (const_ints, id_int, i64), + add_float => (const_floats, id_float, Float), + add_string => (const_strings, id_str, String), + add_id => (name_pool, id_name, String) + ); fn store_val_to_vm(&mut self) -> ConstPool { let mut ret = ConstPool::new(); @@ -385,5 +393,8 @@ mod tests { assert_eq!(pool.add_string(String::from("value")), 0); assert_eq!(pool.add_string(String::from("value")), 0); assert_eq!(pool.add_string(String::from("vale")), 1); + assert_eq!(pool.id_int[0], 0); + assert_eq!(pool.id_float[0], Float::new(9, 0)); + assert_eq!(pool.id_str[1], "vale"); } } diff --git a/rust/src/compiler/ast.rs b/rust/src/compiler/ast.rs index 6a7cacb1..c9595322 100644 --- a/rust/src/compiler/ast.rs +++ b/rust/src/compiler/ast.rs @@ -1,18 +1,24 @@ -use super::{token::Token, Compiler, TokenLex}; +use super::scope::SymScope; +use super::{token::Token, TokenLex}; +use crate::base::codegen::{Inst, Opcode}; use crate::base::{codegen::StaticData, error::*}; -use clap::error; use gettextrs::gettext; +use std::cell::RefCell; +use std::rc::Rc; pub struct AstBuilder<'a> { token_lexer: TokenLex<'a>, staticdata: StaticData, + self_scope: Rc>, } impl<'a> AstBuilder<'a> { pub fn new(token_lexer: TokenLex<'a>) -> Self { + let mut root_scope = Rc::new(RefCell::new(SymScope::new(None))); AstBuilder { token_lexer, staticdata: StaticData::new(), + self_scope: root_scope, } } @@ -28,19 +34,64 @@ impl<'a> AstBuilder<'a> { Ok(()) } + fn expr(&mut self) -> RunResult<()> { + Ok(()) + } + fn statement(&mut self, mut t: Token) -> RunResult<()> { match t.tp { super::token::TokenType::ID => { - t = self.token_lexer.next_token()?; - match t.tp { - super::token::TokenType::Assign => {} - super::token::TokenType::Store => {} + let name = t.data.unwrap(); + let tt = self.token_lexer.next_token()?; + match tt.tp { + super::token::TokenType::Assign => { + let var = self.self_scope.as_ref().borrow().get_sym_idx(name); + if let None = var { + return Err(RuntimeError::new( + Box::new(self.token_lexer.compiler_data.content.clone()), + ErrorInfo::new( + gettext!( + SYMBOL_NOT_FOUND, + self.token_lexer.compiler_data.const_pool.id_name[name] + ), + gettext(SYMBOL_ERROR), + ), + )); + } + self.expr()?; + let var = var.unwrap(); + self.staticdata + .inst + .push(Inst::new(Opcode::StoreLocal, var)) + } + super::token::TokenType::Store => { + if self.self_scope.as_ref().borrow().has_sym(name) { + return Err(RuntimeError::new( + Box::new(self.token_lexer.compiler_data.content.clone()), + ErrorInfo::new( + gettext!( + SYMBOL_REDEFINED, + self.token_lexer.compiler_data.const_pool.id_name[name] + ), + gettext(SYMBOL_ERROR), + ), + )); + } + self.expr()?; + let var_idx = self.self_scope.as_ref().borrow_mut().insert_sym(name); + self.staticdata.update_sym_table_sz( + self.self_scope.as_ref().borrow().get_scope_last_idx(), + ); + self.staticdata + .inst + .push(Inst::new(Opcode::StoreLocal, var_idx)) + } _ => { return Err(RuntimeError::new( Box::new(self.token_lexer.compiler_data.content.clone()), ErrorInfo::new( + gettext!(UNEXPECTED_TOKEN, tt.tp.to_string()), gettextrs::gettext(SYNTAX_ERROR), - gettext!(UNEXPECTED_TOKEN, t.tp.to_string()), ), )) } @@ -50,8 +101,8 @@ impl<'a> AstBuilder<'a> { return Err(RuntimeError::new( Box::new(self.token_lexer.compiler_data.content.clone()), ErrorInfo::new( - gettextrs::gettext(SYNTAX_ERROR), gettext!(UNEXPECTED_TOKEN, t.tp.to_string()), + gettextrs::gettext(SYNTAX_ERROR), ), )) } @@ -74,8 +125,25 @@ impl<'a> AstBuilder<'a> { #[cfg(test)] mod tests { use super::*; + use crate::compiler::Compiler; + + macro_rules! gen_test_env { + ($test_code:expr, $env_name:ident) => {{ + use crate::compiler::InputSource; + use crate::compiler::Option; + let mut compiler = Compiler::new_string_compiler( + Option::new(false, InputSource::StringInternal), + $test_code, + ); + let mut token_lexer = TokenLex::new(&mut compiler); + let mut $env_name = AstBuilder::new(token_lexer); + }}; + } + #[test] - fn test_assign() {} + fn test_assign() { + gen_test_env!(r#"a:=10"#, t); + } #[test] fn builtin_function_call() {} diff --git a/rust/src/compiler/scope.rs b/rust/src/compiler/scope.rs new file mode 100644 index 00000000..147cac21 --- /dev/null +++ b/rust/src/compiler/scope.rs @@ -0,0 +1,88 @@ +use lazy_static::lazy_static; +use std::{cell::RefCell, collections::HashMap, rc::Rc}; + +lazy_static! { + static ref VAR_TYPE: Vec = vec![ + "int".to_string(), + "float".to_string(), + "str".to_string(), + "bool".to_string(), + "bigint".to_string(), + ]; +} + +pub struct SymScope { + prev_scope: Option>>, + sym_map: HashMap, + scope_sym_id: usize, +} + +impl SymScope { + pub fn new(prev_scope: Option>>) -> Self { + let mut ret = Self { + prev_scope: prev_scope.clone(), + sym_map: HashMap::new(), + scope_sym_id: 0, + }; + match prev_scope { + Some(prev_scope) => { + ret.scope_sym_id = prev_scope.as_ref().borrow().scope_sym_id; + } + None => {} + } + ret + } + + pub fn has_sym(&self, id: usize) -> bool { + if self.sym_map.contains_key(&id) { + return true; + } + return match self.prev_scope { + Some(ref prev_scope) => prev_scope.as_ref().borrow().has_sym(id), + None => false, + }; + } + + pub fn insert_sym(&mut self, id: usize) -> usize { + let t = self.sym_map.entry(id).or_insert(self.scope_sym_id); + if *t == self.scope_sym_id { + self.scope_sym_id += 1; + } + return *t; + } + + pub fn get_sym_idx(&self, id: usize) -> Option { + let t = self.sym_map.get(&id); + match t { + None => { + return match self.prev_scope { + Some(ref prev_scope) => prev_scope.as_ref().borrow().get_sym_idx(id), + None => None, + } + } + Some(t) => { + return Some(*t); + } + } + } + + pub fn get_scope_last_idx(&self) -> usize { + self.scope_sym_id + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_scope() { + let root_scope = Rc::new(RefCell::new(SymScope::new(None))); + root_scope.as_ref().borrow_mut().insert_sym(1); + let mut son_scope = SymScope::new(Some(root_scope.clone())); + son_scope.insert_sym(2); + assert_eq!(son_scope.get_sym_idx(2), Some(1)); + drop(son_scope); + assert_eq!(root_scope.as_ref().borrow().get_sym_idx(1), Some(0)); + } +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 5b8370f3..a729b2db 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -7,7 +7,12 @@ pub mod tvm; use clap::{self, Parser, Subcommand}; #[derive(Debug, Parser)] -#[command(author="limuy", version="0.1", about, long_about = None)] +#[command( + author = "limuy", + version = "0.1", + about = "A powerful programming language", + long_about = "This programming language is powerful, effective, safe, easy-to-learn" +)] #[command(propagate_version = true)] struct Args { #[command(subcommand)] diff --git a/rust/src/tools.rs b/rust/src/tools.rs index 4e7db415..4bbb2c3d 100644 --- a/rust/src/tools.rs +++ b/rust/src/tools.rs @@ -2,4 +2,3 @@ pub mod compile_tool; pub mod tshell; pub use compile_tool::compile; -pub fn run() {} diff --git a/rust/src/tools/compile_tool.rs b/rust/src/tools/compile_tool.rs index 616c7446..c9a16c86 100644 --- a/rust/src/tools/compile_tool.rs +++ b/rust/src/tools/compile_tool.rs @@ -1,6 +1,5 @@ -use std::process::exit; - use crate::compiler; +use std::process::exit; pub fn compile(opt: compiler::Option) { let mut compiler = compiler::Compiler::new(opt); diff --git a/rust/src/tvm.rs b/rust/src/tvm.rs index 084e0634..317c7b13 100644 --- a/rust/src/tvm.rs +++ b/rust/src/tvm.rs @@ -4,18 +4,13 @@ mod function; mod gc; mod types; +use self::types::trcfloat::TrcFloat; +use self::types::trcint::TrcInt; +use self::types::trcstr::TrcStr; use crate::base::codegen::{self, StaticData}; +use crate::{base::error::*, cfg}; use gettextrs::gettext; -use crate::{ - base::error::{ - ErrorContent, ErrorInfo, RuntimeError, VM_DATA_NUMBER, VM_ERROR, VM_FRAME_EMPTY, - }, - cfg, -}; - -use self::types::trcint::TrcInt; - pub struct DynaData<'a> { obj_stack: Vec>, frames_stack: Vec>, @@ -130,7 +125,6 @@ impl<'a> Vm<'a> { pub fn new_init(static_data: StaticData) -> Self { Self { pc: 0, - dynadata: DynaData::new(), run_contnet: Content::new(cfg::MAIN_MODULE_NAME), static_data, @@ -180,6 +174,21 @@ impl<'a> Vm<'a> { codegen::Opcode::BitNot => unary_opcode!(bit_not, self), codegen::Opcode::BitLeftShift => binary_opcode!(bit_left_shift, self), codegen::Opcode::BitRightShift => binary_opcode!(bit_right_shift, self), + codegen::Opcode::LoadLocal => {} + codegen::Opcode::StoreLocal => {} + codegen::Opcode::LoadString => { + self.dynadata.obj_stack.push(Box::new(TrcStr::new( + &self.static_data.constpool.stringpool + [self.static_data.inst[self.pc].operand], + ))); + } + codegen::Opcode::LoadFloat => { + self.dynadata.obj_stack.push(Box::new(TrcFloat::new( + self.static_data.constpool.floatpool + [self.static_data.inst[self.pc].operand], + ))); + } + codegen::Opcode::LoadBigInt => {} } self.pc += 1; } diff --git a/rust/src/tvm/types/trcstr.rs b/rust/src/tvm/types/trcstr.rs index 5d905fc1..5833752c 100644 --- a/rust/src/tvm/types/trcstr.rs +++ b/rust/src/tvm/types/trcstr.rs @@ -17,3 +17,11 @@ impl Display for TrcStr { write!(f, "{}", self.value) } } + +impl TrcStr { + pub fn new(value: &str) -> Self { + Self { + value: value.to_string(), + } + } +}