From e98c232498a6f51dba67b5aadfd1070898704a1a Mon Sep 17 00:00:00 2001 From: limuy Date: Tue, 6 Feb 2024 09:46:44 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=97=A0=E9=99=90=E9=80=92?= =?UTF-8?q?=E5=BD=92=E5=92=8Cid=E9=94=99=E8=AF=AFbug,=E5=8A=A0=E5=85=A5?= =?UTF-8?q?=E6=B3=A8=E9=87=8A=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rust/Cargo.lock | 1 + rust/Cargo.toml | 1 + rust/derive/src/def_module.rs | 3 +- rust/derive/src/lib.rs | 71 +++++++++++++++--------- rust/examples/expr.trc | 5 +- rust/locales/zh_CN/LC_MESSAGES/trans.mo | Bin 2047 -> 2100 bytes rust/locales/zh_CN/LC_MESSAGES/trans.po | 5 +- rust/src/base/error.rs | 1 + rust/src/base/stdlib.rs | 32 ++++++----- rust/src/compiler/ast.rs | 9 +-- rust/src/compiler/scope.rs | 25 +++++---- rust/src/compiler/token.rs | 71 +++++++++++++++++------- rust/src/tvm/algo.rs | 2 +- rust/src/tvm/stdlib.rs | 3 +- rust/src/tvm/stdlib/algo.rs | 4 ++ 15 files changed, 154 insertions(+), 79 deletions(-) create mode 100644 rust/src/tvm/stdlib/algo.rs diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 96695eb1..6f5a9143 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -1293,6 +1293,7 @@ dependencies = [ "lazy_static", "llvm-sys", "num-bigint", + "once_cell", "rand", "reqwest", "tokio", diff --git a/rust/Cargo.toml b/rust/Cargo.toml index cdb2b282..1edba210 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -20,6 +20,7 @@ num-bigint = "0.4.4" reqwest = { version = "0.11.24", features = ["json", "multipart"] } tokio = { version = "1.36.0", features = ["full"] } llvm-sys = "170.0.1" +once_cell = "1.19.0" derive = { path = "./derive" } [profile.release] diff --git a/rust/derive/src/def_module.rs b/rust/derive/src/def_module.rs index c0c9ddd5..a052e122 100644 --- a/rust/derive/src/def_module.rs +++ b/rust/derive/src/def_module.rs @@ -106,7 +106,7 @@ pub fn def_impl(content: TokenStream) -> TokenStream { check_next_iter(&mut iter, "="); if let TokenTree::Group(x, ..) = iter.next().unwrap() { // println!("{}", x); - let mut iter = x.stream().into_iter(); + let iter = x.stream().into_iter(); for i in iter { if let TokenTree::Ident(x) = i { submodules @@ -151,6 +151,7 @@ pub fn def_impl(content: TokenStream) -> TokenStream { )* #( classes.insert(stringify!(#right_class).to_string(), #left_class::export_info()); + #left_class::gen_funcs_info(); )* #( submodules.insert(stringify!(#submodules).to_string(), #submodules::init()); diff --git a/rust/derive/src/lib.rs b/rust/derive/src/lib.rs index 8956fe56..a2a6a8d5 100644 --- a/rust/derive/src/lib.rs +++ b/rust/derive/src/lib.rs @@ -5,6 +5,7 @@ use syn::ImplItem; use syn::{ parse_macro_input, parse_str, Expr, Ident, ItemFn, ItemImpl, ItemStruct, Stmt, Type, Visibility, }; + mod def_module; mod function; @@ -32,7 +33,6 @@ pub fn trc_function(attr: TokenStream, input: TokenStream) -> TokenStream { new_stmts.push(i); continue; } - new_stmts.push( parse_str::(&format!( "dydata.obj_stack.push(Box::new({}));", @@ -69,7 +69,7 @@ pub fn trc_function(attr: TokenStream, input: TokenStream) -> TokenStream { return RustFunction::new(stringify!(#name), #function_path, IOType::new(ret_classes, #output)); } ); - println!("{}", rettmp.to_token_stream()); + // println!("{}", rettmp.to_token_stream()); rettmp.into() } @@ -108,25 +108,49 @@ pub fn trc_class(_: TokenStream, input: TokenStream) -> TokenStream { } } // export_info函数会调用method宏生成function_export函数 + // 目前的实现策略是先提供一个由once_cell储存的usize数,表示在类型表中的索引,里面储存该类型的Rc指针 + // 因为很可能某个函数的参数就是标准库中的某个类型,所以我们需要先将类型导入到class_table中 let ret = quote!(#input - use crate::base::stdlib::RustClass; - impl #name { - pub fn export_info() -> RustClass { - use std::collections::hash_map::HashMap; - use crate::compiler::scope::Var; - let mut members = HashMap::new(); - #( - members.insert(Var::new(stringify!(#members_ty), #members_ident)); - )* - let mut ret = RustClass::new( + use crate::base::stdlib::{RustClass, new_class_id, STD_CLASS_TABLE}; + use once_cell::sync::OnceCell; + impl #name { + pub fn init_info() -> usize { + use std::collections::hash_map::HashMap; + use crate::compiler::scope::Var; + let mut members = HashMap::new(); + #( + members.insert(Var::new(stringify!(#members_ty), #members_ident)); + )* + let classid = new_class_id(); + let mut ret = RustClass::new( stringify!(#name), members, - Self::function_export(), - Self::override_export() + None, + Self::override_export(), + classid ); - ret + STD_CLASS_TABLE.with(|std| { + std.borrow_mut().push(ret); + }); + // let funcs_info = Self::function_export() + // ret.functions = funcs_info; + classid + } + + pub fn gen_funcs_info() { + STD_CLASS_TABLE.with(|std| { + std.borrow_mut()[Self::export_info()].functions = Self::function_export(); + }); + } + + pub fn export_info() -> usize { + static ID: OnceCell = OnceCell::new(); + *ID.get_or_init(|| { + let id = Self::init_info(); + id + }) + } } - } ); // println!("{}", ret.to_string()); ret.into() @@ -144,18 +168,13 @@ pub fn trc_method(_: TokenStream, input: TokenStream) -> TokenStream { // println!("!!!!!!!!!!!!!!!!!!!!!!!!!!:{:#?}", name); let mut funcs = vec![]; for i in &input.items { - match i { - ImplItem::Fn(func) => { - if let Visibility::Public(_) = func.vis { - funcs.push( - parse_str::(&function::convent_to_info_func( - func.sig.ident.to_string(), - )) + if let ImplItem::Fn(func) = i { + if let Visibility::Public(_) = func.vis { + funcs.push( + parse_str::(&function::convent_to_info_func(func.sig.ident.to_string())) .unwrap(), - ); - } + ); } - _ => {} } } let ret = quote!( diff --git a/rust/examples/expr.trc b/rust/examples/expr.trc index e43912c1..cbba7316 100644 --- a/rust/examples/expr.trc +++ b/rust/examples/expr.trc @@ -1,3 +1,6 @@ +/* +test expr +*/ print(1+1) print("hello") -print("ppp"+1) +#print("ppp"+1) diff --git a/rust/locales/zh_CN/LC_MESSAGES/trans.mo b/rust/locales/zh_CN/LC_MESSAGES/trans.mo index 6c5851f3392adf9e76c7151cc30c582fe280b9bb..31e9c9597a6dfd12202884ea54e66144e2d794cc 100644 GIT binary patch delta 623 zcmX}pODIH97{Kw*Fqpi?$k3R^En!wRY$OYI7FH~*jaeuRd2E<05Q!p9<1tdQ@JMJh zQFhkKRv9aExiu+!;{T1CQ|JE9x#v6IdHqyeyXRj#1u>!c?fQj{)aN`tlyQeh8BU-Z z=h46o4B!=(;v-gJ8hh{)bzVolNImwTjt|>Lv4nK8`PYzr>?Ys@bwU7}73m_bdvfK3=f1MjdL)9Ar0 z_Tvv~qW;3axFOW}OSW;;yeGCPY^Q&ZdK2IJpY^4Z<7uJ*>PcEqCk|o_Mo>?-jQUKM zXy60-@B?*YuHxL4ir+Sf`iiXcUwD;V4!t?O-K}iifUC|P_9c5Aw+{F8SZE|N86Gl1 zkn+a delta 538 zcmX}pze_?<6u|N0OD#!#WA|sRIUXL zHWW0pM*1I!5X7@3C~7R~d+^nT^FH_8d(VCE-g6!LF*EhBJ1;bgh!UT~Fi~;wpe-6A z0bIfmZeR>|aRATJkN4P#Rh+>m)V%Q)kwHwL&d)opW4lO3_L?2Z;xr46Q4?zD!!K+_ zuUljigV=#d?7q=SrQ!I2S~a%&Z&X37+`DVV*Z((GUNCNr@~Ed`bZzfE-(>BhtC!M+T|Bjm Mg~rK#&T5PN0?VB~7ytkO diff --git a/rust/locales/zh_CN/LC_MESSAGES/trans.po b/rust/locales/zh_CN/LC_MESSAGES/trans.po index e3173c49..6e993b61 100644 --- a/rust/locales/zh_CN/LC_MESSAGES/trans.po +++ b/rust/locales/zh_CN/LC_MESSAGES/trans.po @@ -45,7 +45,7 @@ msgid "Float {} is too large to store" msgstr "浮点数{}超过了储存范围" msgid "token {} is not expected" -msgstr "token{}不是被期望的" +msgstr "token {}不是被期望的" msgid "In module {}" msgstr "在模块{}中" @@ -80,3 +80,6 @@ msgstr "期望{}.但是输入{}" msgid "Expect type {}.But given type {}" msgstr "期望类型{}.但是传入类型是{}" +msgid "unclosed comment" +msgstr "未闭合的注释" + diff --git a/rust/src/base/error.rs b/rust/src/base/error.rs index e5b4d9fa..1997f6b8 100644 --- a/rust/src/base/error.rs +++ b/rust/src/base/error.rs @@ -28,6 +28,7 @@ pub const SYMBOL_REDEFINED: &str = "Symbol {} redefined"; pub const TYPE_NOT_THE_SAME: &str = "Type {} and {} are not the same"; pub const ARGU_NUMBER: &str = "expect {}.But given {}"; pub const EXPECT_TYPE: &str = "Expect type {}.But given type {}"; +pub const UNCLODED_COMMENT: &str = "unclosed comment"; #[derive(Debug)] pub struct ErrorInfo { diff --git a/rust/src/base/stdlib.rs b/rust/src/base/stdlib.rs index 7caf826a..79eb6c5d 100644 --- a/rust/src/base/stdlib.rs +++ b/rust/src/base/stdlib.rs @@ -19,7 +19,7 @@ type StdlibFunc = fn(&mut DynaData) -> RuntimeResult<()>; #[derive(Clone, Debug)] pub struct IOType { - pub argvs_type: Vec, + pub argvs_type: Vec, pub return_type: TypeAllowNull, } @@ -32,14 +32,14 @@ pub struct RustFunction { } impl IOType { - pub fn new(argvs_type: Vec, return_type: TypeAllowNull) -> IOType { + pub fn new(argvs_type: Vec, return_type: TypeAllowNull) -> IOType { IOType { argvs_type, return_type, } } - pub fn check_argvs(&self, argvs: Vec) -> Result<(), ErrorInfo> { + pub fn check_argvs(&self, argvs: Vec) -> Result<(), ErrorInfo> { if argvs.len() != self.argvs_type.len() { return Err(ErrorInfo::new( gettextrs::gettext!(ARGU_NUMBER, self.argvs_type.len(), argvs.len()), @@ -47,10 +47,10 @@ impl IOType { )); } for i in 0..self.argvs_type.len() { - if self.argvs_type[i].is_any() { + if self.argvs_type[i] == 0 { continue; } - if self.argvs_type[i].get_id() != argvs[i].get_id() { + if self.argvs_type[i] != argvs[i] { return Err(ErrorInfo::new( gettextrs::gettext!(EXPECT_TYPE, self.argvs_type[i], argvs[i]), gettextrs::gettext(ARGUMENT_ERROR), @@ -146,15 +146,16 @@ impl RustClass { pub fn new( name: impl Into, members: HashMap, - functions: HashMap, + functions: Option>, overrides: HashMap, + id: usize, ) -> RustClass { RustClass { name: name.into(), members, - functions, + functions: functions.unwrap_or_else(|| HashMap::new()), overrides, - id: 0, + id, } } @@ -166,8 +167,8 @@ impl RustClass { self.members.insert(name.into(), attr); } - pub fn export_info() -> Self { - ANY_TYPE.clone() + pub fn export_info() -> usize { + 0 } } @@ -206,6 +207,11 @@ impl Display for RustClass { thread_local! { pub static STD_FUNC_TABLE: RefCell> = RefCell::new(vec![]); + pub static STD_CLASS_TABLE: RefCell> = RefCell::new(vec![]); +} + +pub fn new_class_id() -> usize { + STD_CLASS_TABLE.with(|std| std.borrow().len()) } impl RustFunction { @@ -227,7 +233,7 @@ pub struct Stdlib { pub name: String, pub sub_modules: HashMap, pub functions: HashMap, - pub classes: HashMap, + pub classes: HashMap, } impl Stdlib { @@ -235,7 +241,7 @@ impl Stdlib { name: impl Into, sub_modules: HashMap, functions: HashMap, - classes: HashMap, + classes: HashMap, ) -> Stdlib { Stdlib { name: name.into(), @@ -267,6 +273,6 @@ impl Stdlib { lazy_static! { pub static ref ANY_TYPE: RustClass = - RustClass::new("any", HashMap::new(), HashMap::new(), HashMap::new()); + RustClass::new("any", HashMap::new(), None, HashMap::new(), new_class_id()); pub static ref STDLIB_ROOT: Stdlib = crate::tvm::stdlib::init(); } diff --git a/rust/src/compiler/ast.rs b/rust/src/compiler/ast.rs index 5a6a3b73..c9041c33 100644 --- a/rust/src/compiler/ast.rs +++ b/rust/src/compiler/ast.rs @@ -42,7 +42,7 @@ macro_rules! tmp_expe_function_gen { if let TypeAllowNull::No = ty_now { return Ok(ty_now) } - if(ty_now.unwrap().get_id() == ty_after.unwrap().get_id()) { + if(ty_now.unwrap() == ty_after.unwrap()) { return Ok(ty_now); } return try_err!(istry, @@ -72,7 +72,7 @@ macro_rules! expr_gen { if let TypeAllowNull::No = t1 { return Ok(t1); } - if t1.unwrap().get_id() != t2.unwrap().get_id() { + if t1.unwrap() != t2.unwrap() { return try_err!(istry, Box::new(self.token_lexer.compiler_data.content.clone()), ErrorInfo::new(gettext!(TYPE_NOT_THE_SAME, t1, @@ -160,7 +160,7 @@ impl<'a> AstBuilder<'a> { Ok(()) } - fn opt_args(&mut self) -> AstError> { + fn opt_args(&mut self) -> AstError> { let mut ret = vec![]; loop { let t = self.expr(true); @@ -469,9 +469,6 @@ impl<'a> AstBuilder<'a> { pub fn generate_code(&mut self) -> RunResult<()> { loop { let token = self.token_lexer.next_token()?; - if token.tp == TokenType::EndOfLine { - continue; - } if token.tp == TokenType::EndOfFile { break; } diff --git a/rust/src/compiler/scope.rs b/rust/src/compiler/scope.rs index a9d70e4d..4e0b024f 100644 --- a/rust/src/compiler/scope.rs +++ b/rust/src/compiler/scope.rs @@ -1,7 +1,9 @@ use super::ValuePool; -use crate::base::stdlib::{ClassInterface, FunctionInterface, IOType, Stdlib, STDLIB_ROOT}; +use crate::base::stdlib::{ + ClassInterface, FunctionInterface, IOType, Stdlib, STDLIB_ROOT, STD_CLASS_TABLE, +}; use lazy_static::lazy_static; -use std::{cell::RefCell, collections::HashMap, fmt::Display, rc::Rc}; +use std::{borrow::Borrow, cell::RefCell, collections::HashMap, fmt::Display, rc::Rc}; lazy_static! { static ref VAR_TYPE: Vec = vec![ @@ -15,14 +17,14 @@ lazy_static! { #[derive(Clone, Debug)] pub enum TypeAllowNull { - Yes(Type), + Yes(usize), No, } impl TypeAllowNull { - pub fn unwrap(&self) -> &Type { + pub fn unwrap(&self) -> usize { match self { - TypeAllowNull::Yes(t) => t, + TypeAllowNull::Yes(t) => *t, TypeAllowNull::No => panic!("null"), } } @@ -147,10 +149,11 @@ pub struct SymScope { prev_scope: Option>>, sym_map: HashMap, scope_sym_id: usize, - types: HashMap, + types: HashMap, funcs: HashMap>, vars: HashMap, modules: HashMap, + types_id: usize, } impl SymScope { @@ -163,12 +166,14 @@ impl SymScope { funcs: HashMap::new(), vars: HashMap::new(), modules: HashMap::new(), + types_id: 0, }; match prev_scope { Some(prev_scope) => { ret.scope_sym_id = prev_scope.as_ref().borrow().scope_sym_id; + ret.types_id = prev_scope.as_ref().borrow().types_id; } - None => {} + None => ret.types_id = STD_CLASS_TABLE.with(|std| std.borrow().len()), } ret } @@ -187,7 +192,7 @@ impl SymScope { let types = &STDLIB_ROOT.sub_modules.get("prelude").unwrap().classes; for i in types { let idx = self.insert_sym(const_pool.name_pool[i.0]); - self.add_type(idx, Box::new((*i.1).clone())); + self.add_type(idx, (*i.1).clone()); } } @@ -246,11 +251,11 @@ impl SymScope { self.vars.insert(id, v); } - pub fn add_type(&mut self, id: usize, t: Type) { + pub fn add_type(&mut self, id: usize, t: usize) { self.types.insert(id, t); } - pub fn get_type(&self, id: usize) -> Type { + pub fn get_type(&self, id: usize) -> usize { return self.types.get(&id).unwrap().clone(); } diff --git a/rust/src/compiler/token.rs b/rust/src/compiler/token.rs index b697cced..e4959f29 100644 --- a/rust/src/compiler/token.rs +++ b/rust/src/compiler/token.rs @@ -2,7 +2,7 @@ use super::{Compiler, Content, Float}; use crate::{ base::error::{ self, ErrorContent, ErrorInfo, RunResult, RuntimeError, FLOAT_OVER_FLOW, NUMBER_OVER_FLOW, - PREFIX_FOR_FLOAT, SYNTAX_ERROR, + PREFIX_FOR_FLOAT, SYNTAX_ERROR, UNCLODED_COMMENT, }, cfg::FLOAT_OVER_FLOW_LIMIT, hash_map, @@ -125,7 +125,6 @@ pub enum TokenType { Func, Import, Return, - EndOfLine, EndOfFile, } @@ -190,7 +189,6 @@ impl Display for TokenType { TokenType::Class => "class", TokenType::Match => "match", TokenType::Func => "func", - TokenType::EndOfLine => "EOL", TokenType::EndOfFile => "EOF", TokenType::Import => "import", TokenType::Arrow => "->", @@ -415,13 +413,34 @@ impl TokenLex<'_> { '*', ), '%' => self.self_symbol(TokenType::Mod, TokenType::SelfMod), - '/' => self.double_symbol( - TokenType::Div, - TokenType::SelfDiv, - TokenType::ExactDiv, - TokenType::SelfExactDiv, - '/', - ), + '/' => { + // 特判注释 + let c = self.compiler_data.input.read(); + if c == '*' { + loop { + let c = self.compiler_data.input.read(); + if c == '*' { + let c = self.compiler_data.input.read(); + if c == '/' { + return self.next_token(); + } + } else if c == '\0' { + return Err(RuntimeError::new( + Box::new(self.compiler_data.content.clone()), + ErrorInfo::new(gettext(UNCLODED_COMMENT), gettext(SYNTAX_ERROR)), + )); + } + } + } + self.compiler_data.input.unread(c); + self.double_symbol( + TokenType::Div, + TokenType::SelfDiv, + TokenType::ExactDiv, + TokenType::SelfExactDiv, + '/', + ) + } '=' => self.binary_symbol(TokenType::Assign, TokenType::Equal, '='), '!' => self.binary_symbol(TokenType::Not, TokenType::NotEqual, '='), '>' => self.double_symbol( @@ -701,12 +720,9 @@ impl TokenLex<'_> { pub fn next_token(&mut self) -> RunResult { if !self.unget_token.is_empty() { let tmp = self.unget_token.pop().unwrap(); - if tmp.tp == TokenType::EndOfLine { - self.compiler_data.content.add_line(); - } return Ok(tmp); } - let mut presecnt_lex; + let presecnt_lex; loop { presecnt_lex = self.compiler_data.input.read(); match presecnt_lex { @@ -714,10 +730,24 @@ impl TokenLex<'_> { return Ok(Token::new(TokenType::EndOfFile, None)); } '\t' | ' ' => { - continue; + return self.next_token(); } '\n' => { self.compiler_data.content.add_line(); + return self.next_token(); + } + '#' => { + // 注释 + loop { + let c = self.compiler_data.input.read(); + if c == '\n' { + self.compiler_data.content.add_line(); + return self.next_token(); + } + if c == '\0' { + return Ok(Token::new(TokenType::EndOfFile, None)); + } + } } _ => break, } @@ -735,9 +765,6 @@ impl TokenLex<'_> { } pub fn next_back(&mut self, t: Token) { - if t.tp == TokenType::EndOfLine { - self.compiler_data.content.del_line(); - } self.unget_token.push(t); } @@ -969,6 +996,10 @@ mod tests { fn test_comprehensive_lex() { gen_test_token_env!( r#" + /*a complex test* + * + * + * end */ import "p" func a(int val) -> str { if val % 2 == 0 { @@ -977,11 +1008,13 @@ func a(int val) -> str { return "odd" } } +#djopekdpekdpedle func main() { - print("hello world") + print("hello world")#djeopjdfopejfopejfpejfop p := a(intinput()) print(p) } +#ojdeopjfoepjfopejop "#, t ); diff --git a/rust/src/tvm/algo.rs b/rust/src/tvm/algo.rs index 09da2971..d245b852 100644 --- a/rust/src/tvm/algo.rs +++ b/rust/src/tvm/algo.rs @@ -1 +1 @@ -mod string; +pub mod string; diff --git a/rust/src/tvm/stdlib.rs b/rust/src/tvm/stdlib.rs index 4428fdab..7320b586 100644 --- a/rust/src/tvm/stdlib.rs +++ b/rust/src/tvm/stdlib.rs @@ -1,7 +1,8 @@ use crate::base::stdlib::Stdlib; use derive::def_module; +pub mod algo; pub mod ds; pub mod prelude; -def_module!(module_name = std, submodules = [prelude, ds]); +def_module!(module_name = std, submodules = [prelude, ds, algo]); diff --git a/rust/src/tvm/stdlib/algo.rs b/rust/src/tvm/stdlib/algo.rs new file mode 100644 index 00000000..e6128790 --- /dev/null +++ b/rust/src/tvm/stdlib/algo.rs @@ -0,0 +1,4 @@ +use crate::tvm::stdlib::Stdlib; +use derive::def_module; + +def_module!(module_name = algo);