From fc7c18cb47b70becb7828c2e0a7d8b9a871ac24c Mon Sep 17 00:00:00 2001
From: Luke Peterson
Date: Thu, 21 Mar 2024 17:42:34 +0900
Subject: [PATCH] Flowing errors in tokenizer atom-constructor functions through to parser

---
 lib/src/metta/runner/arithmetics.rs    | 20 ++++++++++----------
 lib/src/metta/runner/stdlib.rs         | 12 ++++++------
 lib/src/metta/runner/stdlib_minimal.rs | 12 ++++++------
 lib/src/metta/text.rs                  | 17 ++++++++++++++++-
 4 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/lib/src/metta/runner/arithmetics.rs b/lib/src/metta/runner/arithmetics.rs
index e841c26af..9c03f4de6 100644
--- a/lib/src/metta/runner/arithmetics.rs
+++ b/lib/src/metta/runner/arithmetics.rs
@@ -56,14 +56,14 @@ impl Into for Number {
     }
 }
 impl Number {
-    pub fn from_int_str(num: &str) -> Self {
-        let n = num.parse::<i64>().expect("Could not parse integer");
-        Self::Integer(n)
+    pub fn from_int_str(num: &str) -> Result<Self, String> {
+        let n = num.parse::<i64>().map_err(|e| format!("Could not parse integer: '{num}', {e}"))?;
+        Ok(Self::Integer(n))
     }

-    pub fn from_float_str(num: &str) -> Self {
-        let n = num.parse::<f64>().expect("Could not parse float");
-        Self::Float(n)
+    pub fn from_float_str(num: &str) -> Result<Self, String> {
+        let n = num.parse::<f64>().map_err(|e| format!("Could not parse float: '{num}', {e}"))?;
+        Ok(Self::Float(n))
     }

     pub fn promote(a: Number, b: Number) -> (Number, Number) {
@@ -406,10 +406,10 @@ mod tests {

     #[test]
     fn number() {
-        assert_eq!(Number::from_int_str("12345"), Number::Integer(12345i64));
-        assert_eq!(Number::from_float_str("123.45"), Number::Float(123.45f64));
-        assert_eq!(Number::from_float_str("12345e-02"), Number::Float(123.45f64));
-        assert_eq!(Number::from_float_str("1.2345e+2"), Number::Float(123.45f64));
+        assert_eq!(Number::from_int_str("12345").unwrap(), Number::Integer(12345i64));
+        assert_eq!(Number::from_float_str("123.45").unwrap(), Number::Float(123.45f64));
+        assert_eq!(Number::from_float_str("12345e-02").unwrap(), Number::Float(123.45f64));
+        assert_eq!(Number::from_float_str("1.2345e+2").unwrap(), Number::Float(123.45f64));
         assert_eq!(format!("{}", Number::Integer(12345i64)), "12345");
         assert_eq!(format!("{}", Number::Float(123.45f64)), "123.45");
     }
diff --git a/lib/src/metta/runner/stdlib.rs b/lib/src/metta/runner/stdlib.rs
index 9fbbfd655..6ca4d439b 100644
--- a/lib/src/metta/runner/stdlib.rs
+++ b/lib/src/metta/runner/stdlib.rs
@@ -1415,12 +1415,12 @@ mod non_minimal_only_stdlib {
         let mut rust_tokens = Tokenizer::new();
         let tref = &mut rust_tokens;

-        tref.register_token(regex(r"[\-\+]?\d+"),
-            |token| { Atom::gnd(Number::from_int_str(token)) });
-        tref.register_token(regex(r"[\-\+]?\d+\.\d+"),
-            |token| { Atom::gnd(Number::from_float_str(token)) });
-        tref.register_token(regex(r"[\-\+]?\d+(\.\d+)?[eE][\-\+]?\d+"),
-            |token| { Atom::gnd(Number::from_float_str(token)) });
+        tref.register_fallible_token(regex(r"[\-\+]?\d+"),
+            |token| { Ok(Atom::gnd(Number::from_int_str(token)?)) });
+        tref.register_fallible_token(regex(r"[\-\+]?\d+\.\d+"),
+            |token| { Ok(Atom::gnd(Number::from_float_str(token)?)) });
+        tref.register_fallible_token(regex(r"[\-\+]?\d+(\.\d+)?[eE][\-\+]?\d+"),
+            |token| { Ok(Atom::gnd(Number::from_float_str(token)?)) });
         tref.register_token(regex(r"True|False"),
             |token| { Atom::gnd(Bool::from_str(token)) });
         let sum_op = Atom::gnd(SumOp{});
diff --git a/lib/src/metta/runner/stdlib_minimal.rs b/lib/src/metta/runner/stdlib_minimal.rs
index 05b20b4c5..c33c7f7c2 100644
--- a/lib/src/metta/runner/stdlib_minimal.rs
+++ b/lib/src/metta/runner/stdlib_minimal.rs
@@ -428,12 +428,12 @@ pub fn register_rust_stdlib_tokens(target: &mut Tokenizer) {
     let mut rust_tokens = Tokenizer::new();
     let tref = &mut rust_tokens;

-    tref.register_token(regex(r"[\-\+]?\d+"),
-        |token| { Atom::gnd(Number::from_int_str(token)) });
-    tref.register_token(regex(r"[\-\+]?\d+\.\d+"),
-        |token| { Atom::gnd(Number::from_float_str(token)) });
-    tref.register_token(regex(r"[\-\+]?\d+(\.\d+)?[eE][\-\+]?\d+"),
-        |token| { Atom::gnd(Number::from_float_str(token)) });
+    tref.register_fallible_token(regex(r"[\-\+]?\d+"),
+        |token| { Ok(Atom::gnd(Number::from_int_str(token)?)) });
+    tref.register_fallible_token(regex(r"[\-\+]?\d+\.\d+"),
+        |token| { Ok(Atom::gnd(Number::from_float_str(token)?)) });
+    tref.register_fallible_token(regex(r"[\-\+]?\d+(\.\d+)?[eE][\-\+]?\d+"),
+        |token| { Ok(Atom::gnd(Number::from_float_str(token)?)) });
     tref.register_token(regex(r"True|False"),
         |token| { Atom::gnd(Bool::from_str(token)) });
     let sum_op = Atom::gnd(SumOp{});
diff --git a/lib/src/metta/text.rs b/lib/src/metta/text.rs
index 3d29cbbfb..ebec17aec 100644
--- a/lib/src/metta/text.rs
+++ b/lib/src/metta/text.rs
@@ -194,7 +194,8 @@ impl SyntaxNode {
                 let token_text = self.parsed_text.as_ref().unwrap();
                 let constr = tokenizer.find_token(token_text);
                 if let Some(constr) = constr {
-                    let new_atom = constr(token_text).unwrap(); //TODO, If the Tokenizer's atom constructor throws an error, then gracefully alert the user
+                    let new_atom = constr(token_text)
+                        .map_err(|e| format!("byte range = ({:?}) | {e}", self.src_range))?;
                     Ok(Some(new_atom))
                 } else {
                     let new_atom = Atom::sym(token_text);
@@ -663,6 +664,20 @@ mod tests {
         assert_eq!(Err(String::from("Unexpected right bracket")), parser.parse(&Tokenizer::new()));
     }

+    #[test]
+    fn test_error_from_tokenizer() {
+        //NOTE: This test relies on an intentional bug in the regex, so that it will accept an invalid
+        // float. However it could be hit in legitimate cases, such as an integer that overflows the
+        // type's capacity before we implement bigint, or any type where the representation's actual
+        // contours can't be captured by a regex.
+        let mut tokenizer = Tokenizer::new();
+        tokenizer.register_fallible_token(Regex::new(r"[\-\+]?\d+.\d+").unwrap(),
+            |token| Ok(Atom::gnd(metta::runner::arithmetics::Number::from_float_str(token)?))
+        );
+        let mut parser = SExprParser::new("12345678901234567:8901234567890");
+        assert!(parser.parse(&tokenizer).is_err());
+    }
+
     #[test]
     fn test_comment_base() {
         let program = ";(a 4)