Skip to content

Commit

Permalink
Merge pull request #628 from luketpeterson/main
Browse files Browse the repository at this point in the history
Adding support for parsing escape sequences in s-expression parser
  • Loading branch information
vsbogd authored Mar 19, 2024
2 parents 9296781 + 2bf2a79 commit d7d421c
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 8 deletions.
66 changes: 60 additions & 6 deletions lib/src/metta/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -426,24 +426,62 @@ impl<'a> SExprParser<'a> {
let leftover_text_node = SyntaxNode::incomplete_with_message(SyntaxNodeType::LeftoverText, start_idx..self.cur_idx(), vec![], "Double quote expected".to_string());
return leftover_text_node;
}
while let Some((_idx, c)) = self.it.next() {
while let Some((char_idx, c)) = self.it.next() {
if c == '"' {
token.push('"');
let string_node = SyntaxNode::new_token_node(SyntaxNodeType::StringToken, start_idx..self.cur_idx(), token);
return string_node;
}
let c = if c == '\\' {
if c == '\\' {
let escape_err = |cur_idx| { SyntaxNode::incomplete_with_message(SyntaxNodeType::StringToken, char_idx..cur_idx, vec![], "Invalid escape sequence".to_string()) };

match self.it.next() {
Some((_idx, c)) => c,
Some((_idx, c)) => {
let val = match c {
'\'' | '\"' | '\\' => c, //single quote, double quote, & backslash
'n' => '\n', // newline
'r' => '\r', // carriage return
't' => '\t', // tab
'x' => { // hex sequence
let mut code: Option<u8> = None;
if let Some((_, digit1)) = self.it.next() {
if digit1.is_digit(16) {
if let Ok(digit1_byte) = TryInto::<u8>::try_into(digit1) {
// Try to extract a valid 2-digit code
if let Some((_, digit2)) = self.it.peek() {
if digit2.is_digit(16) {
if let Ok(digit2_byte) = TryInto::<u8>::try_into(*digit2) {
self.it.next().unwrap();
let digits_buf = &[digit1_byte, digit2_byte];
let code_val = u8::from_str_radix(core::str::from_utf8(digits_buf).unwrap(), 16).unwrap();
if code_val <= 0x7F { //Cap it at 0x7F so we don't generate invalid UTF-8
code = Some(code_val);
}
}
}
}
}
}
}
match code {
Some(code) => code.into(),
None => { return escape_err(self.cur_idx()); }
}
},
_ => {
return escape_err(self.cur_idx());
}
};
token.push(val);
},
None => {
let leftover_text_node = SyntaxNode::incomplete_with_message(SyntaxNodeType::StringToken, start_idx..self.cur_idx(), vec![], "Escaping sequence is not finished".to_string());
return leftover_text_node;
},
}
} else {
c
};
token.push(c);
token.push(c);
}
}
let unclosed_string_node = SyntaxNode::incomplete_with_message(SyntaxNodeType::StringToken, start_idx..self.cur_idx(), vec![], "Unclosed String Literal".to_string());
unclosed_string_node
Expand Down Expand Up @@ -545,6 +583,22 @@ mod tests {
assert_eq!(vec![expr!("\"te st\"")], parse_atoms("\"te st\""));
}

/// Checks that backslash escape sequences inside string literals are resolved
/// by the parser.
///
/// In each case the input is a Rust raw string (`r#"..."#`), so the backslash
/// reaches `parse_atoms` verbatim. The expected value is an ordinary Rust
/// string literal, so the Rust compiler itself produces the resolved character.
#[test]
fn test_text_escape_chars() {
// `\t` resolves to a tab character
assert_eq!(vec![expr!("\"test\ttab\"")], parse_atoms(r#""test\ttab""#));
// `\n` resolves to a newline character
assert_eq!(vec![expr!("\"test\nnewline\"")], parse_atoms(r#""test\nnewline""#));
// ANSI sequence: `\x1b` (ESC) given as a two-digit hex escape, twice in one string
assert_eq!(vec![expr!("\"\x1b[1;32m> \x1b[0m\"")], parse_atoms(r#""\x1b[1;32m> \x1b[0m""#));
// An escaped double quote is kept in the token instead of closing the string
assert_eq!(vec![expr!("\"test\"quote\"")], parse_atoms(r#""test\"quote""#));
// Two-digit hex code: lowercase hex digits in the input, expecting the character 0x7F
assert_eq!(vec![expr!("\"test\x7Fmax\"")], parse_atoms(r#""test\x7fmax""#));
// Parse failure: hex code 0xFF is out of range, so no atoms are expected back
assert!(parse_atoms(r#""test\xFF""#).len() == 0);
}

#[test]
fn test_text_recognize_full_token() {
let mut tokenizer = Tokenizer::new();
Expand Down
3 changes: 2 additions & 1 deletion repl/src/config_params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ pub fn builtin_init_metta_code() -> String {
format!(r#"
; !(bind! {CFG_HISTORY_MAX_LEN} (new-state 500)) ; TODO, enable this when value-bridging is implemented
!(bind! {CFG_PROMPT} (new-state "> "))
; !(bind! {CFG_STYLED_PROMPT} (new-state (concat "\x1b[1;32m" (get-state {CFG_PROMPT}) "\x1b[0m"))) ;TODO, two things before this works. Escape parsing in string literals, and string stdlib type with concat operation
; !(bind! {CFG_STYLED_PROMPT} (new-state (concat "\x1b[1;32m" (get-state {CFG_PROMPT}) "\x1b[0m"))) ;TODO, Need string stdlib type with concat operation
!(bind! {CFG_STYLED_PROMPT} (new-state "\x1b[1;32m> \x1b[0m"))
!(bind! {CFG_BRACKET_STYLES} (new-state ("94" "93" "95" "96")))
!(bind! {CFG_COMMENT_STYLE} (new-state "32"))
!(bind! {CFG_VARIABLE_STYLE} (new-state "33"))
Expand Down
2 changes: 1 addition & 1 deletion repl/src/repl.default.metta
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; !(change-state! &ReplHistoryMaxLen 500) ; TODO: enable this when I have value bridging implemented.

!(change-state! &ReplPrompt "> ")
; !(change-state! &ReplStyledPrompt "\x1b[1;32m> \x1b[0m") ; TODO: currently the MeTTa string parser doesn't resolve escape chars, although perhaps it should
!(change-state! &ReplStyledPrompt "\x1b[1;32m> \x1b[0m") ;

; TODO: somebody with better design sense should tweak these, and also provide dark-mode settings
; ANSI escape codes to configure the syntax highlighter
Expand Down

0 comments on commit d7d421c

Please sign in to comment.