From 699d060bb3288d8f8c286b2079240dc240c1d0c6 Mon Sep 17 00:00:00 2001 From: Wolf Vollprecht Date: Wed, 5 Feb 2025 09:17:01 +0100 Subject: [PATCH 1/2] look into brace expansion and pattern matching --- Cargo.toml | 2 +- crates/deno_task_shell/src/grammar.pest | 42 ++++++++++---- crates/deno_task_shell/src/parser.rs | 64 +++++++++++++++++++-- crates/deno_task_shell/src/shell/execute.rs | 23 +++++++- 4 files changed, 114 insertions(+), 17 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e2047f6..42367f8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,4 +8,4 @@ homepage = "https://github.com/prefix-dev/shell" repository = "https://github.com/prefix-dev/shell" license = "BSD-3-Clause" edition = "2021" -readme = "README.md" \ No newline at end of file +readme = "README.md" diff --git a/crates/deno_task_shell/src/grammar.pest b/crates/deno_task_shell/src/grammar.pest index 1d5325b..3d62c44 100644 --- a/crates/deno_task_shell/src/grammar.pest +++ b/crates/deno_task_shell/src/grammar.pest @@ -10,21 +10,23 @@ INT = { ("+" | "-")? 
~ ASCII_DIGIT+ } QUOTED_WORD = { DOUBLE_QUOTED | SINGLE_QUOTED } UNQUOTED_PENDING_WORD = ${ - (TILDE_PREFIX ~ (!(OPERATOR | WHITESPACE | NEWLINE) ~ ( + (TILDE_PREFIX ~ (!(WHITESPACE | NEWLINE) ~ ( EXIT_STATUS | UNQUOTED_ESCAPE_CHAR | "$" ~ ARITHMETIC_EXPRESSION | - SUB_COMMAND | + SUB_COMMAND | + BRACE_EXPANSION | VARIABLE_EXPANSION | UNQUOTED_CHAR | QUOTED_WORD ))*) | - (!(OPERATOR | WHITESPACE | NEWLINE) ~ ( + (!(WHITESPACE | NEWLINE) ~ ( EXIT_STATUS | UNQUOTED_ESCAPE_CHAR | "$" ~ ARITHMETIC_EXPRESSION | SUB_COMMAND | + BRACE_EXPANSION | VARIABLE_EXPANSION | UNQUOTED_CHAR | QUOTED_WORD @@ -36,6 +38,7 @@ QUOTED_PENDING_WORD = ${ ( QUOTED_ESCAPE_CHAR | "$" ~ ARITHMETIC_EXPRESSION | SUB_COMMAND | + BRACE_EXPANSION | VARIABLE_EXPANSION | QUOTED_CHAR )* } @@ -46,6 +49,7 @@ PARAMETER_PENDING_WORD = ${ PARAMETER_ESCAPE_CHAR | "$" ~ ARITHMETIC_EXPRESSION | SUB_COMMAND | + BRACE_EXPANSION | VARIABLE_EXPANSION | QUOTED_WORD | QUOTED_CHAR @@ -55,6 +59,7 @@ PARAMETER_PENDING_WORD = ${ PARAMETER_ESCAPE_CHAR | "$" ~ ARITHMETIC_EXPRESSION | SUB_COMMAND | + BRACE_EXPANSION | VARIABLE_EXPANSION | QUOTED_WORD | QUOTED_CHAR @@ -62,26 +67,28 @@ PARAMETER_PENDING_WORD = ${ } FILE_NAME_PENDING_WORD = ${ - (TILDE_PREFIX ~ (!(WHITESPACE | OPERATOR | NEWLINE) ~ ( + (TILDE_PREFIX ~ (!(WHITESPACE | NEWLINE) ~ ( UNQUOTED_ESCAPE_CHAR | + BRACE_EXPANSION | VARIABLE_EXPANSION | UNQUOTED_CHAR | QUOTED_WORD ))*) | - (!(WHITESPACE | OPERATOR | NEWLINE) ~ ( + (!(WHITESPACE | NEWLINE) ~ ( UNQUOTED_ESCAPE_CHAR | + BRACE_EXPANSION | VARIABLE_EXPANSION | UNQUOTED_CHAR | QUOTED_WORD ))+ } -UNQUOTED_ESCAPE_CHAR = ${ ("\\" ~ "$" | "$" ~ !"(" ~ !"{" ~ !VARIABLE) | "\\" ~ (" " | "`" | "\"" | "(" | ")") } +UNQUOTED_ESCAPE_CHAR = ${ ("\\" ~ "$" | "$" ~ !"(" ~ !"{" ~ !VARIABLE) | "\\" ~ (" " | "`" | "\"" | "(" | ")" | "{" | "}") } QUOTED_ESCAPE_CHAR = ${ "\\" ~ "$" | "$" ~ !"(" ~ !"{" ~ !(ASCII_DIGIT | VARIABLE) | "\\" ~ ("`" | "\"" | "(" | ")" | "'") } PARAMETER_ESCAPE_CHAR = ${ "\\" ~ "$" | "$" ~ !"(" ~ 
!"{" ~ !VARIABLE | "\\" ~ "}" } -UNQUOTED_CHAR = ${ ("\\" ~ " ") | !("]]" | "[[" | "(" | ")" | "<" | ">" | "|" | "&" | ";" | "\"" | "'" | "$") ~ ANY } +UNQUOTED_CHAR = ${ ("\\" ~ " ") | !OPERATOR ~ ANY } QUOTED_CHAR = ${ !"\"" ~ ANY } VARIABLE_EXPANSION = ${ @@ -92,6 +99,21 @@ VARIABLE_EXPANSION = ${ ) } +// {1..5} -> 1 2 3 4 5 +// {1..5..2} -> 1 3 5 +// {1,24,5,123} -> 1 24 5 123 +// TODO: this should support some more things: `{1..100}` should work, and `{1..a}` (single unicode char) +// However, {aa..bb} should not work, as it's not a valid range +BRACE_ELEMENT = ${ ASCII_DIGIT+ | ANY } +BRACE_RANGE_EXPANSION = ${ + "{" ~ (BRACE_ELEMENT ~ ".." ~ BRACE_ELEMENT ~ (".." ~ BRACE_ELEMENT)?) ~ "}" +} + +COMMA = { "," } +BRACE_LIST_EXPANSION = ${ "{" ~ (UNQUOTED_PENDING_WORD? ~ COMMA)+ ~ (UNQUOTED_PENDING_WORD? ~ COMMA?) ~ "}" } + +BRACE_EXPANSION = _{ BRACE_RANGE_EXPANSION | BRACE_LIST_EXPANSION } + SPECIAL_PARAM = ${ ARGNUM | "@" | "#" | "?" | "$" | "*" } ARGNUM = ${ ASCII_NONZERO_DIGIT ~ ASCII_DIGIT* | "0" } VARIABLE = ${ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* } @@ -153,7 +175,7 @@ EXIT_STATUS = ${ "$?" } // Operators OPERATOR = _{ AND_IF | OR_IF | DSEMI | DLESS | DGREAT | LESSAND | GREATAND | LESSGREAT | DLESSDASH | CLOBBER | - "," |"(" | ")" | "{" | "}" | ";" | "&" | "|" | "<" | ">" + "(" | ")" | ";" | "&" | "|" | "<" | ">" | "{" | "}" | "," } // Reserved words @@ -197,7 +219,6 @@ command = !{ } compound_command = { - brace_group | ARITHMETIC_EXPRESSION | subshell | for_clause | @@ -292,7 +313,7 @@ term = !{ and_or ~ (separator ~ and_or)* } for_clause = { For ~ name ~ linebreak ~ - (In ~ (brace_group | wordlist)? ~ (";" | NEWLINE))? ~ + (In ~ (wordlist)? ~ (";" | NEWLINE))? ~ do_group } @@ -384,7 +405,6 @@ function_body = !{ compound_command ~ redirect_list? 
} fname = @{ RESERVED_WORD | NAME | ASSIGNMENT_WORD | UNQUOTED_PENDING_WORD } name = @{ NAME } -brace_group = !{ Lbrace ~ compound_list ~ Rbrace } do_group = !{ Do ~ compound_list ~ Done } simple_command = !{ diff --git a/crates/deno_task_shell/src/parser.rs b/crates/deno_task_shell/src/parser.rs index d7085e5..1212a7f 100644 --- a/crates/deno_task_shell/src/parser.rs +++ b/crates/deno_task_shell/src/parser.rs @@ -395,6 +395,29 @@ pub enum VariableModifier { AlternateValue(Word), } +#[cfg_attr(feature = "serialization", derive(serde::Serialize))] +#[cfg_attr( + feature = "serialization", + serde(rename_all = "camelCase", tag = "kind", content = "value") +)] +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum BraceElement { + Integer(i64), + String(String), +} + +#[cfg_attr(feature = "serialization", derive(serde::Serialize))] +#[cfg_attr( + feature = "serialization", + serde(rename_all = "camelCase", tag = "kind", content = "value") +)] +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct BraceRange { + pub start: BraceElement, + pub end: BraceElement, + pub step: Option, +} + #[cfg_attr(feature = "serialization", derive(serde::Serialize))] #[cfg_attr( feature = "serialization", @@ -414,6 +437,8 @@ pub enum WordPart { Tilde(TildePrefix), #[error("Invalid arithmetic expression")] Arithmetic(Arithmetic), + #[error("Invalid range expression")] + BraceRange(BraceRange), #[error("Invalid exit status")] ExitStatus, } @@ -996,9 +1021,6 @@ fn parse_for_loop(pairs: Pair) -> Result { fn parse_compound_command(pair: Pair) -> Result { let inner = pair.into_inner().next().unwrap(); match inner.as_rule() { - Rule::brace_group => { - Err(miette!("Unsupported compound command brace_group")) - } Rule::subshell => parse_subshell(inner), Rule::for_clause => { let for_loop = parse_for_loop(inner); @@ -1222,7 +1244,7 @@ fn parse_binary_conditional_expression(pair: Pair) -> Result { let left_word = parse_word(left)?; let right_word = parse_word(right)?; - + println!("right word: {:?}", 
right_word); let op = match operator.as_rule() { Rule::binary_bash_conditional_op => match operator.as_str() { "==" => BinaryOp::Equal, @@ -1268,12 +1290,41 @@ fn parse_binary_conditional_expression(pair: Pair) -> Result { }) } + +fn parse_brace_element(pair: Pair) -> Result { + let text = pair.as_str(); + if let Ok(num) = text.parse::() { + Ok(BraceElement::Integer(num)) + } else { + Ok(BraceElement::String(text.to_string())) + } +} + +fn parse_brace_expansion(pair: Pair) -> Result { + let mut inner = pair.into_inner(); + + let start = inner + .next() + .ok_or_else(|| miette!("Expected start of brace expansion"))?; + let start = parse_brace_element(start)?; + + let end = inner + .next() + .ok_or_else(|| miette!("Expected end of brace expansion"))?; + let end = parse_brace_element(end)?; + + let step = inner.next().map(parse_brace_element).transpose()?; + + Ok(WordPart::BraceRange(BraceRange { start, end, step })) +} + fn parse_word(pair: Pair) -> Result { let mut parts = Vec::new(); match pair.as_rule() { Rule::UNQUOTED_PENDING_WORD => { for part in pair.into_inner() { + println!("part: {:?}", part.as_rule()); match part.as_rule() { Rule::EXIT_STATUS => parts.push(WordPart::ExitStatus), Rule::UNQUOTED_CHAR => { @@ -1321,6 +1372,11 @@ fn parse_word(pair: Pair) -> Result { let tilde_prefix = parse_tilde_prefix(part)?; parts.push(tilde_prefix); } + Rule::BRACE_RANGE_EXPANSION => { + println!("Part: {:?}", part); + let brace_expansion = parse_brace_expansion(part)?; + parts.push(brace_expansion); + } Rule::ARITHMETIC_EXPRESSION => { let arithmetic_expression = parse_arithmetic_expression(part)?; parts.push(WordPart::Arithmetic(arithmetic_expression)); diff --git a/crates/deno_task_shell/src/shell/execute.rs b/crates/deno_task_shell/src/shell/execute.rs index b13bf82..2fac1fe 100644 --- a/crates/deno_task_shell/src/shell/execute.rs +++ b/crates/deno_task_shell/src/shell/execute.rs @@ -17,6 +17,7 @@ use tokio_util::sync::CancellationToken; use 
crate::parser::AssignmentOp; use crate::parser::BinaryOp; +use crate::parser::BraceRange; use crate::parser::Condition; use crate::parser::ConditionInner; use crate::parser::ElsePart; @@ -446,6 +447,7 @@ async fn resolve_redirect_word_pipe( &mut state.clone(), stdin.clone(), stderr.clone(), + true, ) .await; let words = match words { @@ -1303,6 +1305,7 @@ pub async fn evaluate_args( state, stdin.clone(), stderr.clone(), + true, ) .await?; result.extend(parts); @@ -1317,7 +1320,20 @@ async fn evaluate_word( stderr: ShellPipeWriter, ) -> Result { Ok( - evaluate_word_parts(word.into_parts(), state, stdin, stderr) + evaluate_word_parts(word.into_parts(), state, stdin, stderr, true) .await? .into(), ) +} + +async fn evaluate_word_no_glob( + word: Word, + state: &mut ShellState, + stdin: ShellPipeReader, + stderr: ShellPipeWriter, +) -> Result { + Ok( + evaluate_word_parts(word.into_parts(), state, stdin, stderr, false) .await? .into(), ) @@ -1469,6 +1485,7 @@ fn evaluate_word_parts( state: &mut ShellState, stdin: ShellPipeReader, stderr: ShellPipeWriter, + expand_glob: bool, ) -> LocalBoxFuture> { fn text_parts_to_string(parts: Vec) -> String { let mut result = @@ -1645,6 +1662,10 @@ fn evaluate_word_parts( )) } } + WordPart::BraceRange(BraceRange { start, end, step: _ }) => { + current_text.push(TextPart::Text(format!("{:?} -- {:?}", start, end))); + continue; + } WordPart::Arithmetic(arithmetic) => { let arithmetic_result = execute_arithmetic_expression(arithmetic, state).await?; From 0663c1493bba659d3197efdfa0210afb9a0fd934 Mon Sep 17 00:00:00 2001 From: Wolf Vollprecht Date: Thu, 6 Feb 2025 11:42:27 +0100 Subject: [PATCH 2/2] add tests --- crates/tests/test-data/brace_expansion.sh | 42 +++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 crates/tests/test-data/brace_expansion.sh diff --git a/crates/tests/test-data/brace_expansion.sh b/crates/tests/test-data/brace_expansion.sh new file mode 100644 index 0000000..797c16d --- /dev/null +++
b/crates/tests/test-data/brace_expansion.sh @@ -0,0 +1,42 @@ +# Note: bash and zsh differ quite a bit in how they handle brace expansion. +# We follow the simpler bash rules (e.g. variables are not expanded inside ranges). +> echo {1..10} +1 2 3 4 5 6 7 8 9 10 + +> echo {01..20} +01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 + +> FOOBAR=5 +> echo {1..$FOOBAR} +{1..5} + +> {1..{1..5}} +{1..{1..5}} + +> echo {1..x} +{1..x} + +> echo {a..c} +a b c + +> echo {1..10..2} +1 3 5 7 9 + +> echo {10..1..2} +10 8 6 4 2 + +> echo {10..1..-4} +10 6 2 + +> echo {0a..0c} +{0a..0c} + +> echo {aa..ac} +{aa..ac} + +> echo {001..10} +001 002 003 004 005 006 007 008 009 010 + +# If either endpoint has a leading zero, every number is zero-padded to the width of the widest endpoint. +> echo {01..100} +001 002 003 004 005 006 007 008 009 010 ... \ No newline at end of file