Skip to content

Commit

Permalink
feat: whitespace stripping (#3)
Browse files Browse the repository at this point in the history
* feat: whitespace stripping

* fix: bool negation

* chore: cargo fmt

* Update wdl_util.rs

* Update wdl_util.rs

* Update wdl_util.rs
  • Loading branch information
a-frantz authored Sep 5, 2024
1 parent a18e40e commit bd760ef
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Crankshaft.
pub mod engine;
pub mod wdl_util;

/// A boxed [`std::error::Error`].
pub type BoxedError = Box<dyn std::error::Error>;
Expand Down
122 changes: 122 additions & 0 deletions src/wdl_util.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
//! Module for utility functions.
//!
//! Currently this is limited to whitespace stripping.
/// Removes line continuations from a string.
///
/// A line continuation is a backslash immediately followed by a newline
/// (`\n`). The backslash, the newline, and any spaces or tabs that begin the
/// following line are dropped, which joins the two lines together.
///
/// A backslash followed by any other character is treated as an escape pair
/// and both characters are copied through unchanged. As a consequence an
/// escaped backslash (`\\`) at the end of a line does not start a
/// continuation. A lone backslash at the very end of the input is preserved.
///
/// Returns a newly allocated string; the input is not modified.
fn remove_line_continuations(s: &str) -> String {
    // The output can only be as long as the input, never longer.
    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(c) = chars.next() {
        if c != '\\' {
            result.push(c);
            continue;
        }

        match chars.next() {
            Some('\n') => {
                // Swallow the indentation of the continued line. `next_if`
                // leaves the first non-blank character in the iterator so the
                // outer loop handles it (it may itself start a continuation).
                while chars.next_if(|&ws| ws == ' ' || ws == '\t').is_some() {}
            }
            Some(escaped) => {
                // Not a continuation: keep the backslash and the character it
                // precedes exactly as written.
                result.push(c);
                result.push(escaped);
            }
            // Backslash was the final character of the input.
            None => result.push(c),
        }
    }

    result
}

/// Calculates the leading whitespace of a string.
///
/// The input is split on `\n` and, for every line that contains at least one
/// character other than a space or tab, the number of spaces/tabs before that
/// character is counted (a tab counts as one). The smallest such count is
/// returned. Lines that are empty or made up solely of spaces and tabs are
/// ignored; if no line qualifies the result is `0`.
fn calculate_leading_whitespace(s: &str) -> usize {
    s.split('\n')
        // `position` yields the index of the first non-blank character, which
        // equals the amount of leading whitespace; blank lines yield `None`.
        .filter_map(|line| line.chars().position(|c| c != ' ' && c != '\t'))
        .min()
        .unwrap_or(0)
}

/// Strips leading whitespace from a string.
///
/// When `command` is `false`, line continuations are first removed with
/// `remove_line_continuations`; when it is `true` the text is used as-is.
/// The common indentation is then computed with
/// `calculate_leading_whitespace` and that many leading bytes are removed
/// from every line. Lines shorter than the common indentation become empty.
///
/// Lines are split with [`str::lines`] and re-joined with `\n`, so a trailing
/// newline in the input is not present in the output.
pub fn strip_leading_whitespace(s: &str, command: bool) -> String {
    let text: std::borrow::Cow<'_, str> = if command {
        s.into()
    } else {
        remove_line_continuations(s).into()
    };
    let indent = calculate_leading_whitespace(&text);

    let mut stripped = String::with_capacity(text.len());
    for (index, line) in text.lines().enumerate() {
        if index > 0 {
            stripped.push('\n');
        }
        // `get` returns `None` for lines shorter than the indent, which are
        // emitted as empty lines.
        stripped.push_str(line.get(indent..).unwrap_or(""));
    }
    stripped
}

// Unit tests for the whitespace helpers defined in this module.
#[cfg(test)]
mod tests {
use super::*;

// Checks that `remove_line_continuations` returns the first two source lines
// joined into one, followed by the second logical line.
//
// NOTE(review): the `\` + newline inside the `input` literal is handled by
// Rust's own string-literal continuation rule at compile time, so the value
// passed to the function appears to contain no backslash at all. Confirm
// whether an explicit `\\` followed by a newline was intended here.
#[test]
fn test_remove_line_continuations() {
let input = "first line \
still first line
second line";
let expected = "first line still first line\nsecond line";
assert_eq!(remove_line_continuations(input), expected);
}

// Checks that the minimum indentation across all lines is reported, both for
// space-only indentation and for a mix of spaces and tabs.
//
// NOTE(review): both assertions expect 4, which depends on the exact leading
// whitespace inside the multi-line literals; keep it intact when reformatting.
#[test]
fn test_calculate_leading_whitespace() {
let input = " first line is indented 4 spaces
second line is indented 8 spaces
third line is indented 4 spaces";
assert_eq!(calculate_leading_whitespace(input), 4);
// Second case: indentation made of both spaces and tabs.
let input = " first line is indented 4 spaces
\t \t second line is indented with a mix of 8 spaces and tabs
\t\t\t\tfourth line is indented 4 tabs";
assert_eq!(calculate_leading_whitespace(input), 4);
}

// Exercises `strip_leading_whitespace` with `command == false`, i.e. the path
// that calls `remove_line_continuations` before stripping indentation.
//
// NOTE(review): as in `test_remove_line_continuations`, the single `\` before
// the newline in `input` is a Rust string-literal continuation, so the input
// presumably reaches the function without a backslash. Confirm intent.
#[test]
fn test_strip_leading_whitespace_not_in_command() {
let input = " first line is indented 4 spaces \
still first line
second line is indented 8 spaces
third line is indented 4 spaces";
let expected = "first line is indented 4 spaces still first line\n second line is indented 8 spaces\nthird line is indented 4 spaces";
assert_eq!(strip_leading_whitespace(input, false), expected);
}

// Exercises `strip_leading_whitespace` with `command == true`: the escaped
// backslash (`\\`) at the end of the first line is a real backslash in the
// input and is expected to survive, together with the newline after it.
#[test]
fn test_strip_leading_whitespace_in_command() {
let input = " first line is indented 4 spaces and trails a backslash \\
second line is indented 8 spaces
third line is indented 4 spaces";
let expected = "first line is indented 4 spaces and trails a backslash \\\n second line is indented 8 spaces\nthird line is indented 4 spaces";
assert_eq!(strip_leading_whitespace(input, true), expected);
}
}

0 comments on commit bd760ef

Please sign in to comment.