From eb25539435c4b3a850d69d7b5b1c7a5085656179 Mon Sep 17 00:00:00 2001 From: Brett Mayson Date: Tue, 22 Oct 2024 07:44:04 +0000 Subject: [PATCH] fix unicode handling --- libs/config/src/analyze/chumsky.rs | 2 +- .../config/src/analyze/lints/c01_invalid_value.rs | 4 ++-- .../src/analyze/lints/c07_expected_array.rs | 2 +- .../src/analyze/lints/c08_missing_semicolon.rs | 2 +- .../src/analyze/lints/c10_class_missing_braces.rs | 2 +- libs/sqf/src/analyze/lints/s05_if_assign.rs | 2 +- libs/sqf/src/parser/lexer.rs | 6 +++--- libs/sqf/tests/lints.rs | 1 + libs/sqf/tests/lints/s05_if_assign_emoji.sqf | 2 ++ .../lints__simple_s05_if_assign_emoji.snap | 13 +++++++++++++ libs/workspace/src/reporting/processed.rs | 15 +++++++++++++-- 11 files changed, 39 insertions(+), 12 deletions(-) create mode 100644 libs/sqf/tests/lints/s05_if_assign_emoji.sqf create mode 100644 libs/sqf/tests/snapshots/lints__simple_s05_if_assign_emoji.snap diff --git a/libs/config/src/analyze/chumsky.rs b/libs/config/src/analyze/chumsky.rs index 333d77395..2dfca842f 100644 --- a/libs/config/src/analyze/chumsky.rs +++ b/libs/config/src/analyze/chumsky.rs @@ -55,7 +55,7 @@ impl ChumskyCode { { end += 1; } - &processed.as_str()[start..end] + processed.extract(start..end).to_string() } )); } diff --git a/libs/config/src/analyze/lints/c01_invalid_value.rs b/libs/config/src/analyze/lints/c01_invalid_value.rs index 9935c45c2..278926c68 100644 --- a/libs/config/src/analyze/lints/c01_invalid_value.rs +++ b/libs/config/src/analyze/lints/c01_invalid_value.rs @@ -159,7 +159,7 @@ impl CodeC01InvalidValue { #[must_use] pub fn new(span: Range, processed: &Processed) -> Self { Self { - value: processed.as_str()[span.clone()].to_string(), + value: processed.extract(span.clone()).to_string(), span, diagnostic: None, } @@ -218,7 +218,7 @@ impl CodeC01InvalidValueMacro { if let Some(diag) = &mut self.diagnostic { diag.notes.push(format!( "The processed output was:\n{} ", - &processed.as_str()[self.span.start..self.span.end] + processed.extract(self.span.clone()) )); } self diff --git a/libs/config/src/analyze/lints/c07_expected_array.rs b/libs/config/src/analyze/lints/c07_expected_array.rs index c73dc5a72..94defd3ed 100644 --- a/libs/config/src/analyze/lints/c07_expected_array.rs +++ b/libs/config/src/analyze/lints/c07_expected_array.rs @@ -166,7 +166,7 @@ impl Code07ExpectedArray { let ident_end = processed .mapping(name.span.end) .expect("mapping should exist"); - let haystack = &processed.as_str()[ident_end.original_start()..value.span().start]; + let haystack = &processed.extract(ident_end.original_start()..value.span().start); let possible_end = ident_end.original_start() + haystack.find(']').unwrap_or(1) + 1; self.suggestion = Some(name.value.to_string()); self.diagnostic = Diagnostic::from_code_processed( diff --git a/libs/config/src/analyze/lints/c08_missing_semicolon.rs b/libs/config/src/analyze/lints/c08_missing_semicolon.rs index 152acf6b7..5915daee6 100644 --- a/libs/config/src/analyze/lints/c08_missing_semicolon.rs +++ b/libs/config/src/analyze/lints/c08_missing_semicolon.rs @@ -140,7 +140,7 @@ impl Code08MissingSemicolon { } fn generate_processed(mut self, processed: &Processed) -> Self { - let haystack = &processed.as_str()[self.span.clone()]; + let haystack = &processed.extract(self.span.clone()); let possible_end = self.span.start + haystack .find('\n') diff --git a/libs/config/src/analyze/lints/c10_class_missing_braces.rs b/libs/config/src/analyze/lints/c10_class_missing_braces.rs index 56d55eab9..30e915309 100644 --- a/libs/config/src/analyze/lints/c10_class_missing_braces.rs +++ b/libs/config/src/analyze/lints/c10_class_missing_braces.rs @@ -128,7 +128,7 @@ impl Code10ClassMissingBraces { } fn generate_processed(mut self, processed: &Processed) -> Self { - let haystack = &processed.as_str()[self.span.clone()]; + let haystack = &processed.extract(self.span.clone()); let possible_end = self.span.start + haystack .find('\n') diff --git a/libs/sqf/src/analyze/lints/s05_if_assign.rs b/libs/sqf/src/analyze/lints/s05_if_assign.rs index 698ff2048..76b5f3737 100644 --- a/libs/sqf/src/analyze/lints/s05_if_assign.rs +++ b/libs/sqf/src/analyze/lints/s05_if_assign.rs @@ -202,7 +202,7 @@ impl CodeS05IfAssign { } fn generate_processed(mut self, processed: &Processed) -> Self { - let haystack = &processed.as_str()[self.rhs.1.end..]; + let haystack = &processed.extract_from(self.rhs.1.end..); let end_position = self.rhs.1.end + haystack.find('}').unwrap_or(0) + 1; self.diagnostic = Diagnostic::from_code_processed(&self, self.if_cmd.start..end_position, processed); diff --git a/libs/sqf/src/parser/lexer.rs b/libs/sqf/src/parser/lexer.rs index 221cefe98..fdf7aa96e 100644 --- a/libs/sqf/src/parser/lexer.rs +++ b/libs/sqf/src/parser/lexer.rs @@ -9,9 +9,9 @@ use crate::StringWrapper; pub type Tokens = Vec<(Token, Range)>; macro_rules! chain_collect { - ($Collect:ty: $($value:expr),+ $(,)?) => { - std::iter::empty()$(.chain($value))+.collect::<$Collect>() - }; + ($Collect:ty: $($value:expr),+ $(,)?) => { + std::iter::empty()$(.chain($value))+.collect::<$Collect>() + }; } pub fn strip_comments(tokens: &mut Tokens) { diff --git a/libs/sqf/tests/lints.rs b/libs/sqf/tests/lints.rs index 484bffc09..01e91605e 100644 --- a/libs/sqf/tests/lints.rs +++ b/libs/sqf/tests/lints.rs @@ -24,6 +24,7 @@ lint!(s02_event_handler_case); lint!(s03_static_typename); lint!(s04_command_case); lint!(s05_if_assign); +lint!(s05_if_assign_emoji); lint!(s06_find_in_str); lint!(s07_select_parse_number); lint!(s08_format_args); diff --git a/libs/sqf/tests/lints/s05_if_assign_emoji.sqf b/libs/sqf/tests/lints/s05_if_assign_emoji.sqf new file mode 100644 index 000000000..7f30c2ece --- /dev/null +++ b/libs/sqf/tests/lints/s05_if_assign_emoji.sqf @@ -0,0 +1,2 @@ +"🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭"; +private _workingArms = if (alive player) then { 2 } else { 0 }; diff --git a/libs/sqf/tests/snapshots/lints__simple_s05_if_assign_emoji.snap b/libs/sqf/tests/snapshots/lints__simple_s05_if_assign_emoji.snap new file mode 100644 index 000000000..daec10565 --- /dev/null +++ b/libs/sqf/tests/snapshots/lints__simple_s05_if_assign_emoji.snap @@ -0,0 +1,13 @@ +--- +source: libs/sqf/tests/lints.rs +expression: lint(stringify! (s05_if_assign_emoji)) +--- +help[L-S05]: assignment to if can be replaced with select + ┌─ s05_if_assign_emoji.sqf:2:24 + │ +2 │ private _workingArms = if (alive player) then { 2 } else { 0 }; + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ use select + │ + = note: the if and else blocks only return constant values + select is faster in this case + = try: [0, 2] select (alive player) diff --git a/libs/workspace/src/reporting/processed.rs b/libs/workspace/src/reporting/processed.rs index 928794bbe..9b3e286c4 100644 --- a/libs/workspace/src/reporting/processed.rs +++ b/libs/workspace/src/reporting/processed.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, ops::Range, sync::Arc}; +use std::{collections::HashMap, ops::{Range, RangeFrom}, sync::Arc}; use tracing::warn; use crate::{ @@ -289,7 +289,7 @@ impl Processed { } let mut real_start = 0; let mut real_end = 0; - self.output.char_indices().for_each(|(p, c)| { + self.output.chars().enumerate().for_each(|(p, c)| { if p < span.start { real_start += c.len_utf8(); } @@ -299,6 +299,17 @@ impl Processed { }); Arc::from(&self.output[real_start..real_end]) } + + #[must_use] + pub fn extract_from(&self, from: RangeFrom) -> Arc { + let mut real_start = 0; + self.output.chars().enumerate().for_each(|(p, c)| { + if p < from.start { + real_start += c.len_utf8(); + } + }); + Arc::from(&self.output[real_start..]) + } } #[derive(Debug)]