From 5cd4feb94bb57b286d4e120214f71d8222802747 Mon Sep 17 00:00:00 2001 From: BrettMayson Date: Tue, 22 Oct 2024 11:26:16 -0600 Subject: [PATCH] fix multibyte character counting in processed (#812) * fix multibyte character counting in processed * fix unicode handling * fmt --- libs/config/src/analyze/chumsky.rs | 2 +- .../src/analyze/lints/c01_invalid_value.rs | 4 +- .../src/analyze/lints/c07_expected_array.rs | 2 +- .../analyze/lints/c08_missing_semicolon.rs | 2 +- .../analyze/lints/c10_class_missing_braces.rs | 2 +- libs/sqf/src/analyze/lints/s05_if_assign.rs | 2 +- libs/sqf/src/parser/lexer.rs | 6 +-- libs/sqf/tests/lints.rs | 1 + libs/sqf/tests/lints/s05_if_assign_emoji.sqf | 2 + .../lints__simple_s05_if_assign_emoji.snap | 13 +++++ .../lints__simple_s08_format_args.snap | 50 ++++++++----------- libs/workspace/src/reporting/processed.rs | 28 ++++++++--- 12 files changed, 68 insertions(+), 46 deletions(-) create mode 100644 libs/sqf/tests/lints/s05_if_assign_emoji.sqf create mode 100644 libs/sqf/tests/snapshots/lints__simple_s05_if_assign_emoji.snap diff --git a/libs/config/src/analyze/chumsky.rs b/libs/config/src/analyze/chumsky.rs index 333d7739..2dfca842 100644 --- a/libs/config/src/analyze/chumsky.rs +++ b/libs/config/src/analyze/chumsky.rs @@ -55,7 +55,7 @@ impl ChumskyCode { { end += 1; } - &processed.as_str()[start..end] + processed.extract(start..end).to_string() } )); } diff --git a/libs/config/src/analyze/lints/c01_invalid_value.rs b/libs/config/src/analyze/lints/c01_invalid_value.rs index 9935c45c..278926c6 100644 --- a/libs/config/src/analyze/lints/c01_invalid_value.rs +++ b/libs/config/src/analyze/lints/c01_invalid_value.rs @@ -159,7 +159,7 @@ impl CodeC01InvalidValue { #[must_use] pub fn new(span: Range, processed: &Processed) -> Self { Self { - value: processed.as_str()[span.clone()].to_string(), + value: processed.extract(span.clone()).to_string(), span, diagnostic: None, } @@ -218,7 +218,7 @@ impl CodeC01InvalidValueMacro { if let Some(diag) = 
&mut self.diagnostic { diag.notes.push(format!( "The processed output was:\n{} ", - &processed.as_str()[self.span.start..self.span.end] + processed.extract(self.span.clone()) )); } self diff --git a/libs/config/src/analyze/lints/c07_expected_array.rs b/libs/config/src/analyze/lints/c07_expected_array.rs index c73dc5a7..94defd3e 100644 --- a/libs/config/src/analyze/lints/c07_expected_array.rs +++ b/libs/config/src/analyze/lints/c07_expected_array.rs @@ -166,7 +166,7 @@ impl Code07ExpectedArray { let ident_end = processed .mapping(name.span.end) .expect("mapping should exist"); - let haystack = &processed.as_str()[ident_end.original_start()..value.span().start]; + let haystack = &processed.extract(ident_end.original_start()..value.span().start); let possible_end = ident_end.original_start() + haystack.find(']').unwrap_or(1) + 1; self.suggestion = Some(name.value.to_string()); self.diagnostic = Diagnostic::from_code_processed( diff --git a/libs/config/src/analyze/lints/c08_missing_semicolon.rs b/libs/config/src/analyze/lints/c08_missing_semicolon.rs index 152acf6b..5915daee 100644 --- a/libs/config/src/analyze/lints/c08_missing_semicolon.rs +++ b/libs/config/src/analyze/lints/c08_missing_semicolon.rs @@ -140,7 +140,7 @@ impl Code08MissingSemicolon { } fn generate_processed(mut self, processed: &Processed) -> Self { - let haystack = &processed.as_str()[self.span.clone()]; + let haystack = &processed.extract(self.span.clone()); let possible_end = self.span.start + haystack .find('\n') diff --git a/libs/config/src/analyze/lints/c10_class_missing_braces.rs b/libs/config/src/analyze/lints/c10_class_missing_braces.rs index 56d55eab..30e91530 100644 --- a/libs/config/src/analyze/lints/c10_class_missing_braces.rs +++ b/libs/config/src/analyze/lints/c10_class_missing_braces.rs @@ -128,7 +128,7 @@ impl Code10ClassMissingBraces { } fn generate_processed(mut self, processed: &Processed) -> Self { - let haystack = &processed.as_str()[self.span.clone()]; + let haystack = 
&processed.extract(self.span.clone()); let possible_end = self.span.start + haystack .find('\n') diff --git a/libs/sqf/src/analyze/lints/s05_if_assign.rs b/libs/sqf/src/analyze/lints/s05_if_assign.rs index 698ff204..76b5f373 100644 --- a/libs/sqf/src/analyze/lints/s05_if_assign.rs +++ b/libs/sqf/src/analyze/lints/s05_if_assign.rs @@ -202,7 +202,7 @@ impl CodeS05IfAssign { } fn generate_processed(mut self, processed: &Processed) -> Self { - let haystack = &processed.as_str()[self.rhs.1.end..]; + let haystack = &processed.extract_from(self.rhs.1.end..); let end_position = self.rhs.1.end + haystack.find('}').unwrap_or(0) + 1; self.diagnostic = Diagnostic::from_code_processed(&self, self.if_cmd.start..end_position, processed); diff --git a/libs/sqf/src/parser/lexer.rs b/libs/sqf/src/parser/lexer.rs index 221cefe9..fdf7aa96 100644 --- a/libs/sqf/src/parser/lexer.rs +++ b/libs/sqf/src/parser/lexer.rs @@ -9,9 +9,9 @@ use crate::StringWrapper; pub type Tokens = Vec<(Token, Range)>; macro_rules! chain_collect { - ($Collect:ty: $($value:expr),+ $(,)?) => { - std::iter::empty()$(.chain($value))+.collect::<$Collect>() - }; + ($Collect:ty: $($value:expr),+ $(,)?) 
=> { + std::iter::empty()$(.chain($value))+.collect::<$Collect>() + }; } pub fn strip_comments(tokens: &mut Tokens) { diff --git a/libs/sqf/tests/lints.rs b/libs/sqf/tests/lints.rs index 484bffc0..01e91605 100644 --- a/libs/sqf/tests/lints.rs +++ b/libs/sqf/tests/lints.rs @@ -24,6 +24,7 @@ lint!(s02_event_handler_case); lint!(s03_static_typename); lint!(s04_command_case); lint!(s05_if_assign); +lint!(s05_if_assign_emoji); lint!(s06_find_in_str); lint!(s07_select_parse_number); lint!(s08_format_args); diff --git a/libs/sqf/tests/lints/s05_if_assign_emoji.sqf b/libs/sqf/tests/lints/s05_if_assign_emoji.sqf new file mode 100644 index 00000000..7f30c2ec --- /dev/null +++ b/libs/sqf/tests/lints/s05_if_assign_emoji.sqf @@ -0,0 +1,2 @@ +"🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭🌭"; +private _workingArms = if (alive player) then { 2 } else { 0 }; diff --git a/libs/sqf/tests/snapshots/lints__simple_s05_if_assign_emoji.snap b/libs/sqf/tests/snapshots/lints__simple_s05_if_assign_emoji.snap new file mode 100644 index 00000000..daec1056 --- /dev/null +++ b/libs/sqf/tests/snapshots/lints__simple_s05_if_assign_emoji.snap @@ -0,0 +1,13 @@ +--- +source: libs/sqf/tests/lints.rs +expression: lint(stringify! (s05_if_assign_emoji)) +--- +help[L-S05]: assignment to if can be replaced with select + ┌─ s05_if_assign_emoji.sqf:2:24 + │ +2 │ private _workingArms = if (alive player) then { 2 } else { 0 }; + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ use select + │ + = note: the if and else blocks only return constant values + select is faster in this case + = try: [0, 2] select (alive player) diff --git a/libs/sqf/tests/snapshots/lints__simple_s08_format_args.snap b/libs/sqf/tests/snapshots/lints__simple_s08_format_args.snap index 2e621dbc..26a08d69 100644 --- a/libs/sqf/tests/snapshots/lints__simple_s08_format_args.snap +++ b/libs/sqf/tests/snapshots/lints__simple_s08_format_args.snap @@ -3,45 +3,35 @@ source: libs/sqf/tests/lints.rs expression: lint(stringify! 
(s08_format_args)) --- error[L-S08]: format string: empty array - ┌─ s08_format_args.sqf:4:21 - │ -4 │ format [" • %1", 1]; - │ ╭─────────────────────^ -5 │ │ format []; // empty array - │ ╰───────^ format string: empty array + ┌─ s08_format_args.sqf:5:1 + │ +5 │ format []; // empty array + │ ^^^^^^^^^ format string: empty array error[L-S08]: format string: unused args [used "%1", passed 3] - ┌─ s08_format_args.sqf:5:11 - │ -5 │ format []; // empty array - │ ╭───────────^ -6 │ │ format ["%1", 1, 2, 3]; // unused args - │ ╰───────────────────^ format string: unused args [used "%1", passed 3] + ┌─ s08_format_args.sqf:6:1 + │ +6 │ format ["%1", 1, 2, 3]; // unused args + │ ^^^^^^^^^^^^^^^^^^^^^ format string: unused args [used "%1", passed 3] error[L-S08]: format string: undefined tokens [used "%2", passed 1] - ┌─ s08_format_args.sqf:6:24 - │ -6 │ format ["%1", 1, 2, 3]; // unused args - │ ╭────────────────────────^ -7 │ │ format ["%1%2", 1]; // undefined tokens - │ ╰───────────────^ format string: undefined tokens [used "%2", passed 1] + ┌─ s08_format_args.sqf:7:1 + │ +7 │ format ["%1%2", 1]; // undefined tokens + │ ^^^^^^^^^^^^^^^^^ format string: undefined tokens [used "%2", passed 1] error[L-S08]: format string: skipped tokens [used "%5", but only 1 tokens] - ┌─ s08_format_args.sqf:7:20 - │ -7 │ format ["%1%2", 1]; // undefined tokens - │ ╭────────────────────^ -8 │ │ format ["%5", 1, 2 ,3 ,4, 5]; // skipped tokens - │ ╰─────────────────────────^ format string: skipped tokens [used "%5", but only 1 tokens] + ┌─ s08_format_args.sqf:8:1 + │ +8 │ format ["%5", 1, 2 ,3 ,4, 5]; // skipped tokens + │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^ format string: skipped tokens [used "%5", but only 1 tokens] error[L-S08]: format string: undefined tokens [used "%1", passed 0] - ┌─ s08_format_args.sqf:8:30 - │ -8 │ format ["%5", 1, 2 ,3 ,4, 5]; // skipped tokens - │ ╭──────────────────────────────^ -9 │ │ formatText ["me too %1"]; - │ ╰─────────────────────^ format string: undefined tokens 
[used "%1", passed 0] + ┌─ s08_format_args.sqf:9:1 + │ +9 │ formatText ["me too %1"]; + │ ^^^^^^^^^^^^^^^^^^^^^^^ format string: undefined tokens [used "%1", passed 0] diff --git a/libs/workspace/src/reporting/processed.rs b/libs/workspace/src/reporting/processed.rs index 81db701e..35f2c891 100644 --- a/libs/workspace/src/reporting/processed.rs +++ b/libs/workspace/src/reporting/processed.rs @@ -1,4 +1,8 @@ -use std::{collections::HashMap, ops::Range, sync::Arc}; +use std::{ + collections::HashMap, + ops::{Range, RangeFrom}, + sync::Arc, +}; use tracing::warn; use crate::{ @@ -106,12 +110,13 @@ fn append_token( } processed.mappings.push(Mapping { processed: (LineCol(processed.total, (processed.line, processed.col)), { - processed.col += str.len(); - processed.total += str.len(); + let chars = str.chars().count(); + processed.col += chars; + processed.total += chars; processed.output.push_str(&str); LineCol( - processed.total + str.len(), - (processed.line, processed.col + str.len()), + processed.total + chars, + (processed.line, processed.col + chars), ) }), source, @@ -288,7 +293,7 @@ impl Processed { } let mut real_start = 0; let mut real_end = 0; - self.output.char_indices().for_each(|(p, c)| { + self.output.chars().enumerate().for_each(|(p, c)| { if p < span.start { real_start += c.len_utf8(); } @@ -298,6 +303,17 @@ impl Processed { }); Arc::from(&self.output[real_start..real_end]) } + + #[must_use] + pub fn extract_from(&self, from: RangeFrom) -> Arc { + let mut real_start = 0; + self.output.chars().enumerate().for_each(|(p, c)| { + if p < from.start { + real_start += c.len_utf8(); + } + }); + Arc::from(&self.output[real_start..]) + } } #[derive(Debug)]