Skip to content

Commit c25d1e6

Browse files
committed
Rename the unescaping functions.
`unescape_literal` becomes `unescape_unicode`, and `unescape_c_string` becomes `unescape_mixed`, because RFC 3349 will mean that C string literals are no longer the only mixed-UTF-8 literals.
1 parent 9ab1fe0 commit c25d1e6

File tree

9 files changed

+45
-42
lines changed

9 files changed

+45
-42
lines changed

compiler/rustc_ast/src/util/literal.rs

+4-5
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,7 @@
33
use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
44
use crate::token::{self, Token};
55
use rustc_lexer::unescape::{
6-
byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, MixedUnit,
7-
Mode,
6+
byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode, MixedUnit, Mode,
87
};
98
use rustc_span::symbol::{kw, sym, Symbol};
109
use rustc_span::Span;
@@ -85,7 +84,7 @@ impl LitKind {
8584
// Force-inlining here is aggressive but the closure is
8685
// called on every char in the string, so it can be hot in
8786
// programs with many long strings containing escapes.
88-
unescape_literal(
87+
unescape_unicode(
8988
s,
9089
Mode::Str,
9190
&mut #[inline(always)]
@@ -109,7 +108,7 @@ impl LitKind {
109108
token::ByteStr => {
110109
let s = symbol.as_str();
111110
let mut buf = Vec::with_capacity(s.len());
112-
unescape_literal(s, Mode::ByteStr, &mut |_, c| match c {
111+
unescape_unicode(s, Mode::ByteStr, &mut |_, c| match c {
113112
Ok(c) => buf.push(byte_from_char(c)),
114113
Err(err) => {
115114
assert!(!err.is_fatal(), "failed to unescape string literal")
@@ -126,7 +125,7 @@ impl LitKind {
126125
token::CStr => {
127126
let s = symbol.as_str();
128127
let mut buf = Vec::with_capacity(s.len());
129-
unescape_c_string(s, Mode::CStr, &mut |_span, c| match c {
128+
unescape_mixed(s, Mode::CStr, &mut |_span, c| match c {
130129
Ok(MixedUnit::Char(c)) => {
131130
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
132131
}

compiler/rustc_lexer/src/unescape.rs

+8-4
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,12 @@ impl EscapeError {
8080
}
8181
}
8282

83-
/// Takes a contents of a literal (without quotes) and produces a sequence of
84-
/// escaped characters or errors.
83+
/// Takes the contents of a unicode-only (non-mixed-utf8) literal (without
84+
/// quotes) and produces a sequence of escaped characters or errors.
8585
///
8686
/// Values are returned by invoking `callback`. For `Char` and `Byte` modes,
8787
/// the callback will be called exactly once.
88-
pub fn unescape_literal<F>(src: &str, mode: Mode, callback: &mut F)
88+
pub fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F)
8989
where
9090
F: FnMut(Range<usize>, Result<char, EscapeError>),
9191
{
@@ -132,7 +132,11 @@ impl From<u8> for MixedUnit {
132132
}
133133
}
134134

135-
pub fn unescape_c_string<F>(src: &str, mode: Mode, callback: &mut F)
135+
/// Takes the contents of a mixed-utf8 literal (without quotes) and produces
136+
/// a sequence of escaped characters or errors.
137+
///
138+
/// Values are returned by invoking `callback`.
139+
pub fn unescape_mixed<F>(src: &str, mode: Mode, callback: &mut F)
136140
where
137141
F: FnMut(Range<usize>, Result<MixedUnit, EscapeError>),
138142
{

compiler/rustc_lexer/src/unescape/tests.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ fn test_unescape_char_good() {
100100
fn test_unescape_str_warn() {
101101
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
102102
let mut unescaped = Vec::with_capacity(literal.len());
103-
unescape_literal(literal, Mode::Str, &mut |range, res| unescaped.push((range, res)));
103+
unescape_unicode(literal, Mode::Str, &mut |range, res| unescaped.push((range, res)));
104104
assert_eq!(unescaped, expected);
105105
}
106106

@@ -124,7 +124,7 @@ fn test_unescape_str_warn() {
124124
fn test_unescape_str_good() {
125125
fn check(literal_text: &str, expected: &str) {
126126
let mut buf = Ok(String::with_capacity(literal_text.len()));
127-
unescape_literal(literal_text, Mode::Str, &mut |range, c| {
127+
unescape_unicode(literal_text, Mode::Str, &mut |range, c| {
128128
if let Ok(b) = &mut buf {
129129
match c {
130130
Ok(c) => b.push(c),
@@ -241,7 +241,7 @@ fn test_unescape_byte_good() {
241241
fn test_unescape_byte_str_good() {
242242
fn check(literal_text: &str, expected: &[u8]) {
243243
let mut buf = Ok(Vec::with_capacity(literal_text.len()));
244-
unescape_literal(literal_text, Mode::ByteStr, &mut |range, c| {
244+
unescape_unicode(literal_text, Mode::ByteStr, &mut |range, c| {
245245
if let Ok(b) = &mut buf {
246246
match c {
247247
Ok(c) => b.push(byte_from_char(c)),
@@ -264,7 +264,7 @@ fn test_unescape_byte_str_good() {
264264
fn test_unescape_raw_str() {
265265
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
266266
let mut unescaped = Vec::with_capacity(literal.len());
267-
unescape_literal(literal, Mode::RawStr, &mut |range, res| unescaped.push((range, res)));
267+
unescape_unicode(literal, Mode::RawStr, &mut |range, res| unescaped.push((range, res)));
268268
assert_eq!(unescaped, expected);
269269
}
270270

@@ -276,7 +276,7 @@ fn test_unescape_raw_str() {
276276
fn test_unescape_raw_byte_str() {
277277
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
278278
let mut unescaped = Vec::with_capacity(literal.len());
279-
unescape_literal(literal, Mode::RawByteStr, &mut |range, res| unescaped.push((range, res)));
279+
unescape_unicode(literal, Mode::RawByteStr, &mut |range, res| unescaped.push((range, res)));
280280
assert_eq!(unescaped, expected);
281281
}
282282

compiler/rustc_parse/src/lexer/mod.rs

+12-12
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ impl<'sess, 'src> StringReader<'sess, 'src> {
400400
.with_code(error_code!(E0762))
401401
.emit()
402402
}
403-
self.cook_quoted(token::Char, Mode::Char, start, end, 1, 1) // ' '
403+
self.cook_unicode(token::Char, Mode::Char, start, end, 1, 1) // ' '
404404
}
405405
rustc_lexer::LiteralKind::Byte { terminated } => {
406406
if !terminated {
@@ -412,7 +412,7 @@ impl<'sess, 'src> StringReader<'sess, 'src> {
412412
.with_code(error_code!(E0763))
413413
.emit()
414414
}
415-
self.cook_quoted(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
415+
self.cook_unicode(token::Byte, Mode::Byte, start, end, 2, 1) // b' '
416416
}
417417
rustc_lexer::LiteralKind::Str { terminated } => {
418418
if !terminated {
@@ -424,7 +424,7 @@ impl<'sess, 'src> StringReader<'sess, 'src> {
424424
.with_code(error_code!(E0765))
425425
.emit()
426426
}
427-
self.cook_quoted(token::Str, Mode::Str, start, end, 1, 1) // " "
427+
self.cook_unicode(token::Str, Mode::Str, start, end, 1, 1) // " "
428428
}
429429
rustc_lexer::LiteralKind::ByteStr { terminated } => {
430430
if !terminated {
@@ -436,7 +436,7 @@ impl<'sess, 'src> StringReader<'sess, 'src> {
436436
.with_code(error_code!(E0766))
437437
.emit()
438438
}
439-
self.cook_quoted(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
439+
self.cook_unicode(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
440440
}
441441
rustc_lexer::LiteralKind::CStr { terminated } => {
442442
if !terminated {
@@ -448,13 +448,13 @@ impl<'sess, 'src> StringReader<'sess, 'src> {
448448
.with_code(error_code!(E0767))
449449
.emit()
450450
}
451-
self.cook_c_string(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
451+
self.cook_mixed(token::CStr, Mode::CStr, start, end, 2, 1) // c" "
452452
}
453453
rustc_lexer::LiteralKind::RawStr { n_hashes } => {
454454
if let Some(n_hashes) = n_hashes {
455455
let n = u32::from(n_hashes);
456456
let kind = token::StrRaw(n_hashes);
457-
self.cook_quoted(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
457+
self.cook_unicode(kind, Mode::RawStr, start, end, 2 + n, 1 + n) // r##" "##
458458
} else {
459459
self.report_raw_str_error(start, 1);
460460
}
@@ -463,7 +463,7 @@ impl<'sess, 'src> StringReader<'sess, 'src> {
463463
if let Some(n_hashes) = n_hashes {
464464
let n = u32::from(n_hashes);
465465
let kind = token::ByteStrRaw(n_hashes);
466-
self.cook_quoted(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
466+
self.cook_unicode(kind, Mode::RawByteStr, start, end, 3 + n, 1 + n) // br##" "##
467467
} else {
468468
self.report_raw_str_error(start, 2);
469469
}
@@ -472,7 +472,7 @@ impl<'sess, 'src> StringReader<'sess, 'src> {
472472
if let Some(n_hashes) = n_hashes {
473473
let n = u32::from(n_hashes);
474474
let kind = token::CStrRaw(n_hashes);
475-
self.cook_c_string(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
475+
self.cook_mixed(kind, Mode::RawCStr, start, end, 3 + n, 1 + n) // cr##" "##
476476
} else {
477477
self.report_raw_str_error(start, 2);
478478
}
@@ -735,7 +735,7 @@ impl<'sess, 'src> StringReader<'sess, 'src> {
735735
}
736736
}
737737

738-
fn cook_quoted(
738+
fn cook_unicode(
739739
&self,
740740
kind: token::LitKind,
741741
mode: Mode,
@@ -745,13 +745,13 @@ impl<'sess, 'src> StringReader<'sess, 'src> {
745745
postfix_len: u32,
746746
) -> (token::LitKind, Symbol) {
747747
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
748-
unescape::unescape_literal(src, mode, &mut |span, result| {
748+
unescape::unescape_unicode(src, mode, &mut |span, result| {
749749
callback(span, result.map(drop))
750750
})
751751
})
752752
}
753753

754-
fn cook_c_string(
754+
fn cook_mixed(
755755
&self,
756756
kind: token::LitKind,
757757
mode: Mode,
@@ -761,7 +761,7 @@ impl<'sess, 'src> StringReader<'sess, 'src> {
761761
postfix_len: u32,
762762
) -> (token::LitKind, Symbol) {
763763
self.cook_common(kind, mode, start, end, prefix_len, postfix_len, |src, mode, callback| {
764-
unescape::unescape_c_string(src, mode, &mut |span, result| {
764+
unescape::unescape_mixed(src, mode, &mut |span, result| {
765765
callback(span, result.map(drop))
766766
})
767767
})

compiler/rustc_parse_format/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1056,7 +1056,7 @@ fn find_width_map_from_snippet(
10561056
fn unescape_string(string: &str) -> Option<string::String> {
10571057
let mut buf = string::String::new();
10581058
let mut ok = true;
1059-
unescape::unescape_literal(string, unescape::Mode::Str, &mut |_, unescaped_char| {
1059+
unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| {
10601060
match unescaped_char {
10611061
Ok(c) => buf.push(c),
10621062
Err(_) => ok = false,

src/tools/clippy/clippy_dev/src/update_lints.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -928,7 +928,7 @@ fn remove_line_splices(s: &str) -> String {
928928
.and_then(|s| s.strip_suffix('"'))
929929
.unwrap_or_else(|| panic!("expected quoted string, found `{s}`"));
930930
let mut res = String::with_capacity(s.len());
931-
unescape::unescape_literal(s, unescape::Mode::Str, &mut |range, ch| {
931+
unescape::unescape_unicode(s, unescape::Mode::Str, &mut |range, ch| {
932932
if ch.is_ok() {
933933
res.push_str(&s[range]);
934934
}

src/tools/rust-analyzer/crates/parser/src/lexed_str.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -379,14 +379,14 @@ fn unescape_string_error_message(text: &str, mode: Mode) -> &'static str {
379379
let mut error_message = "";
380380
match mode {
381381
Mode::CStr => {
382-
rustc_lexer::unescape::unescape_c_string(text, mode, &mut |_, res| {
382+
rustc_lexer::unescape::unescape_mixed(text, mode, &mut |_, res| {
383383
if let Err(e) = res {
384384
error_message = error_to_diagnostic_message(e, mode);
385385
}
386386
});
387387
}
388388
Mode::ByteStr | Mode::Str => {
389-
rustc_lexer::unescape::unescape_literal(text, mode, &mut |_, res| {
389+
rustc_lexer::unescape::unescape_unicode(text, mode, &mut |_, res| {
390390
if let Err(e) = res {
391391
error_message = error_to_diagnostic_message(e, mode);
392392
}

src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use std::{
66
};
77

88
use rustc_lexer::unescape::{
9-
unescape_byte, unescape_c_string, unescape_char, unescape_literal, MixedUnit, Mode,
9+
unescape_byte, unescape_char, unescape_mixed, unescape_unicode, MixedUnit, Mode,
1010
};
1111

1212
use crate::{
@@ -193,7 +193,7 @@ pub trait IsString: AstToken {
193193
let text = &self.text()[text_range_no_quotes - start];
194194
let offset = text_range_no_quotes.start() - start;
195195

196-
unescape_literal(text, Self::MODE, &mut |range, unescaped_char| {
196+
unescape_unicode(text, Self::MODE, &mut |range, unescaped_char| {
197197
let text_range =
198198
TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
199199
cb(text_range + offset, unescaped_char);
@@ -226,7 +226,7 @@ impl ast::String {
226226
let mut buf = String::new();
227227
let mut prev_end = 0;
228228
let mut has_error = false;
229-
unescape_literal(text, Self::MODE, &mut |char_range, unescaped_char| match (
229+
unescape_unicode(text, Self::MODE, &mut |char_range, unescaped_char| match (
230230
unescaped_char,
231231
buf.capacity() == 0,
232232
) {
@@ -270,7 +270,7 @@ impl ast::ByteString {
270270
let mut buf: Vec<u8> = Vec::new();
271271
let mut prev_end = 0;
272272
let mut has_error = false;
273-
unescape_literal(text, Self::MODE, &mut |char_range, unescaped_char| match (
273+
unescape_unicode(text, Self::MODE, &mut |char_range, unescaped_char| match (
274274
unescaped_char,
275275
buf.capacity() == 0,
276276
) {
@@ -311,7 +311,7 @@ impl IsString for ast::CString {
311311
let text = &self.text()[text_range_no_quotes - start];
312312
let offset = text_range_no_quotes.start() - start;
313313

314-
unescape_c_string(text, Self::MODE, &mut |range, unescaped_char| {
314+
unescape_mixed(text, Self::MODE, &mut |range, unescaped_char| {
315315
let text_range =
316316
TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
317317
// XXX: This method should only be used for highlighting ranges. The unescaped
@@ -340,7 +340,7 @@ impl ast::CString {
340340
MixedUnit::Char(c) => buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes()),
341341
MixedUnit::HighByte(b) => buf.push(b),
342342
};
343-
unescape_c_string(text, Self::MODE, &mut |char_range, unescaped| match (
343+
unescape_mixed(text, Self::MODE, &mut |char_range, unescaped| match (
344344
unescaped,
345345
buf.capacity() == 0,
346346
) {

src/tools/rust-analyzer/crates/syntax/src/validation.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
mod block;
66

77
use rowan::Direction;
8-
use rustc_lexer::unescape::{self, unescape_c_string, unescape_literal, Mode};
8+
use rustc_lexer::unescape::{self, unescape_mixed, unescape_unicode, Mode};
99

1010
use crate::{
1111
algo,
@@ -140,7 +140,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
140140
ast::LiteralKind::String(s) => {
141141
if !s.is_raw() {
142142
if let Some(without_quotes) = unquote(text, 1, '"') {
143-
unescape_literal(without_quotes, Mode::Str, &mut |range, char| {
143+
unescape_unicode(without_quotes, Mode::Str, &mut |range, char| {
144144
if let Err(err) = char {
145145
push_err(1, range.start, err);
146146
}
@@ -151,7 +151,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
151151
ast::LiteralKind::ByteString(s) => {
152152
if !s.is_raw() {
153153
if let Some(without_quotes) = unquote(text, 2, '"') {
154-
unescape_literal(without_quotes, Mode::ByteStr, &mut |range, char| {
154+
unescape_unicode(without_quotes, Mode::ByteStr, &mut |range, char| {
155155
if let Err(err) = char {
156156
push_err(1, range.start, err);
157157
}
@@ -162,7 +162,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
162162
ast::LiteralKind::CString(s) => {
163163
if !s.is_raw() {
164164
if let Some(without_quotes) = unquote(text, 2, '"') {
165-
unescape_c_string(without_quotes, Mode::CStr, &mut |range, char| {
165+
unescape_mixed(without_quotes, Mode::CStr, &mut |range, char| {
166166
if let Err(err) = char {
167167
push_err(1, range.start, err);
168168
}
@@ -172,7 +172,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
172172
}
173173
ast::LiteralKind::Char(_) => {
174174
if let Some(without_quotes) = unquote(text, 1, '\'') {
175-
unescape_literal(without_quotes, Mode::Char, &mut |range, char| {
175+
unescape_unicode(without_quotes, Mode::Char, &mut |range, char| {
176176
if let Err(err) = char {
177177
push_err(1, range.start, err);
178178
}
@@ -181,7 +181,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
181181
}
182182
ast::LiteralKind::Byte(_) => {
183183
if let Some(without_quotes) = unquote(text, 2, '\'') {
184-
unescape_literal(without_quotes, Mode::Byte, &mut |range, char| {
184+
unescape_unicode(without_quotes, Mode::Byte, &mut |range, char| {
185185
if let Err(err) = char {
186186
push_err(2, range.start, err);
187187
}

0 commit comments

Comments
 (0)