Skip to content

Commit

Permalink
Only escape unterminated BIDI control codes
Browse files Browse the repository at this point in the history
  • Loading branch information
blyxxyz committed Nov 5, 2021
1 parent 0710ed6 commit cc6d652
Show file tree
Hide file tree
Showing 8 changed files with 205 additions and 15 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ jobs:
- run: cargo install cargo-fuzz
# This is too short to catch subtle issues, but it hopefully catches glaring ones.
- run: cargo +nightly fuzz run basic -- -max_len=32 -timeout=1 -max_total_time=60
- run: cargo +nightly fuzz run bidi -- -max_len=32 -timeout=1 -max_total_time=60
- run: cargo +nightly fuzz run shell -- -max_len=32 -timeout=1 -max_total_time=180
- run: cargo +nightly fuzz run powershell -- -max_len=32 -timeout=1 -max_total_time=180

Expand Down
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
## v0.1.2 (unreleased)
- Escape dangerous control codes for bidirectional text. See also: [rustc CVE-2021-42574](https://blog.rust-lang.org/2021/11/01/cve-2021-42574.html).
- Escape dangerous control codes for bidirectional text. See also: [CVE-2021-42574](https://blog.rust-lang.org/2021/11/01/cve-2021-42574.html).

## v0.1.1 (2021-10-14)
- Escape unicode control characters like `U+0085 NEXT LINE` and `U+2028 LINE SEPARATOR`.
Expand Down
12 changes: 12 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,16 @@ assert_eq!("\u{200B}".maybe_quote().to_string(), "'\u{200B}'");

It still misleadingly looks like `''` when printed, but it's possible to copy and paste it and get the right result.

## Bidirectional unicode
A carefully-crafted string can move part of itself to the end of the line:
```console
$ wc $'filename\u202E\u2066 [This does not belong!]\u2069\u2066'
wc: 'filename': No such file or directory [This does not belong!]
```
This is known as a [*Trojan Source*](https://trojansource.codes/) attack. It uses control codes for bidirectional text.

`os_display` escapes those control codes if they're not properly terminated.

## Feature flags
By default you can only use the current platform's quoting style. That's appropriate most of the time.

Expand Down Expand Up @@ -140,6 +150,8 @@ The Unix implementation has been [fuzzed](https://github.com/rust-fuzz/cargo-fuz

The PowerShell implementation has been fuzzed against PowerShell Core 7.1.4 running on Linux.

Both implementations have been fuzzed to test their protection against Trojan Source attacks.

## Acknowledgments
This library is modeled after the quoting done by [Gnulib](https://www.gnu.org/software/gnulib/) as seen in the GNU coreutils. The behavior is not identical, however:
- GNU uses octal escapes, like `\377` instead of `\xFF`.
Expand Down
7 changes: 7 additions & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ cargo-fuzz = true
libfuzzer-sys = "0.4"
once_cell = "1.8.0"
unicode-width = "0.1.9"
unic-bidi = "0.9.0"

[dependencies.os_display]
path = ".."
Expand All @@ -38,3 +39,9 @@ name = "powershell"
path = "fuzz_targets/powershell.rs"
test = false
doc = false

[[bin]]
name = "bidi"
path = "fuzz_targets/bidi.rs"
test = false
doc = false
60 changes: 60 additions & 0 deletions fuzz/fuzz_targets/bidi.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#![no_main]
use libfuzzer_sys::fuzz_target;

use os_display::Quoted;

fn assert_bidi_safe(text: &str) {
// The prefix stops unic_bidi from RTL-ing the context if there are
// RTL codepoints inside text. Without the prefix the suffix may end up
// at the start.

// I think that for certain combinations of terminal, text, and context it
// may be possible to mangle the context using only "normal" RTL codepoints.
// With VTE it only seems possible if the context contains RTL codepoints,
// but other terminals may follow different rules (e.g. Terminal.app).

// The context mangling is unfortunate, but it's not nearly as dangerous as
// a proper trojan-source attack (it's weak and conspicuous), and I think
// it's flat-out impossible to prevent within this crate's constraints.
// Escaping all RTL codepoints renders text unreadable, and adding LTR markers
// means it can't be copied faithfully.

let text = format!("a {} b", text);

let info = unic_bidi::BidiInfo::new(&text, None);
assert_eq!(info.paragraphs.len(), 1);
let para = &info.paragraphs[0];
let reordered = info.reorder_line(para, para.range.clone());

assert!(reordered.ends_with('b'), "{:?} → {:?}", text, reordered);
assert!(reordered.starts_with('a'));
}

const WEIRD_CHARS: &[char] = &[
// Explicitly called out in the paper.
'\u{202A}', '\u{202B}', '\u{202C}', '\u{202D}', '\u{202E}', '\u{2066}', '\u{2067}', '\u{2068}',
'\u{2069}',
// Other bidi. Not as dangerous (and ~equivalent to certain ordinary characters), but can't hurt to test.
'\u{061C}', '\u{200E}', '\u{200F}',
];

fuzz_target!(|data: &[u8]| {
let mut owned = Vec::new();
for ch in data {
match *ch {
b'a'..=b'l' => owned.extend(
WEIRD_CHARS[(*ch - b'a') as usize]
.encode_utf8(&mut [0; 4])
.as_bytes(),
),
_ => owned.push(*ch),
}
}
let data = owned;
let unix = Quoted::unix_raw(&data).force(false).to_string();
assert_bidi_safe(&unix);
if let Ok(text) = String::from_utf8(data) {
let windows = Quoted::windows(&text).force(false).to_string();
assert_bidi_safe(&windows);
}
});
116 changes: 104 additions & 12 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ fn decode_utf16(units: impl IntoIterator<Item = u16>) -> impl Iterator<Item = Re
///
/// This includes all the ASCII control characters.
fn requires_escape(ch: char) -> bool {
ch.is_control() || is_separator(ch) || is_dangerous_bidi(ch)
ch.is_control() || is_separator(ch)
}

/// U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR are currently the only
Expand All @@ -241,22 +241,82 @@ fn is_separator(ch: char) -> bool {
/// These two ranges in PropList.txt:
/// LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE
/// LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE
///
/// ARABIC LETTER MARK and LEFT-TO-RIGHT MARK/RIGHT-TO-LEFT MARK don't appear
/// to be dangerous, and escaping them could mess up legitimate text.
///
/// For more info, see this Rust CVE:
/// https://blog.rust-lang.org/2021/11/01/cve-2021-42574.html
///
/// This should be checked again for Unicode 15.0, which will release on
/// September 11, 2022.
fn is_dangerous_bidi(ch: char) -> bool {
fn is_bidi(ch: char) -> bool {
match ch {
'\u{202A}'..='\u{202E}' | '\u{2066}'..='\u{2069}' => true,
_ => false,
}
}

/// Check whether text uses bidi in a potentially problematic way.
///
/// See https://trojansource.codes/ and
/// https://www.unicode.org/reports/tr9/tr9-42.html.
///
/// If text fails this check then it's handled by write_escaped(), which
/// escapes these bidi control characters no matter what.
///
/// We can safely assume that there are no newlines (or unicode separators)
/// in the text because those would get it sent to write_escaped() earlier.
/// In unicode terms, this is all a single paragraph.
#[inline(never)]
fn is_suspicious_bidi(text: &str) -> bool {
#[derive(Clone, Copy, PartialEq)]
enum Kind {
Formatting,
Isolate,
}
const STACK_SIZE: usize = 16;
// Can't use a Vec because of no_std
let mut stack: [Option<Kind>; STACK_SIZE] = [None; STACK_SIZE];
let mut pos = 0;
for ch in text.chars() {
match ch {
'\u{202A}' | '\u{202B}' | '\u{202D}' | '\u{202E}' => {
if pos >= STACK_SIZE {
// Suspicious amount of nesting.
return true;
}
stack[pos] = Some(Kind::Formatting);
pos += 1;
}
'\u{202C}' => {
if pos == 0 {
// Unpaired terminator.
// Not necessarily dangerous, but suspicious and
// could disrupt preceding text.
return true;
}
pos -= 1;
if stack[pos] != Some(Kind::Formatting) {
// Terminator doesn't match.
// UAX #9 says to pop the stack until we find a match.
// But we'll keep things simple and cautious.
return true;
}
}
'\u{2066}' | '\u{2067}' | '\u{2068}' => {
if pos >= STACK_SIZE {
return true;
}
stack[pos] = Some(Kind::Isolate);
pos += 1;
}
'\u{2069}' => {
if pos == 0 {
return true;
}
pos -= 1;
if stack[pos] != Some(Kind::Isolate) {
return true;
}
}
_ => (),
}
}
pos != 0
}

#[cfg(feature = "native")]
mod native {
use super::*;
Expand Down Expand Up @@ -344,7 +404,7 @@ mod tests {

use super::*;

use std::string::ToString;
use std::string::{String, ToString};

const BOTH_ALWAYS: &[(&str, &str)] = &[
("foo", "'foo'"),
Expand All @@ -367,6 +427,11 @@ mod tests {
("\u{200B}a", "'\u{200B}a'"),
("a\u{200B}", "a\u{200B}"),
("\u{2000}", "'\u{2000}'"),
// Odd but safe bidi
(
"\u{2067}\u{2066}abc\u{2069}\u{2066}def\u{2069}\u{2069}",
"'\u{2067}\u{2066}abc\u{2069}\u{2066}def\u{2069}\u{2069}'",
),
];

const UNIX_ALWAYS: &[(&str, &str)] = &[
Expand All @@ -387,6 +452,7 @@ mod tests {
("\u{85}", r#"$'\xC2\x85'"#),
("\u{85}a", r#"$'\xC2\x85'$'a'"#),
("\u{2028}", r#"$'\xE2\x80\xA8'"#),
// Dangerous bidi
(
"user\u{202E} \u{2066}// Check if admin\u{2069} \u{2066}",
r#"$'user\xE2\x80\xAE \xE2\x81\xA6// Check if admin\xE2\x81\xA9 \xE2\x81\xA6'"#,
Expand All @@ -409,6 +475,13 @@ mod tests {
for &(orig, expected) in UNIX_RAW {
assert_eq!(Quoted::unix_raw(orig).to_string(), expected);
}
let bidi_ok = nest_bidi(16);
assert_eq!(
Quoted::unix(&bidi_ok).to_string(),
"'".to_string() + &bidi_ok + "'"
);
let bidi_too_deep = nest_bidi(17);
assert!(Quoted::unix(&bidi_too_deep).to_string().starts_with('$'));
}

const WINDOWS_ALWAYS: &[(&str, &str)] = &[
Expand Down Expand Up @@ -450,6 +523,13 @@ mod tests {
for &(orig, expected) in WINDOWS_RAW {
assert_eq!(Quoted::windows_raw(orig).to_string(), expected);
}
let bidi_ok = nest_bidi(16);
assert_eq!(
Quoted::windows(&bidi_ok).to_string(),
"'".to_string() + &bidi_ok + "'"
);
let bidi_too_deep = nest_bidi(17);
assert!(Quoted::windows(&bidi_too_deep).to_string().contains('`'));
}

#[cfg(feature = "native")]
Expand Down Expand Up @@ -511,4 +591,16 @@ mod tests {
Path::new("foo").to_owned().quote();
Cow::Borrowed(Path::new("foo")).quote();
}

fn nest_bidi(n: usize) -> String {
let mut out = String::new();
for _ in 0..n {
out.push('\u{2066}');
}
out.push('a');
for _ in 0..n {
out.push('\u{2069}');
}
out
}
}
10 changes: 9 additions & 1 deletion src/unix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ pub(crate) fn write(f: &mut Formatter<'_>, text: &str, force_quote: bool) -> fmt
let mut is_single_safe = true;
let mut is_double_safe = true;
let mut requires_quote = force_quote;
let mut is_bidi = false;

if !requires_quote {
if let Some(first) = text.chars().next() {
Expand Down Expand Up @@ -75,12 +76,19 @@ pub(crate) fn write(f: &mut Formatter<'_>, text: &str, force_quote: bool) -> fmt
// This check goes stale when new whitespace codepoints are assigned.
requires_quote = true;
}
if crate::is_bidi(ch) {
is_bidi = true;
}
if crate::requires_escape(ch) {
return write_escaped(f, text.as_bytes());
}
}
}

if is_bidi && crate::is_suspicious_bidi(text) {
return write_escaped(f, text.as_bytes());
}

if !requires_quote {
f.write_str(text)
} else if is_single_safe {
Expand Down Expand Up @@ -155,7 +163,7 @@ pub(crate) fn write_escaped(f: &mut Formatter<'_>, text: &[u8]) -> fmt::Result {
// and null bytes can't appear in arguments anyway,
// so let's stay clear of that.
// Some but not all shells have \e for \x1B.
ch if crate::requires_escape(ch) => {
ch if crate::requires_escape(ch) || crate::is_bidi(ch) => {
// Most shells support \uXXXX escape codes, but busybox sh
// doesn't, so we always encode the raw UTF-8. Bit unfortunate,
// but GNU does the same.
Expand Down
12 changes: 11 additions & 1 deletion src/windows.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pub(crate) fn write(f: &mut Formatter<'_>, text: &str, force_quote: bool) -> fmt
let mut is_single_safe = true;
let mut is_double_safe = true;
let mut requires_quote = force_quote;
let mut is_bidi = false;

if !requires_quote {
if let Some(first) = text.chars().next() {
Expand Down Expand Up @@ -94,12 +95,19 @@ pub(crate) fn write(f: &mut Formatter<'_>, text: &str, force_quote: bool) -> fmt
is_single_safe = false;
requires_quote = true;
}
if crate::is_bidi(ch) {
is_bidi = true;
}
if crate::requires_escape(ch) {
return write_escaped(f, text.chars().map(Ok));
}
}
}

if is_bidi && crate::is_suspicious_bidi(text) {
return write_escaped(f, text.chars().map(Ok));
}

if !requires_quote {
f.write_str(text)
} else if is_single_safe {
Expand Down Expand Up @@ -163,7 +171,9 @@ pub(crate) fn write_escaped(
'\x08' => f.write_str("`b")?,
'\x0b' => f.write_str("`v")?,
'\x0c' => f.write_str("`f")?,
ch if crate::requires_escape(ch) => write!(f, "`u{{{:02X}}}", ch as u32)?,
ch if crate::requires_escape(ch) || crate::is_bidi(ch) => {
write!(f, "`u{{{:02X}}}", ch as u32)?
}
'`' => f.write_str("``")?,
'$' => f.write_str("`$")?,
ch if unicode::is_double_quote(ch) => {
Expand Down

0 comments on commit cc6d652

Please sign in to comment.