From 32aeea83619a80b46ffe6ecb1387837a4423af28 Mon Sep 17 00:00:00 2001 From: Chris Emerson Date: Sun, 19 Jan 2025 09:57:29 +0000 Subject: [PATCH 1/3] Add config::rich_no_decorate() This is similar to rich, but for things like bold and code blocks, don't include the textual decorations which can be handled by (for example) terminal attributes. --- examples/html2text.rs | 15 ++++++++++++- src/lib.rs | 5 +++++ src/render/text_renderer.rs | 45 ++++++++++++++++++++++++++++++------- src/tests.rs | 11 +++++++++ 4 files changed, 67 insertions(+), 9 deletions(-) diff --git a/examples/html2text.rs b/examples/html2text.rs index b69c288..e2ff163 100644 --- a/examples/html2text.rs +++ b/examples/html2text.rs @@ -109,7 +109,11 @@ where #[cfg(unix)] { if flags.use_colour { - let conf = config::rich(); + let conf = if flags.no_decorate { + config::rich_no_decorate() + } else { + config::rich() + }; let conf = update_config(conf, &flags); #[cfg(feature = "css")] let use_css_colours = !flags.ignore_css_colours; @@ -163,6 +167,8 @@ struct Flags { wrap_width: Option, #[allow(unused)] use_colour: bool, + #[allow(unused)] + no_decorate: bool, #[cfg(feature = "css")] use_css: bool, #[cfg(feature = "css")] @@ -185,6 +191,7 @@ fn main() { width: 80, wrap_width: None, use_colour: false, + no_decorate: false, #[cfg(feature = "css")] use_css: false, #[cfg(feature = "css")] @@ -231,6 +238,12 @@ fn main() { StoreTrue, "Use ANSI terminal colours", ); + #[cfg(unix)] + ap.refer(&mut flags.no_decorate).add_option( + &["--no-decorate"], + StoreTrue, + "Skip decorations (with --colour)", + ); #[cfg(feature = "css")] ap.refer(&mut flags.use_css) .add_option(&["--css"], StoreTrue, "Enable CSS"); diff --git a/src/lib.rs b/src/lib.rs index 9861f07..c44f276 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2590,6 +2590,11 @@ pub mod config { with_decorator(RichDecorator::new()) } + /// Return a Config initialized with a `RichDecorator` and decoration disabled. + pub fn rich_no_decorate() -> Config { + with_decorator(RichDecorator::new_undecorated()) + } + /// Return a Config initialized with a `PlainDecorator`. pub fn plain() -> Config { with_decorator(PlainDecorator::new()) diff --git a/src/render/text_renderer.rs b/src/render/text_renderer.rs index 063ba1b..1261731 100644 --- a/src/render/text_renderer.rs +++ b/src/render/text_renderer.rs @@ -1865,7 +1865,10 @@ impl TextDecorator for TrivialDecorator { /// A decorator to generate rich text (styled) rather than /// pure text output. #[derive(Clone, Debug)] -pub struct RichDecorator {} +pub struct RichDecorator { + // Don't output decorations around '*bold*' text. + skip_decorations: bool, +} /// Annotation type for "rich" text. Text is associated with a set of /// these. @@ -1896,10 +1899,20 @@ pub enum RichAnnotation { } impl RichDecorator { - /// Create a new `RichDecorator`. + /// Create a new `RichDecorator` with the default settings. #[allow(clippy::new_without_default)] pub fn new() -> RichDecorator { - RichDecorator {} + RichDecorator { + skip_decorations: false, + } + } + + /// Create a new `RichDecorator` which doesn't add decorations + /// when terminal formatting can be used. + pub fn new_undecorated() -> RichDecorator { + RichDecorator { + skip_decorations: true, + } } } @@ -1923,11 +1936,19 @@ impl TextDecorator for RichDecorator { } fn decorate_strong_start(&self) -> (String, Self::Annotation) { - ("*".to_string(), RichAnnotation::Strong) + if self.skip_decorations { + ("".to_string(), RichAnnotation::Strong) + } else { + ("*".to_string(), RichAnnotation::Strong) + } } fn decorate_strong_end(&self) -> String { - "*".to_string() + if self.skip_decorations { + "".to_string() + } else { + "*".to_string() + } } fn decorate_strikeout_start(&self) -> (String, Self::Annotation) { @@ -1939,11 +1960,19 @@ impl TextDecorator for RichDecorator { } fn decorate_code_start(&self) -> (String, Self::Annotation) { - ("`".to_string(), RichAnnotation::Code) + if self.skip_decorations { + ("".to_string(), RichAnnotation::Code) + } else { + ("`".to_string(), RichAnnotation::Code) + } } fn decorate_code_end(&self) -> String { - "`".to_string() + if self.skip_decorations { + "".to_string() + } else { + "`".to_string() + } } fn decorate_preformat_first(&self) -> Self::Annotation { @@ -1979,7 +2008,7 @@ impl TextDecorator for RichDecorator { } fn make_subblock_decorator(&self) -> Self { - RichDecorator::new() + self.clone() } fn push_colour(&mut self, colour: Colour) -> Option { diff --git a/src/tests.rs b/src/tests.rs index 3787b49..cf19aee 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1393,6 +1393,17 @@ fn test_read_rich() { assert_eq!(vec![line], lines); } +#[test] +fn test_read_rich_nodecorate() { + let html: &[u8] = b"bold"; + let lines = config::rich_no_decorate() + .render_to_lines(parse(html).unwrap(), 80) + .unwrap(); + let tag = vec![RichAnnotation::Strong]; + let line = TaggedLine::from_string("bold".to_owned(), &tag); + assert_eq!(vec![line], lines); +} + #[test] fn test_read_custom() { let html: &[u8] = b"bold"; From 46f39d369d3d8a0573a33a9f993cd18bdc0cd010 Mon Sep 17 00:00:00 2001 From: Chris Emerson Date: Sun, 19 Jan 2025 10:07:18 +0000 Subject: [PATCH 2/3] Update cargo-semver-checks action version. --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 62b60f4..94bf914 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,6 +25,6 @@ jobs: override: true - name: Check semver - uses: obi1kenobi/cargo-semver-checks-action@v1 + uses: obi1kenobi/cargo-semver-checks-action@v2 with: version-tag-prefix: '' From 64af8c96c8ece90e20663435174709ac38b1ca94 Mon Sep 17 00:00:00 2001 From: Chris Emerson Date: Sun, 19 Jan 2025 10:13:08 +0000 Subject: [PATCH 3/3] Updates to 0.14.0 for the semver-breaking change. --- CHANGELOG.md | 6 ++++++ Cargo.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ccac5fb..dfe354e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,12 @@ Possible log types: ### Latest +### 0.14.0 + +- [changed] Various small refactors (thanks sftse) +- [added] New `config::rich_no_decorate`, to use annotations without '\*' markers around + bold text etc. + ### 0.13.6 - [fixed] Fixed issue parsing CSS rules with known rules but unknown values, diff --git a/Cargo.toml b/Cargo.toml index 4bb981e..737475f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "html2text" -version = "0.13.6" +version = "0.14.0" authors = ["Chris Emerson "] description = "Render HTML as plain text." repository = "https://github.com/jugglerchris/rust-html2text/"