From fde1cb237120e9ba93cd0d0b9bf6443a90b158d6 Mon Sep 17 00:00:00 2001 From: Ronny Chevalier Date: Wed, 17 Jul 2024 00:27:58 +0200 Subject: [PATCH] feat: start to support typos config files To ease usability, it now supports loading the relevant fields from a config file of `typos`. It ignores the irrelevant fields. --- Cargo.lock | 84 ++++++++++ Cargo.toml | 3 + src/cli.rs | 96 +++++++----- src/config.rs | 420 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + src/main.rs | 19 ++- 6 files changed, 584 insertions(+), 39 deletions(-) create mode 100644 src/config.rs diff --git a/Cargo.lock b/Cargo.lock index 8e2385d..7693371 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -204,6 +204,12 @@ version = "0.8.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "errno" version = "0.3.9" @@ -233,6 +239,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + [[package]] name = "heck" version = "0.5.0" @@ -255,6 +267,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "indexmap" +version = "2.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "is_ci" version = "1.2.0" @@ -267,6 +289,16 @@ version = "1.70.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" +[[package]] +name = "kstring" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3066350882a1cd6d950d055997f379ac37fd39f81cd4d8ed186032eb3c5747" +dependencies = [ + "serde", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.155" @@ -347,8 +379,11 @@ dependencies = [ "anyhow", "clap", "ignore", + "kstring", "miette", + "serde", "thiserror", + "toml", "tree-sitter", "tree-sitter-c", "tree-sitter-cpp", @@ -463,12 +498,27 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_spanned" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79e674e01f999af37c49f70a6ede167a8a60b2503e56c5599532a65baa5969a0" +dependencies = [ + "serde", +] + [[package]] name = "smawk" version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.11.1" @@ -548,6 +598,40 @@ dependencies = [ "syn", ] +[[package]] +name = "toml" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f49eb2ab21d2f26bd6db7bf383edc527a7ebaee412d17af4d40fdccd442f335" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4badfd56924ae69bcc9039335b2e017639ce3f9b001c393c1b2d1ef846ce2cbf" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d59a3a72298453f564e2b111fa896f8d07fabb36f51f06d7e875fc5e0b5a3ef1" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + [[package]] name = "tree-sitter" version = "0.22.6" diff --git a/Cargo.toml b/Cargo.toml index 4bbd5e0..62b8324 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,8 +35,11 @@ lang-yaml = ["dep:tree-sitter-yaml"] anyhow = "1.0.86" clap = { version = "4.5.9", features = ["derive"] } ignore = "0.4.22" +kstring = { version = "2.0.0", features = ["serde"] } miette = { version = "7.2.0", features = ["fancy"] } +serde = { version = "1.0.204", features = ["derive"] } thiserror = "1.0.61" +toml = "0.8.14" tree-sitter = "0.22.6" tree-sitter-c = { version = "0.21.4", optional = true } tree-sitter-cpp = { version = "0.22.2", optional = true } diff --git a/src/cli.rs b/src/cli.rs index cec3d2c..78eab99 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,8 +1,10 @@ use std::fs::Metadata; -use std::path::Path; use std::path::PathBuf; -use ignore::{DirEntry, WalkBuilder}; +use ignore::DirEntry; + +use orthotypos::config; +use orthotypos::config::Config; #[derive(Copy, Clone, PartialEq, Eq, clap::ValueEnum, Default)] pub enum Format { @@ -39,8 +41,53 @@ pub(crate) struct Args { walk: WalkArgs, } +impl Args { + pub fn to_walk<'a>( + &'a self, + config: &'a Config, + ) -> anyhow::Result + 'a> { + let mut overrides = ignore::overrides::OverrideBuilder::new("."); + for pattern in &config.files.extend_exclude { + overrides.add(&format!("!{}", pattern))?; + } + let overrides = overrides.build()?; + + Ok(self.path.iter().flat_map(move |path| { + let mut walk = config.to_walk_builder(path); + if self.sort { + walk.sort_by_file_name(|a, b| a.cmp(b)); + } + if !config.files.extend_exclude.is_empty() { + walk.overrides(overrides.clone()); + } + walk.build().filter_map(Result::ok).filter(|entry| { + entry + .metadata() + .as_ref() + .map(Metadata::is_file) + .unwrap_or(false) + }) + })) + } + + pub fn format(&self) -> Format { + self.format + } + + pub fn to_config(&self) -> config::Config { + config::Config { + files: self.walk.to_config(), + ..Default::default() + } + } +} + #[derive(clap::Args)] struct WalkArgs { + /// Ignore files and directories matching the glob. + #[arg(long, value_name = "GLOB")] + exclude: Vec, + /// Search hidden files and directories #[arg(long)] hidden: bool, @@ -66,41 +113,16 @@ struct WalkArgs { no_ignore_vcs: bool, } -impl Args { - pub fn to_walk_builder(&self, path: &Path) -> WalkBuilder { - let mut walk = ignore::WalkBuilder::new(path); - walk.skip_stdout(true) - .git_global( - !(self.walk.no_ignore_global || self.walk.no_ignore_vcs || self.walk.no_ignore), - ) - .git_ignore(!self.walk.no_ignore_vcs || self.walk.no_ignore) - .git_exclude(!self.walk.no_ignore_vcs || self.walk.no_ignore) - .hidden(self.walk.hidden) - .parents(!(self.walk.no_ignore_parent || self.walk.no_ignore)) - .ignore(!(self.walk.no_ignore_dot || self.walk.no_ignore)); - if self.sort { - walk.sort_by_file_name(|a, b| a.cmp(b)); +impl WalkArgs { + pub fn to_config(&self) -> config::Walk { + config::Walk { + extend_exclude: self.exclude.clone(), + ignore_hidden: Some(self.hidden), + ignore_files: Some(!self.no_ignore), + ignore_dot: Some(!self.no_ignore_dot), + ignore_vcs: Some(!self.no_ignore_vcs), + ignore_global: Some(!self.no_ignore_global), + ignore_parent: Some(!self.no_ignore_parent), } - - walk - } - - pub fn to_walk(&self) -> impl Iterator + '_ { - self.path.iter().flat_map(|path| { - self.to_walk_builder(path) - .build() - .filter_map(Result::ok) - .filter(|entry| { - entry - .metadata() - .as_ref() - .map(Metadata::is_file) - .unwrap_or(false) - }) - }) - } - - pub fn format(&self) -> Format { - self.format } } diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..8669c9f --- /dev/null +++ b/src/config.rs @@ -0,0 +1,420 @@ +//! Config parsers to recognize the config fields of [`typos`](https://crates.io/crates/typos-cli). +// It is based on +// but it has been modified to remove fields that we do not care about for the moment. +use std::collections::HashMap; +use std::ffi::OsStr; +use std::path::{Path, PathBuf}; + +use anyhow::Context; +use ignore::WalkBuilder; +use kstring::KString; + +const NO_CHECK_TYPES: &[&str] = &["cert", "lock"]; + +pub const SUPPORTED_FILE_NAMES: &[&str] = + &["typos.toml", "_typos.toml", ".typos.toml", "pyproject.toml"]; + +#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[serde(default)] +#[serde(rename_all = "kebab-case")] +pub struct Config { + pub files: Walk, + pub default: EngineConfig, + #[serde(rename = "type")] + pub type_: TypeEngineConfig, + #[serde(skip)] + pub overrides: EngineConfig, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[serde(default)] +#[serde(rename_all = "kebab-case")] +pub struct PyprojectTomlConfig { + pub tool: PyprojectTomlTool, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[serde(default)] +#[serde(rename_all = "kebab-case")] +pub struct PyprojectTomlTool { + pub typos: Option, +} + +impl Config { + pub fn from_dir(cwd: &Path) -> anyhow::Result> { + for file in find_project_files(cwd, SUPPORTED_FILE_NAMES) { + if let Some(config) = Self::from_file(&file)? { + return Ok(Some(config)); + } + } + + Ok(None) + } + + pub fn from_file(path: &Path) -> anyhow::Result> { + let s = std::fs::read_to_string(path) + .with_context(|| format!("could not read config at `{}`", path.display()))?; + + if path.file_name() == Some(OsStr::new("pyproject.toml")) { + let config = toml::from_str::(&s) + .with_context(|| format!("could not parse config at `{}`", path.display()))?; + + if config.tool.typos.is_none() { + Ok(None) + } else { + Ok(config.tool.typos) + } + } else { + Self::from_toml(&s) + .map(Some) + .with_context(|| format!("could not parse config at `{}`", path.display())) + } + } + + pub fn from_toml(data: &str) -> anyhow::Result { + toml::from_str(data).map_err(Into::into) + } + + pub fn from_defaults() -> Self { + Self { + files: Walk::from_defaults(), + default: EngineConfig::from_defaults(), + type_: TypeEngineConfig::from_defaults(), + overrides: EngineConfig::default(), + } + } + + pub fn update(&mut self, source: &Self) { + self.files.update(&source.files); + self.default.update(&source.default); + self.type_.update(&source.type_); + self.overrides.update(&source.overrides); + } + + pub fn to_walk_builder(&self, path: &Path) -> WalkBuilder { + let mut walk = ignore::WalkBuilder::new(path); + walk.skip_stdout(true) + .git_global(self.files.ignore_global()) + .git_ignore(self.files.ignore_vcs()) + .git_exclude(self.files.ignore_vcs()) + .hidden(self.files.ignore_hidden()) + .parents(self.files.ignore_parent()) + .ignore(self.files.ignore_dot()); + + walk + } +} + +#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[serde(deny_unknown_fields)] +#[serde(default)] +#[serde(rename_all = "kebab-case")] +pub struct Walk { + pub extend_exclude: Vec, + /// Skip hidden files and directories. + pub ignore_hidden: Option, + + /// Respect ignore files. + pub ignore_files: Option, + + /// Respect .ignore files. + pub ignore_dot: Option, + + /// Respect ignore files in vcs directories. + pub ignore_vcs: Option, + + /// Respect global ignore files. + pub ignore_global: Option, + + /// Respect ignore files in parent directories. + pub ignore_parent: Option, +} + +impl Walk { + pub fn from_defaults() -> Self { + let empty = Self::default(); + Self { + extend_exclude: empty.extend_exclude.clone(), + ignore_hidden: Some(empty.ignore_hidden()), + ignore_files: Some(true), + ignore_dot: Some(empty.ignore_dot()), + ignore_vcs: Some(empty.ignore_vcs()), + ignore_global: Some(empty.ignore_global()), + ignore_parent: Some(empty.ignore_parent()), + } + } + + pub fn update(&mut self, source: &Self) { + self.extend_exclude + .extend(source.extend_exclude.iter().cloned()); + if let Some(source) = source.ignore_hidden { + self.ignore_hidden = Some(source); + } + if let Some(source) = source.ignore_files { + self.ignore_files = Some(source); + self.ignore_dot = None; + self.ignore_vcs = None; + self.ignore_global = None; + self.ignore_parent = None; + } + if let Some(source) = source.ignore_dot { + self.ignore_dot = Some(source); + } + if let Some(source) = source.ignore_vcs { + self.ignore_vcs = Some(source); + self.ignore_global = None; + } + if let Some(source) = source.ignore_global { + self.ignore_global = Some(source); + } + if let Some(source) = source.ignore_parent { + self.ignore_parent = Some(source); + } + } + + pub fn extend_exclude(&self) -> &[String] { + &self.extend_exclude + } + + pub fn ignore_hidden(&self) -> bool { + self.ignore_hidden.unwrap_or(true) + } + + pub fn ignore_dot(&self) -> bool { + self.ignore_dot.or(self.ignore_files).unwrap_or(true) + } + + pub fn ignore_vcs(&self) -> bool { + self.ignore_vcs.or(self.ignore_files).unwrap_or(true) + } + + pub fn ignore_global(&self) -> bool { + self.ignore_global + .or(self.ignore_vcs) + .or(self.ignore_files) + .unwrap_or(true) + } + + pub fn ignore_parent(&self) -> bool { + self.ignore_parent.or(self.ignore_files).unwrap_or(true) + } +} + +#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[serde(default)] +#[serde(transparent)] +pub struct TypeEngineConfig { + pub patterns: HashMap, +} + +impl TypeEngineConfig { + pub fn from_defaults() -> Self { + let mut patterns = HashMap::new(); + + for no_check_type in NO_CHECK_TYPES { + patterns.insert( + KString::from(*no_check_type), + GlobEngineConfig { + extend_glob: Vec::new(), + engine: EngineConfig { + check_file: Some(false), + ..Default::default() + }, + }, + ); + } + + Self { patterns } + } + + pub fn update(&mut self, source: &Self) { + for (type_name, engine) in &source.patterns { + self.patterns + .entry(type_name.to_owned()) + .or_default() + .update(engine); + } + } + + pub fn patterns(&self) -> impl Iterator { + let mut engine = Self::from_defaults(); + engine.update(self); + engine.patterns.into_iter() + } +} + +#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[serde(default)] +#[serde(rename_all = "kebab-case")] +pub struct GlobEngineConfig { + pub extend_glob: Vec, + #[serde(flatten)] + pub engine: EngineConfig, +} + +impl GlobEngineConfig { + pub fn update(&mut self, source: &Self) { + self.extend_glob.extend(source.extend_glob.iter().cloned()); + self.engine.update(&source.engine); + } +} + +#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] +#[serde(default)] +#[serde(rename_all = "kebab-case")] +pub struct EngineConfig { + /// Check binary files. + pub binary: Option, + + /// Verifying spelling in files. + pub check_file: Option, +} + +impl EngineConfig { + pub fn from_defaults() -> Self { + let empty = Self::default(); + Self { + binary: Some(empty.binary()), + check_file: Some(empty.check_file()), + } + } + + pub fn update(&mut self, source: &Self) { + if let Some(source) = source.binary { + self.binary = Some(source); + } + if let Some(source) = source.check_file { + self.check_file = Some(source); + } + } + + pub fn binary(&self) -> bool { + self.binary.unwrap_or(false) + } + + pub fn check_file(&self) -> bool { + self.check_file.unwrap_or(true) + } +} + +impl PartialEq for EngineConfig { + fn eq(&self, rhs: &Self) -> bool { + self.binary == rhs.binary && self.check_file == rhs.check_file + } +} + +impl Eq for EngineConfig {} + +fn find_project_files<'a>( + dir: &'a Path, + names: &'a [&'a str], +) -> impl Iterator + 'a { + names + .iter() + .map(|name| dir.join(name)) + .filter(|path| path.exists()) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_update_from_nothing() { + let null = Config::default(); + let defaulted = Config::from_defaults(); + + let mut actual = defaulted.clone(); + actual.update(&null); + + assert_eq!(actual, defaulted); + } + + #[test] + fn test_update_from_defaults() { + let null = Config::default(); + let defaulted = Config::from_defaults(); + + let mut actual = null; + actual.update(&defaulted); + + assert_eq!(actual, defaulted); + } + + #[test] + fn test_extend_glob_updates() { + let null = GlobEngineConfig::default(); + let extended = GlobEngineConfig { + extend_glob: vec!["*.foo".into()], + ..Default::default() + }; + + let mut actual = null; + actual.update(&extended); + + assert_eq!(actual, extended); + } + + #[test] + fn test_extend_glob_extends() { + let base = GlobEngineConfig { + extend_glob: vec!["*.foo".into()], + ..Default::default() + }; + let extended = GlobEngineConfig { + extend_glob: vec!["*.bar".into()], + ..Default::default() + }; + + let mut actual = base; + actual.update(&extended); + + let expected: Vec = vec!["*.foo".into(), "*.bar".into()]; + assert_eq!(actual.extend_glob, expected); + } + + #[test] + fn parse_extend_globs() { + let input = r#"[type.po] +extend-glob = ["*.po"] +check-file = true +"#; + let mut expected = Config::default(); + expected.type_.patterns.insert( + "po".into(), + GlobEngineConfig { + extend_glob: vec!["*.po".into()], + engine: EngineConfig { + check_file: Some(true), + ..Default::default() + }, + }, + ); + let actual = Config::from_toml(input).unwrap(); + assert_eq!(actual, expected); + } + + #[test] + fn parse_extend_words() { + let input = r#"[type.shaders] +extend-glob = [ + '*.shader', + '*.cginc', +] + +[type.shaders.extend-words] +inout = "inout" +"#; + + let mut expected = Config::default(); + expected.type_.patterns.insert( + "shaders".into(), + GlobEngineConfig { + extend_glob: vec!["*.shader".into(), "*.cginc".into()], + engine: EngineConfig::default(), + }, + ); + let actual = Config::from_toml(input).unwrap(); + assert_eq!(actual, expected); + } +} diff --git a/src/lib.rs b/src/lib.rs index 3a2f7b2..c7bffc8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,3 @@ +pub mod config; pub mod lint; mod tree; diff --git a/src/main.rs b/src/main.rs index 5052c90..e72d627 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,12 +1,16 @@ use std::collections::HashMap; use std::ffi::OsStr; +use std::io::Write; use std::ops::Deref; use std::sync::OnceLock; +use anyhow::Context; + use clap::Parser; use tree_sitter::Language; +use orthotypos::config::Config; use orthotypos::lint::Linter; mod cli; @@ -65,8 +69,18 @@ fn main() -> anyhow::Result<()> { let report_handler = args.format().into_error_hook(); miette::set_hook(report_handler)?; + let cwd = std::env::current_dir().context("no current working directory")?; + let mut config = Config::default(); + for ancestor in cwd.ancestors() { + if let Some(derived) = Config::from_dir(ancestor)? { + config.update(&derived); + break; + } + } + config.update(&args.to_config()); + let mut typo_found = false; - for file in args.to_walk() { + for file in args.to_walk(&config)? { let extension = file.path().extension().unwrap_or_default(); let Some(language) = EXTENSION_LANGUAGE.get(extension) else { continue; @@ -75,9 +89,10 @@ fn main() -> anyhow::Result<()> { let source_content = std::fs::read(file.path())?; let linter = Linter::new(language, source_content, &file.path().to_string_lossy())?; + let mut stderr = std::io::stderr().lock(); for typo in &linter { let typo: miette::Report = typo.into(); - eprintln!("{typo:?}"); + writeln!(stderr, "{typo:?}")?; typo_found = true; } }