feat: [experimental] add sorting for Rust derive traits (#23)

maksym-arutyunyan · Aug 30, 2024 · e1a97f6 · e1a97f6
1 parent 8d47bea
commit e1a97f6
Show file tree

Hide file tree

Showing 19 changed files with 784 additions and 7 deletions.
diff --git a/.github/workflows/workflow.yaml b/.github/workflows/workflow.yaml
@@ -144,7 +144,7 @@ jobs:
         run: |
           git ls-files -co --exclude-standard \
               | grep -vE "^misc/|^tests/|^README.md" \
-              | xargs -I {} bash -c "./target/release/keepsorted '{}' --features gitignore" {}
+              | xargs -I {} bash -c "./target/release/keepsorted '{}' --features gitignore,rust_derive_canonical" {}
         env:
           RUST_BACKTRACE: 1
 

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,17 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+
+This changelog follows the [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) 
+format and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Added
+- Generic keyword sorting functionality
+- Support for Bazel files
+- Support for `Cargo.toml` files
+
+### Experimental
+- Support for `.gitignore` files
+- Support for `CODEOWNERS` files
+- Sorting of Rust `#[derive(...)]` traits
diff --git a/README.md b/README.md
@@ -111,3 +111,17 @@ bazel-b.txt
 /bazel-*
 bazel-a.txt
 ```
+
+### Rust Derive
+
+*NOTE: These features are experimental and require feature flags.*
+
+```shell
+$ keepsorted <path> --features rust_derive_alphabetical
+# or
+$ keepsorted <path> --features rust_derive_canonical
+```
+
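+`rust_derive_alphabetical` sorts the traits alphabetically. `rust_derive_canonical` puts a fixed set of well-known traits first, in the order `Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Display, Default`, followed by all remaining traits in alphabetical order. The two flags are mutually exclusive.
+
+This feature is inspired by a closed Rust style team issue: [rust-lang/style-team#154](https://github.com/rust-lang/style-team/issues/154).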
+
+
diff --git a/misc/count_rust_derive_traits.py b/misc/count_rust_derive_traits.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+import os
+import re
+from collections import defaultdict
+
+def count_traits(directory):
+    # Regex to match the derive traits
+    derive_pattern = re.compile(r'#\[derive\((.*?)\)\]')
+
+    trait_count = defaultdict(int)
+
+    # Walk through the directory
+    for root, _, files in os.walk(directory):
+        for file in files:
+            if file.endswith('.rs'):
+                file_path = os.path.join(root, file)
+                with open(file_path, 'r') as f:
+                    content = f.read()
+
+                    # Find all derive traits in the file
+                    derives = derive_pattern.findall(content)
+                    for derive in derives:
+                        traits = [trait.strip() for trait in derive.split(',')]
+                        for trait in traits:
+                            trait_count[trait] += 1
+
+    # Sort the traits by frequency in descending order
+    sorted_traits = sorted(trait_count.items(), key=lambda x: x[1], reverse=True)
+
+    # Print the sorted results
+    for trait, count in sorted_traits:
+        print(f'{trait}: {count}')
+
+# Example usage
+if __name__ == "__main__":
+    directory = '.'
+    count_traits(directory)
+
+'''
+Clone: 4572
+PartialEq: 3753
+Debug: 3527
+Eq: 2225
+Deserialize: 1594
+Serialize: 1255
+::prost::Message: 1185
+Copy: 750
+candid::Deserialize: 695
+serde::Serialize: 694
+candid::CandidType: 691
+CandidType: 622
+Hash: 549
+comparable::Comparable: 546
+Default: 376
+PartialOrd: 312
+Ord: 305
+serde::Deserialize: 200
+Parser: 154
+::prost::Oneof: 132
+Error: 60
+ProposalMetadata: 51
+thiserror::Error: 47
+::prost::Enumeration: 43
+EnumIter: 41
+ZeroizeOnDrop: 41
+Zeroize: 40
+Decode: 40
+Encode: 40
+JsonSchema: 38
+ValidateEq: 32
+Args: 18
+Template: 12
+strum_macros::Display: 10
+IntoStaticStr: 8
+Arbitrary: 8
+Display: 7
+strum_macros::EnumIter: 7
+EnumString: 6
+Subcommand: 6
+ExhaustiveSet: 6
+ValueEnum: 5
+std::hash::Hash: 5
+AsRefStr: 5
+std::fmt::Debug: 4
+EnumCount: 4
+Message: 4
+ArgEnum: 4
+VariantNames: 2
+Request: 1
+clap::ArgEnum: 1
+EnumMessage: 1
+Educe: 1
+FromRepr: 1
+: 1
+strum_macros::EnumString: 1
+clap::Args: 1
+clap::Subcommand: 1
+'''
diff --git a/run-all.sh b/run-all.sh
@@ -29,7 +29,7 @@ fmt_status=$?
 # Also ignore `./misc/` and `./tests/`.
 git ls-files -co --exclude-standard \
     | grep -vE "^misc/|^tests/|^README.md" \
-    | xargs -I {} bash -c "./target/release/keepsorted '{}' --features gitignore" {}
+    | xargs -I {} bash -c "./target/release/keepsorted '{}' --features gitignore,rust_derive_canonical" {}
 keepsorted_status=$?
 
 # Check if keepsorted changed any files.

diff --git a/src/lib.rs b/src/lib.rs
@@ -38,11 +38,14 @@ pub fn process_file(path: &Path, features: Vec<String>) -> io::Result<()> {
     writer.flush()
 }
 
+/// Sorting strategy applied to a file's lines by `process_lines`.
+#[derive(Copy, Clone)]
 pub enum Strategy {
+    /// Fallback that `classify` returns when no other strategy is selected.
     Generic,
+    /// Handled by `strategies::bazel::process`.
     Bazel,
+    /// Handled by `strategies::cargo_toml::process`.
     CargoToml,
+    /// Handled by `strategies::gitignore::process`; `classify` also returns it for
+    /// `CODEOWNERS` files when the `codeowners` feature is enabled.
     Gitignore,
+    /// Sorts `#[derive(...)]` traits alphabetically (feature `rust_derive_alphabetical`).
+    RustDeriveAlphabetical,
+    /// Sorts `#[derive(...)]` traits in canonical order (feature `rust_derive_canonical`).
+    RustDeriveCanonical,
 }
 
 pub fn process_lines(strategy: Strategy, lines: Vec<String>) -> io::Result<Vec<String>> {
@@ -54,6 +57,10 @@ pub fn process_lines(strategy: Strategy, lines: Vec<String>) -> io::Result<Vec<S
         Strategy::Bazel => crate::strategies::bazel::process(lines),
         Strategy::CargoToml => crate::strategies::cargo_toml::process(lines),
         Strategy::Gitignore => crate::strategies::gitignore::process(lines),
+        Strategy::RustDeriveAlphabetical => {
+            crate::strategies::rust_derive::process(lines, strategy)
+        }
+        Strategy::RustDeriveCanonical => crate::strategies::rust_derive::process(lines, strategy),
     }
 }
 
@@ -70,6 +77,17 @@ fn classify(path: &Path, features: Vec<String>) -> Strategy {
     if features.contains(&"codeowners".to_string()) && is_codeowners(path) {
         return Strategy::Gitignore;
     }
+    if is_rust(path) {
+        match (
+            features.contains(&"rust_derive_alphabetical".to_string()),
+            features.contains(&"rust_derive_canonical".to_string()),
+        ) {
+            (true, true) => panic!("Mutually exclusive rust_derive feature flags are not allowed"),
+            (true, false) => return Strategy::RustDeriveAlphabetical,
+            (false, true) => return Strategy::RustDeriveCanonical,
+            _ => {}
+        }
+    }
     Strategy::Generic
 }
 
@@ -100,6 +118,10 @@ fn is_codeowners(path: &Path) -> bool {
     path.is_file() && path.file_name() == Some(std::ffi::OsStr::new("CODEOWNERS"))
 }
 
+/// Returns `true` when `path` is an existing regular file with the `rs` extension.
+fn is_rust(path: &Path) -> bool {
+    // Check the file first, then the extension.
+    path.is_file() && path.extension().map_or(false, |ext| ext == "rs")
+}
+
 fn re_keyword_keep_sorted() -> Regex {
     Regex::new(
         r"(?i)^\s*(#|\/\/|#\s+keepsorted\s*:|\/\/\s+keepsorted\s*:)\s*keep\s+sorted\s*\.?\s*$",

diff --git a/src/main.rs b/src/main.rs
@@ -11,7 +11,7 @@ fn about() -> String {
     )
 }
 
-#[derive(Parser, Debug)]
+#[derive(Debug, Parser)]
 #[command(
     version,
     about = about(),

diff --git a/src/strategies/bazel.rs b/src/strategies/bazel.rs
@@ -106,7 +106,7 @@ fn is_single_line_comment(line: &str) -> bool {
 // beginning with "@". The next significant part of the comparison is the list
 // of elements in the value, where elements are split at `.' and `:'. Finally
 // we compare by value and break ties by original index.
-#[derive(Debug, Default, Eq, PartialEq)]
+#[derive(Eq, PartialEq, Debug, Default)]
 pub struct BazelSortKey {
     phase: i16,
     split: Vec<String>,

diff --git a/src/strategies/mod.rs b/src/strategies/mod.rs
@@ -2,3 +2,4 @@ pub mod bazel;
 pub mod cargo_toml;
 pub mod generic;
 pub mod gitignore;
+pub mod rust_derive;
diff --git a/src/strategies/rust_derive.rs b/src/strategies/rust_derive.rs
@@ -0,0 +1,156 @@
+use crate::Strategy;
+use once_cell::sync::Lazy;
+use regex::Regex;
+use std::io;
+
+use crate::is_ignore_block;
+
+// Lazily built regexes matching the line that opens a derive attribute and the line
+// that closes it.
+static RE_DERIVE_BEGIN: Lazy<Regex> = Lazy::new(re_derive_begin);
+static RE_DERIVE_END: Lazy<Regex> = Lazy::new(re_derive_end);
+
+// Length limits used by `sort`; they are compared against `String::len()`, i.e. bytes.
+// A rewritten derive stays on one line when that line, including its trailing
+// whitespace and '\n', is no longer than this.
+const STAY_ONE_LINE_LEN: usize = 97;
+// When the derive has to be wrapped, all traits share a single indented line if that
+// line (measured before its '\n' is appended) is no longer than this; otherwise every
+// trait is put on its own line.
+const BREAK_INTO_MANY_LINES_LEN: usize = 101;
+
+/// Rewrites every `#[derive(...)]` attribute found in `lines` via `sort`.
+///
+/// Lines outside a derive attribute are copied through untouched. A derive starts on a
+/// line matching `RE_DERIVE_BEGIN` and ends on the first subsequent (or same) line
+/// matching `RE_DERIVE_END`; the collected lines are passed to `sort` together with
+/// `strategy` and a flag telling whether `is_ignore_block` flagged the line emitted
+/// right before the derive.
+///
+/// Always returns `Ok`.
+pub(crate) fn process(lines: Vec<String>, strategy: Strategy) -> io::Result<Vec<String>> {
+    let mut output_lines: Vec<String> = Vec::new();
+    let mut pending: Vec<String> = Vec::new();
+    let mut in_derive = false;
+    let mut skip_sorting = false;
+
+    for line in lines {
+        if RE_DERIVE_BEGIN.is_match(&line) {
+            // Only the most recently emitted line is consulted; with no output yet the
+            // flag keeps its previous value.
+            if let Some(prev_line) = output_lines.last() {
+                skip_sorting = is_ignore_block(&[prev_line.clone()]);
+            }
+            in_derive = true;
+        }
+
+        if !in_derive {
+            output_lines.push(line);
+            continue;
+        }
+
+        // Inside a derive: buffer the line, and flush the buffer once the attribute closes.
+        let closes_derive = RE_DERIVE_END.is_match(&line);
+        pending.push(line);
+        if closes_derive {
+            output_lines.extend(sort(std::mem::take(&mut pending), skip_sorting, strategy));
+            skip_sorting = false;
+            in_derive = false;
+        }
+    }
+
+    // The input ended while a derive was still open: hand over what was collected.
+    if in_derive {
+        output_lines.extend(sort(pending, skip_sorting, strategy));
+    }
+
+    Ok(output_lines)
+}
+
+/// Rewrites one `#[derive(...)]` attribute with its traits in sorted order.
+///
+/// `block` holds the raw lines of a single derive attribute (several lines when the
+/// derive is wrapped). The lines are joined, the traits are ordered according to
+/// `strategy`, and the attribute is emitted again: on one line when it fits within
+/// `STAY_ONE_LINE_LEN`, otherwise wrapped with either all traits on one indented line
+/// (if that fits within `BREAK_INTO_MANY_LINES_LEN`) or one trait per line.
+///
+/// `block` is returned unchanged, so that no input is ever lost, when:
+/// - `is_ignore_block_prev_line` is set or `is_ignore_block` flags the block,
+/// - no complete `#[derive(` ... `)]` pair is found (e.g. the input ended mid-attribute),
+/// - other text precedes or follows the attribute, or a comment sits inside it;
+///   rebuilding the attribute would drop that text or fold traits into the comment,
+/// - `strategy` is not one of the two derive strategies.
+fn sort(block: Vec<String>, is_ignore_block_prev_line: bool, strategy: Strategy) -> Vec<String> {
+    if is_ignore_block_prev_line || is_ignore_block(&block) {
+        return block;
+    }
+
+    // Collapse a possibly wrapped derive into one logical line ending in a single '\n'.
+    let line: String = block
+        .iter()
+        .map(|line| line.trim_end_matches('\n'))
+        .collect();
+    let line = format!("{}\n", line);
+    let trimmed_line = line.trim();
+
+    const DERIVE_OPEN: &str = "#[derive(";
+    const DERIVE_CLOSE: &str = ")]";
+
+    // Locate the attribute; without a complete one there is nothing to sort.
+    let derive_start = match trimmed_line.find(DERIVE_OPEN) {
+        Some(start) => start,
+        None => return block,
+    };
+    let derive_end = match trimmed_line[derive_start..].find(DERIVE_CLOSE) {
+        Some(offset) => derive_start + offset,
+        None => return block,
+    };
+    let derive_content = &trimmed_line[derive_start + DERIVE_OPEN.len()..derive_end];
+
+    // Only the attribute itself is re-emitted below, so anything around it would vanish,
+    // and a comment inside it would end up in the middle of the joined trait list.
+    let has_surrounding_text =
+        derive_start != 0 || derive_end + DERIVE_CLOSE.len() != trimmed_line.len();
+    let has_comment = derive_content.contains("//") || derive_content.contains("/*");
+    if has_surrounding_text || has_comment {
+        return block;
+    }
+
+    let mut traits: Vec<&str> = derive_content.split(',').map(str::trim).collect();
+    match strategy {
+        Strategy::RustDeriveAlphabetical => traits.sort_unstable(),
+        Strategy::RustDeriveCanonical => traits = canonical_sort(traits),
+        // Not a derive strategy: leave the block alone.
+        _ => return block,
+    }
+    // Trailing commas and wrapped lines leave empty pieces behind.
+    traits.retain(|t| !t.is_empty());
+    let sorted_traits = traits.join(", ");
+    let new_derive = format!("#[derive({})]", sorted_traits);
+
+    // Preserve the original indentation and the trailing whitespace (including '\n').
+    let prefix_whitespace = &line[..line.len() - line.trim_start().len()];
+    let suffix_whitespace = &line[line.trim_end().len()..];
+
+    let new_line = format!("{}{}{}", prefix_whitespace, new_derive, suffix_whitespace);
+    if new_line.len() <= STAY_ONE_LINE_LEN {
+        return vec![new_line];
+    }
+
+    // Too long for one line: wrap, preferring all traits on a single indented line.
+    let mut result = Vec::with_capacity(traits.len() + 2);
+    result.push(format!("{}#[derive(\n", prefix_whitespace));
+    let mid_line = format!("{}    {},", prefix_whitespace, sorted_traits);
+    if mid_line.len() <= BREAK_INTO_MANY_LINES_LEN {
+        result.push(format!("{}\n", mid_line));
+    } else {
+        for x in traits {
+            result.push(format!("{}    {},\n", prefix_whitespace, x));
+        }
+    }
+    result.push(format!("{})]\n", prefix_whitespace));
+
+    result
+}
+
+/// Orders derive traits canonically.
+///
+/// Traits listed in the canonical table come first, in table order; every other trait
+/// follows, ordered by name. Names are compared exactly as written, so a path-qualified
+/// trait is treated as one that is not in the table.
+fn canonical_sort(traits: Vec<&str>) -> Vec<&str> {
+    // A trait's position in this table is its sort rank.
+    const CANONICAL_ORDER: [&str; 10] = [
+        "Copy",
+        "Clone",
+        "Eq",
+        "PartialEq",
+        "Ord",
+        "PartialOrd",
+        "Hash",
+        "Debug",
+        "Display",
+        "Default",
+    ];
+
+    // Traits missing from the table all share the highest rank, so they sort last.
+    let rank = |name: &str| {
+        CANONICAL_ORDER
+            .iter()
+            .position(|&known| known == name)
+            .unwrap_or(usize::MAX)
+    };
+
+    // Rank decides first; the name breaks ties between equally ranked traits.
+    let mut ordered = traits;
+    ordered.sort_by(|a, b| rank(*a).cmp(&rank(*b)).then_with(|| a.cmp(b)));
+    ordered
+}
+
+/// Builds the regex for a line that starts, after optional leading whitespace, with
+/// `#[derive(`.
+fn re_derive_begin() -> Regex {
+    let pattern = r"^\s*#\[derive\(";
+    Regex::new(pattern).expect("Failed to build regex for rust derive begin")
+}
+
+/// Builds the regex for a line that ends with `)]`, optionally followed by whitespace.
+fn re_derive_end() -> Regex {
+    let pattern = r"\)\]\s*$";
+    Regex::new(pattern).expect("Failed to build regex for rust derive end")
+}