Skip to content

Commit

Permalink
feat: [experimental] add sorting for Rust derive traits (#23)
Browse files Browse the repository at this point in the history
  • Loading branch information
maksym-arutyunyan authored Aug 30, 2024
1 parent 8d47bea commit e1a97f6
Show file tree
Hide file tree
Showing 19 changed files with 784 additions and 7 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/workflow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ jobs:
run: |
git ls-files -co --exclude-standard \
| grep -vE "^misc/|^tests/|^README.md" \
| xargs -I {} bash -c "./target/release/keepsorted '{}' --features gitignore" {}
| xargs -I {} bash -c "./target/release/keepsorted '{}' --features gitignore,rust_derive_canonical" {}
env:
RUST_BACKTRACE: 1

Expand Down
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Changelog
All notable changes to this project will be documented in this file.

This changelog follows the [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
format and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added
- Generic keyword sorting functionality
- Support for Bazel files
- Support for `Cargo.toml` files

### Experimental
- Support for `.gitignore` files
- Support for `CODEOWNERS` files
- Sorting of Rust `#[derive(...)]` traits
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,17 @@ bazel-b.txt
/bazel-*
bazel-a.txt
```

### Rust Derive

*NOTE: These features are experimental and require feature flags. The two flags below are mutually exclusive — pass only one of them.*

```shell
$ keepsorted <path> --features rust_derive_alphabetical
# or
$ keepsorted <path> --features rust_derive_canonical
```

This feature is inspired by a closed Rust style-team ticket: [rust-lang/style-team#154](https://github.com/rust-lang/style-team/issues/154).


98 changes: 98 additions & 0 deletions misc/count_rust_derive_traits.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env python3
import os
import re
from collections import defaultdict

def count_traits(directory):
    """Count how often each trait appears in Rust `#[derive(...)]` attributes.

    Recursively walks `directory`, scans every file whose name ends in `.rs`
    and prints one `trait: count` line per distinct trait, most frequent
    first. Traits with equal counts keep the order in which they were first
    encountered.

    Args:
        directory: Root directory to scan.
    """
    # DOTALL lets the lazy group span newlines, so derives that are wrapped
    # over several lines are counted as well.
    derive_pattern = re.compile(r'#\[derive\((.*?)\)\]', re.DOTALL)

    trait_count = defaultdict(int)

    # Walk through the directory
    for root, _, files in os.walk(directory):
        for file in files:
            if not file.endswith('.rs'):
                continue
            file_path = os.path.join(root, file)
            # Decode explicitly as UTF-8 instead of relying on the locale;
            # undecodable bytes are replaced rather than aborting the scan.
            with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
                content = f.read()

            # Find all derive traits in the file
            for derive in derive_pattern.findall(content):
                for trait in derive.split(','):
                    trait = trait.strip()
                    # A trailing comma (or an empty derive) yields an empty
                    # name; it is not a trait and must not be counted.
                    if trait:
                        trait_count[trait] += 1

    # Sort the traits by frequency in descending order
    sorted_traits = sorted(trait_count.items(), key=lambda x: x[1], reverse=True)

    # Print the sorted results
    for trait, count in sorted_traits:
        print(f'{trait}: {count}')

# Example usage
if __name__ == "__main__":
    # Scan the current working directory when run as a script.
    count_traits('.')

'''
Clone: 4572
PartialEq: 3753
Debug: 3527
Eq: 2225
Deserialize: 1594
Serialize: 1255
::prost::Message: 1185
Copy: 750
candid::Deserialize: 695
serde::Serialize: 694
candid::CandidType: 691
CandidType: 622
Hash: 549
comparable::Comparable: 546
Default: 376
PartialOrd: 312
Ord: 305
serde::Deserialize: 200
Parser: 154
::prost::Oneof: 132
Error: 60
ProposalMetadata: 51
thiserror::Error: 47
::prost::Enumeration: 43
EnumIter: 41
ZeroizeOnDrop: 41
Zeroize: 40
Decode: 40
Encode: 40
JsonSchema: 38
ValidateEq: 32
Args: 18
Template: 12
strum_macros::Display: 10
IntoStaticStr: 8
Arbitrary: 8
Display: 7
strum_macros::EnumIter: 7
EnumString: 6
Subcommand: 6
ExhaustiveSet: 6
ValueEnum: 5
std::hash::Hash: 5
AsRefStr: 5
std::fmt::Debug: 4
EnumCount: 4
Message: 4
ArgEnum: 4
VariantNames: 2
Request: 1
clap::ArgEnum: 1
EnumMessage: 1
Educe: 1
FromRepr: 1
: 1
strum_macros::EnumString: 1
clap::Args: 1
clap::Subcommand: 1
'''
2 changes: 1 addition & 1 deletion run-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ fmt_status=$?
# Also ignore `./misc/` and `./tests/`.
git ls-files -co --exclude-standard \
| grep -vE "^misc/|^tests/|^README.md" \
| xargs -I {} bash -c "./target/release/keepsorted '{}' --features gitignore" {}
| xargs -I {} bash -c "./target/release/keepsorted '{}' --features gitignore,rust_derive_canonical" {}
keepsorted_status=$?

# Check if keepsorted changed any files.
Expand Down
22 changes: 22 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,14 @@ pub fn process_file(path: &Path, features: Vec<String>) -> io::Result<()> {
writer.flush()
}

/// Sorting strategy selected for a file.
///
/// The derive list follows this project's canonical derive ordering.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum Strategy {
    /// Fallback used when no specialised strategy applies to the file.
    Generic,
    /// Handled by `crate::strategies::bazel`.
    Bazel,
    /// Handled by `crate::strategies::cargo_toml`.
    CargoToml,
    /// Handled by `crate::strategies::gitignore`; also used for `CODEOWNERS`
    /// files when the `codeowners` feature is enabled.
    Gitignore,
    /// Sorts the traits of Rust `#[derive(...)]` attributes alphabetically.
    RustDeriveAlphabetical,
    /// Sorts the traits of Rust `#[derive(...)]` attributes in canonical
    /// order, with unknown traits following alphabetically.
    RustDeriveCanonical,
}

pub fn process_lines(strategy: Strategy, lines: Vec<String>) -> io::Result<Vec<String>> {
Expand All @@ -54,6 +57,10 @@ pub fn process_lines(strategy: Strategy, lines: Vec<String>) -> io::Result<Vec<S
Strategy::Bazel => crate::strategies::bazel::process(lines),
Strategy::CargoToml => crate::strategies::cargo_toml::process(lines),
Strategy::Gitignore => crate::strategies::gitignore::process(lines),
Strategy::RustDeriveAlphabetical => {
crate::strategies::rust_derive::process(lines, strategy)
}
Strategy::RustDeriveCanonical => crate::strategies::rust_derive::process(lines, strategy),
}
}

Expand All @@ -70,6 +77,17 @@ fn classify(path: &Path, features: Vec<String>) -> Strategy {
if features.contains(&"codeowners".to_string()) && is_codeowners(path) {
return Strategy::Gitignore;
}
if is_rust(path) {
match (
features.contains(&"rust_derive_alphabetical".to_string()),
features.contains(&"rust_derive_canonical".to_string()),
) {
(true, true) => panic!("Mutually exclusive rust_derive feature flags are not allowed"),
(true, false) => return Strategy::RustDeriveAlphabetical,
(false, true) => return Strategy::RustDeriveCanonical,
_ => {}
}
}
Strategy::Generic
}

Expand Down Expand Up @@ -100,6 +118,10 @@ fn is_codeowners(path: &Path) -> bool {
path.is_file() && path.file_name() == Some(std::ffi::OsStr::new("CODEOWNERS"))
}

/// Reports whether `path` passes `Path::is_file` and has the extension `rs`.
fn is_rust(path: &Path) -> bool {
    let has_rs_extension = matches!(path.extension(), Some(ext) if ext == "rs");
    path.is_file() && has_rs_extension
}

fn re_keyword_keep_sorted() -> Regex {
Regex::new(
r"(?i)^\s*(#|\/\/|#\s+keepsorted\s*:|\/\/\s+keepsorted\s*:)\s*keep\s+sorted\s*\.?\s*$",
Expand Down
2 changes: 1 addition & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ fn about() -> String {
)
}

#[derive(Parser, Debug)]
#[derive(Debug, Parser)]
#[command(
version,
about = about(),
Expand Down
2 changes: 1 addition & 1 deletion src/strategies/bazel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ fn is_single_line_comment(line: &str) -> bool {
// beginning with "@". The next significant part of the comparison is the list
// of elements in the value, where elements are split at `.' and `:'. Finally
// we compare by value and break ties by original index.
#[derive(Debug, Default, Eq, PartialEq)]
#[derive(Eq, PartialEq, Debug, Default)]
pub struct BazelSortKey {
phase: i16,
split: Vec<String>,
Expand Down
1 change: 1 addition & 0 deletions src/strategies/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ pub mod bazel;
pub mod cargo_toml;
pub mod generic;
pub mod gitignore;
pub mod rust_derive;
156 changes: 156 additions & 0 deletions src/strategies/rust_derive.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
use crate::Strategy;
use once_cell::sync::Lazy;
use regex::Regex;
use std::io;

use crate::is_ignore_block;

// Built on first use by `re_derive_begin`: matches a line that, after optional
// leading whitespace, opens a `#[derive(` attribute.
static RE_DERIVE_BEGIN: Lazy<Regex> = Lazy::new(re_derive_begin);
// Built on first use by `re_derive_end`: matches a line that ends with `)]`
// (optionally followed by whitespace).
static RE_DERIVE_END: Lazy<Regex> = Lazy::new(re_derive_end);

// Length limits (in bytes, as returned by `str::len`) used by `sort` when it
// re-emits a derive attribute:
// - a rewritten derive stays on one line while that line, including its
//   trailing '\n', is at most `STAY_ONE_LINE_LEN` long;
// - otherwise all traits share one wrapped line if that line is at most
//   `BREAK_INTO_MANY_LINES_LEN` long, else each trait gets its own line.
// NOTE(review): `sort` measures the wrapped line before its '\n' is appended —
// confirm that 101 is the intended limit for that case.
const STAY_ONE_LINE_LEN: usize = 97;
const BREAK_INTO_MANY_LINES_LEN: usize = 101;

/// Rewrites every `#[derive(...)]` attribute found in `lines` with its traits
/// sorted according to `strategy`; all other lines are passed through.
///
/// A derive block starts at a line matching `RE_DERIVE_BEGIN` and ends at the
/// first following line (possibly the same one) matching `RE_DERIVE_END`.
/// Whether the line emitted just before the block is an ignore marker is
/// checked with `is_ignore_block` and forwarded to `sort`.
///
/// Input that cannot be rewritten without losing text is emitted unchanged:
/// * a derive that closes on its opening line but is followed by trailing
///   content (for example a comment),
/// * a block that is still open when another derive starts,
/// * a block that is still open at the end of the input.
///
/// This function currently never returns an error.
pub(crate) fn process(lines: Vec<String>, strategy: Strategy) -> io::Result<Vec<String>> {
    let mut output_lines: Vec<String> = Vec::with_capacity(lines.len());
    let mut block: Vec<String> = Vec::new();
    let mut is_sorting_block = false;
    let mut is_ignore_block_prev_line = false;

    for line in lines {
        let is_derive_begin = RE_DERIVE_BEGIN.is_match(&line);
        let is_derive_end = RE_DERIVE_END.is_match(&line);

        if is_derive_begin {
            if is_sorting_block {
                // The pending block never reached a closing line. Emit it
                // untouched instead of merging two attributes into one block.
                output_lines.append(&mut block);
                is_sorting_block = false;
            }
            if !is_derive_end && line.contains(")]") {
                // The attribute closes on this line but something follows it;
                // rewriting would drop that trailing text, so leave it alone.
                output_lines.push(line);
                continue;
            }
            is_ignore_block_prev_line = match output_lines.last() {
                Some(prev_line) => is_ignore_block(std::slice::from_ref(prev_line)),
                None => false,
            };
            is_sorting_block = true;
        }

        if !is_sorting_block {
            output_lines.push(line);
            continue;
        }

        block.push(line);
        if is_derive_end {
            let mut sorted = sort(
                std::mem::take(&mut block),
                is_ignore_block_prev_line,
                strategy,
            );
            output_lines.append(&mut sorted);
            is_ignore_block_prev_line = false;
            is_sorting_block = false;
        }
    }

    // A block left open at the end of the input has no closing `)]` line, so
    // it cannot be rewritten safely; keep its lines exactly as they were.
    output_lines.append(&mut block);

    Ok(output_lines)
}

/// Sorts the traits of the single `#[derive(...)]` attribute held in `block`
/// and returns the lines that replace it.
///
/// `block` holds the lines of one attribute (one line, or several when the
/// attribute is wrapped). The lines are joined, the traits are ordered
/// according to `strategy`, and the attribute is re-emitted:
/// * on one line if it fits within `STAY_ONE_LINE_LEN`,
/// * otherwise wrapped with all traits on one inner line if that line fits
///   within `BREAK_INTO_MANY_LINES_LEN`,
/// * otherwise wrapped with one trait per line.
///
/// Leading whitespace of the attribute is preserved; trailing whitespace is
/// preserved in the one-line form only.
///
/// `block` is returned unchanged when
/// * `is_ignore_block_prev_line` is set or `is_ignore_block(&block)` is true,
/// * `strategy` is not one of the two Rust derive strategies,
/// * the joined text is not exactly one `#[derive(...)]` attribute (no
///   closing `)]`, or text before/after the attribute), or
/// * the trait list contains a comment.
///
/// In those last two cases a rewrite would silently discard text.
fn sort(block: Vec<String>, is_ignore_block_prev_line: bool, strategy: Strategy) -> Vec<String> {
    const DERIVE_OPEN: &str = "#[derive(";
    const DERIVE_CLOSE: &str = ")]";

    if is_ignore_block_prev_line || is_ignore_block(&block) {
        return block;
    }

    let joined: String = block
        .iter()
        .map(|line| line.trim_end_matches('\n'))
        .collect();
    let trimmed = joined.trim();
    let prefix_whitespace = &joined[..joined.len() - joined.trim_start().len()];
    let suffix_whitespace = &joined[joined.trim_end().len()..];

    // The attribute must be the only thing in the block; anything else would
    // be lost when the block is replaced by the rebuilt attribute.
    let after_open = match trimmed.strip_prefix(DERIVE_OPEN) {
        Some(rest) => rest,
        None => return block,
    };
    let derive_content = match after_open.find(DERIVE_CLOSE) {
        Some(end) if after_open[end + DERIVE_CLOSE.len()..].is_empty() => &after_open[..end],
        _ => return block,
    };
    // Comments inside the list cannot be re-attached to the right trait.
    if derive_content.contains("//") || derive_content.contains("/*") {
        return block;
    }

    // Empty entries come from trailing commas and are dropped.
    let mut traits: Vec<&str> = derive_content
        .split(',')
        .map(str::trim)
        .filter(|name| !name.is_empty())
        .collect();

    match strategy {
        Strategy::RustDeriveAlphabetical => traits.sort_unstable(),
        Strategy::RustDeriveCanonical => traits = canonical_sort(traits),
        _ => return block,
    }

    let sorted_traits = traits.join(", ");
    let new_line = format!(
        "{}{}{}{}{}\n",
        prefix_whitespace, DERIVE_OPEN, sorted_traits, DERIVE_CLOSE, suffix_whitespace
    );
    if new_line.len() <= STAY_ONE_LINE_LEN {
        return vec![new_line];
    }

    let mut result = Vec::with_capacity(traits.len() + 2);
    result.push(format!("{}#[derive(\n", prefix_whitespace));
    let mid_line = format!("{} {},", prefix_whitespace, sorted_traits);
    if mid_line.len() <= BREAK_INTO_MANY_LINES_LEN {
        result.push(format!("{}\n", mid_line));
    } else {
        result.extend(
            traits
                .iter()
                .map(|name| format!("{} {},\n", prefix_whitespace, name)),
        );
    }
    result.push(format!("{})]\n", prefix_whitespace));

    result
}

/// Orders `traits` canonically: names listed in the canonical table come
/// first, in table order; every other name follows, sorted by name.
fn canonical_sort(traits: Vec<&str>) -> Vec<&str> {
    // Canonical position of the well-known traits.
    const CANONICAL_ORDER: [&str; 10] = [
        "Copy",
        "Clone",
        "Eq",
        "PartialEq",
        "Ord",
        "PartialOrd",
        "Hash",
        "Debug",
        "Display",
        "Default",
    ];

    // Names outside the table rank last (`usize::MAX`), so the name itself
    // decides their relative order.
    let rank = |name: &str| {
        CANONICAL_ORDER
            .iter()
            .position(|&known| known == name)
            .unwrap_or(usize::MAX)
    };

    let mut ordered = traits;
    ordered.sort_by_key(|&name| (rank(name), name));
    ordered
}

/// Builds the regex that recognises the start of a derive attribute: optional
/// leading whitespace followed by `#[derive(` at the beginning of the line.
///
/// Panics if the pattern fails to compile.
fn re_derive_begin() -> Regex {
Regex::new(r"^\s*#\[derive\(").expect("Failed to build regex for rust derive begin")
}

/// Builds the regex that recognises the end of a derive attribute: `)]`
/// followed only by optional whitespace up to the end of the line.
///
/// Panics if the pattern fails to compile.
fn re_derive_end() -> Regex {
Regex::new(r"\)\]\s*$").expect("Failed to build regex for rust derive end")
}
Loading

0 comments on commit e1a97f6

Please sign in to comment.