Skip to content

Commit

Permalink
benchmarking jwalk and serial
Browse files Browse the repository at this point in the history
  • Loading branch information
perryqh committed Nov 15, 2024
1 parent daacced commit 0885b30
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 12 deletions.
48 changes: 44 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ enum_dispatch = "0.3.13"
fast-glob = "0.4.0"
ignore = "0.4.23"
itertools = "0.13.0"
jwalk = "0.8.1"
lazy_static = "1.5.0"
path-clean = "1.0.1"
rayon = "1.10.0"
Expand Down
5 changes: 4 additions & 1 deletion dev/run_benchmarks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,7 @@ echo "To run these benchmarks on your application, you can place this repo next

hyperfine --warmup=2 --runs=3 --export-markdown tmp/codeowners_benchmarks.md \
'../rubyatscale/codeowners-rs/target/release/codeowners gv' \
'bin/codeowners-rs gv'
'jwalk=1 ../rubyatscale/codeowners-rs/target/release/codeowners gv' \
'jwalk=1 entryrayon=1 ../rubyatscale/codeowners-rs/target/release/codeowners gv' \
'entryrayon=1 ../rubyatscale/codeowners-rs/target/release/codeowners gv' \
'bin/codeowners-rs gv'
120 changes: 113 additions & 7 deletions src/project_builder.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use std::{
fs::File,
path::{Path, PathBuf},
env, fs::File, path::{Path, PathBuf}
};

use error_stack::{Result, ResultExt};
use fast_glob::glob_match;
use ignore::WalkBuilder;
use jwalk::WalkDir;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use tracing::instrument;

Expand Down Expand Up @@ -50,22 +50,46 @@ impl<'a> ProjectBuilder<'a> {

/// Builds the [`Project`], selecting the directory walker at runtime.
///
/// If the `jwalk` environment variable can be read (it is set and holds valid
/// Unicode), the jwalk-based walker is used; otherwise the `ignore`-based
/// walker is used. The variable's value itself is not inspected.
#[instrument(level = "debug", skip_all)]
pub fn build(&mut self) -> Result<Project, Error> {
    if env::var("jwalk").is_ok() {
        self.build_with_jwalk()
    } else {
        self.build_with_walkdir()
    }
}

fn build_with_jwalk(&mut self) -> Result<Project, Error> {
dbg!("building with jwalk");
let mut entry_types = Vec::with_capacity(INITIAL_VECTOR_CAPACITY);

for entry in WalkDir::new(&self.base_path).follow_links(true).skip_hidden(false).into_iter() {
let entry = match entry.change_context(Error::Io) {
Ok(entry) => entry,
Err(_) => continue,
};
let absolute_path = entry.path();
let is_dir = entry.file_type().is_dir();
entry_types.push(self.build_entry_type(&absolute_path, is_dir)?);
}
self.build_project_from_entry_types(entry_types)
}

/// Walks `base_path` with the `ignore` crate's `WalkBuilder` and builds the
/// project from the entries found.
///
/// Hidden-entry filtering is turned off via `hidden(false)`.
///
/// # Errors
///
/// Returns `Error::Io` if the walker yields an error for an entry or if an
/// entry has no file type. Errors from classifying an entry or from
/// assembling the project are propagated unchanged.
fn build_with_walkdir(&mut self) -> Result<Project, Error> {
    dbg!("building with walkdir");
    let mut entry_types = Vec::with_capacity(INITIAL_VECTOR_CAPACITY);
    let mut builder = WalkBuilder::new(&self.base_path);
    builder.hidden(false);
    let walkdir = builder.build();

    for entry in walkdir {
        let entry = entry.change_context(Error::Io)?;
        let absolute_path = entry.path();
        // A missing file type is reported as an I/O error rather than guessed.
        let is_dir = entry.file_type().ok_or(Error::Io).change_context(Error::Io)?.is_dir();

        // Each entry is classified exactly once, using the path/is_dir form of
        // `build_entry_type`; the stale call that passed the whole `entry` is gone.
        entry_types.push(self.build_entry_type(absolute_path, is_dir)?);
    }
    self.build_project_from_entry_types(entry_types)
}

fn build_entry_type(&mut self, entry: ignore::DirEntry) -> Result<EntryType, Error> {
let absolute_path = entry.path();

let is_dir = entry.file_type().ok_or(Error::Io).change_context(Error::Io)?.is_dir();
fn build_entry_type(&mut self, absolute_path: &Path, is_dir: bool) -> Result<EntryType, Error> {
let relative_path = absolute_path.strip_prefix(&self.base_path).change_context(Error::Io)?.to_owned();

if is_dir {
Expand Down Expand Up @@ -97,6 +121,88 @@ impl<'a> ProjectBuilder<'a> {
}

/// Assembles the [`Project`] from the classified entries, selecting the
/// strategy at runtime.
///
/// If the `entryrayon` environment variable can be read (it is set and holds
/// valid Unicode), the rayon-based implementation is used; otherwise the
/// serial implementation is used. The variable's value is not inspected.
fn build_project_from_entry_types(&mut self, entry_types: Vec<EntryType>) -> Result<Project, Error> {
    if env::var("entryrayon").is_ok() {
        self.build_project_from_entry_types_rayon(entry_types)
    } else {
        self.build_project_from_entry_types_serial(entry_types)
    }
}

fn build_project_from_entry_types_serial(&mut self, entry_types: Vec<EntryType>) -> Result<Project, Error> {
dbg!("building with serial");
let mut project_files = Vec::<ProjectFile>::with_capacity(INITIAL_VECTOR_CAPACITY);
let mut vendored_gems = Vec::<VendoredGem>::new();
let mut packages = Vec::<Package>::new();
let mut directory_codeowner_files = Vec::<DirectoryCodeownersFile>::new();
let mut teams = Vec::<Team>::new();

for entry_type in entry_types {
match entry_type {
EntryType::OwnedFile(project_file) => {
project_files.push(project_file);
}
EntryType::Directory(absolute_path, relative_path) => {
if relative_path.parent() == Some(Path::new(&self.config.vendored_gems_path)) {
let file_name = relative_path.file_name().expect("expected a file_name");
vendored_gems.push(VendoredGem {
path: absolute_path,
name: file_name.to_string_lossy().to_string(),
});
}
}
EntryType::RubyPackage(absolute_path, relative_path) => {
if let Some(owner) = ruby_package_owner(&absolute_path).unwrap() {
packages.push(Package {
path: relative_path.clone(),
owner,
package_type: PackageType::Ruby,
});
}
}
EntryType::JavascriptPackage(absolute_path, relative_path) => {
if let Some(owner) = javascript_package_owner(&absolute_path).unwrap() {
packages.push(Package {
path: relative_path.clone(),
owner,
package_type: PackageType::Javascript,
});
}
}
EntryType::CodeownerFile(absolute_path, relative_path) => {
let owner = std::fs::read_to_string(absolute_path).unwrap();
let owner = owner.trim().to_owned();
directory_codeowner_files.push(DirectoryCodeownersFile {
path: relative_path.clone(),
owner,
});
}
EntryType::TeamFile(absolute_path, _relative_path) => {
let file = File::open(&absolute_path).unwrap();
let deserializer: deserializers::Team = serde_yaml::from_reader(file).unwrap();
teams.push(Team {
path: absolute_path.to_owned(),
name: deserializer.name,
github_team: deserializer.github.team,
owned_globs: deserializer.owned_globs,
owned_gems: deserializer.ruby.map(|ruby| ruby.owned_gems).unwrap_or_default(),
avoid_ownership: deserializer.github.do_not_add_to_codeowners_file,
});
}
EntryType::NullEntry() => {}
}
}
Ok(Project {
base_path: self.base_path.to_owned(),
files: project_files,
vendored_gems,
teams,
packages,
codeowners_file_path: self.codeowners_file_path.to_path_buf(),
directory_codeowner_files,
})
}

fn build_project_from_entry_types_rayon(&mut self, entry_types: Vec<EntryType>) -> Result<Project, Error> {
dbg!("building entry types with rayon");
let (project_files, packages, vendored_gems, directory_codeowners, teams): (Vec<_>, Vec<_>, Vec<_>, Vec<_>, Vec<_>) = entry_types
.into_par_iter()
.fold(
Expand Down

0 comments on commit 0885b30

Please sign in to comment.