From 047972eb98e295deed33658c8caaf330c2a896e4 Mon Sep 17 00:00:00 2001 From: miampf Date: Thu, 18 Jan 2024 21:09:39 +0000 Subject: [PATCH] feat: Taglines from files (#7) --- .gitignore | 2 ++ Cargo.lock | 51 ++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/lib.rs | 3 +++ src/main.rs | 12 ++++++++-- src/parsers.rs | 6 ----- src/search.rs | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++ tagline.pest | 2 +- 8 files changed, 128 insertions(+), 9 deletions(-) create mode 100644 src/search.rs diff --git a/.gitignore b/.gitignore index ea8c4bf..42fe5d8 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ /target + +/testfiles diff --git a/Cargo.lock b/Cargo.lock index 751a48f..e3e8550 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -137,6 +137,15 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "sha2" version = "0.10.8" @@ -165,6 +174,7 @@ version = "0.1.0" dependencies = [ "pest", "pest_derive", + "walkdir", ] [[package]] @@ -210,3 +220,44 @@ name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "walkdir" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = 
"0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml index 71fd8d5..751bf2b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,3 +8,4 @@ edition = "2021" [dependencies] pest = "2.7.6" pest_derive = "2.7.6" +walkdir = "2.4.0" diff --git a/src/lib.rs b/src/lib.rs index 079be18..ed14c8a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,5 @@ /// parsers contains the relevant grammar parsers pub mod parsers; + +/// search contains functions for searching files and tags. 
+pub mod search; diff --git a/src/main.rs b/src/main.rs index e7a11a9..f5149f6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,11 @@ -fn main() { - println!("Hello, world!"); +use tag::search::get_tags_from_files; + +fn main() -> Result<(), Box<dyn std::error::Error>> { + let tagged_files = get_tags_from_files("testfiles")?; + + for file in tagged_files.iter() { + println!("File {} contains {:?}", file.path.display(), file.tags); + } + + Ok(()) } diff --git a/src/parsers.rs b/src/parsers.rs index 74fc01c..c86dedd 100644 --- a/src/parsers.rs +++ b/src/parsers.rs @@ -47,12 +47,6 @@ mod tests { expected_tags: vec!["#1", "#asdf", "#something-idk"], expected_error: false, }, - TestCase { - name: "success_with_newline", - input: "tags:\n[\n\t#something_else\n]", - expected_tags: vec!["#something_else"], - expected_error: false, - }, TestCase { name: "fail_no_brackets", input: "tags:#1#2#3", diff --git a/src/search.rs b/src/search.rs new file mode 100644 index 0000000..a8b643f --- /dev/null +++ b/src/search.rs @@ -0,0 +1,60 @@ +use std::{ + fs, + io::{BufRead, BufReader}, + path::{Path, PathBuf}, +}; + +use pest::Parser; +use walkdir::WalkDir; + +use crate::parsers::tagline::{self, TaglineParser}; + +/// TaggedFile is a file that contains tags. +#[derive(Clone)] +pub struct TaggedFile { + pub path: PathBuf, + pub tags: Vec<String>, +} + +/// get_tags_from_file() returns a list of tags found in a file. +/// It will return an error if a file has no parsable tags.
+fn get_tags_from_file(file: &Path) -> Result<Vec<String>, Box<dyn std::error::Error>> { + let file = fs::File::open(file)?; + let mut buffer = BufReader::new(file); + let mut tagline = String::new(); + let _ = buffer.read_line(&mut tagline)?; + + let parsed = TaglineParser::parse(tagline::Rule::tagline, tagline.trim())?; + + let mut tags = Vec::new(); + + for tag in parsed { + if tag.as_rule() == tagline::Rule::tag { + tags.push(tag.as_str().to_string()) + } + } + + Ok(tags) +} + +/// get_tags_from_files() recursively retrieves the tags of all files +/// in a given directory. +pub fn get_tags_from_files(directory: &str) -> Result<Vec<TaggedFile>, Box<dyn std::error::Error>> { + let mut tagged_files = Vec::new(); + + for entry in WalkDir::new(directory).follow_links(true) { + let entry = entry?; + + if entry.file_type().is_dir() { + continue; + } + + let tags = get_tags_from_file(entry.path())?; + tagged_files.push(TaggedFile { + path: entry.path().to_owned(), + tags, + }) + } + + Ok(tagged_files.clone()) +} diff --git a/tagline.pest b/tagline.pest index 79a0f9f..afd7492 100644 --- a/tagline.pest +++ b/tagline.pest @@ -2,5 +2,5 @@ tag = {"#" ~ (LETTER|NUMBER|CONNECTOR_PUNCTUATION|DASH_PUNCTUATION)+} taglist = _{"[" ~ tag* ~ "]"} tagline = _{SOI ~ "tags:" ~ taglist ~ EOI} -WHITESPACE = _{" " | "\t" | NEWLINE} +WHITESPACE = _{" " | "\t"}