From b8776638382c9a83d77265a798023e75c7fbc16e Mon Sep 17 00:00:00 2001 From: praveer kumar Date: Wed, 24 Jul 2024 21:13:13 +0200 Subject: [PATCH] Restructuring the project files --- Cargo.lock | 228 +++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 3 +- src/main.rs | 64 ++++++------ src/parsers/jsonl.rs | 30 ++++++ src/parsers/mod.rs | 1 + 5 files changed, 293 insertions(+), 33 deletions(-) create mode 100644 src/parsers/jsonl.rs create mode 100644 src/parsers/mod.rs diff --git a/Cargo.lock b/Cargo.lock index da33672..8aed757 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,55 @@ dependencies = [ "memchr", ] +[[package]] +name = "anstream" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + +[[package]] +name = "anstyle-parse" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +dependencies = [ + "anstyle", + "windows-sys", +] + [[package]] name = "atty" version = "0.2.14" @@ -47,6 +96,52 @@ dependencies = [ "winapi", ] +[[package]] +name = "clap" +version = "4.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f6b81fb3c84f5563d509c59b5a48d935f689e993afa90fe39047f05adef9142" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca6706fd5224857d9ac5eb9355f6683563cc0541c7cd9d014043b57cbec78ac" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" + +[[package]] +name = "colorchoice" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" + [[package]] name = "console" version = "0.15.0" @@ -79,6 +174,12 @@ dependencies = [ "termcolor", ] +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" version = "0.1.19" @@ -106,6 +207,12 @@ dependencies = [ "regex", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" + [[package]] name = "itoa" version = "0.4.8" @@ -170,6 +277,24 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + [[package]] name = "regex" version = "1.5.4" @@ -198,6 +323,7 @@ name = "schema" version = "0.1.0" dependencies = [ "chrono", + "clap", "env_logger", "indicatif", "log", @@ -222,6 +348,23 @@ dependencies = [ "serde", ] +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "termcolor" version = "1.1.2" @@ -252,6 +395,18 @@ dependencies = [ "winapi", ] +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "wasi" version = "0.10.0+wasi-snapshot-preview1" @@ -288,3 +443,76 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/Cargo.toml b/Cargo.toml index c5033fa..d7f8340 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,4 +11,5 @@ chrono = "0.4.19" env_logger = "0.9.0" serde = "1.0.130" serde_json = "1.0" -indicatif = "0.16.2" \ No newline at end of file +indicatif = "0.16.2" +clap = { version = "4.5.10", features = ["derive"] } diff --git a/src/main.rs b/src/main.rs index 1dafb2b..3bbb038 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,36 +1,20 @@ -use std::env; -use std::fs::File; -use std::path::Path; -use std::io::{self, BufRead, Write}; +mod parsers; + +use clap::Parser; -use chrono::Local; -use log::LevelFilter; use env_logger::{Builder, Target}; -use serde_json::{Result, Value}; -use indicatif::{ProgressBar, ProgressStyle}; +use log::LevelFilter; +use chrono::Local; +use std::io::Write; -fn read_lines

(filename: P) -> io::Result<(io::Lines>, usize)> where P: AsRef, { - let file = File::open(filename)?; - let mut stream = io::BufReader::new(file); - let size = stream.fill_buf()?.len(); - Ok((stream.lines(), size)) -} +use parsers::jsonl::read_jsonl; -fn read_jsonl(filename: &str) -> Result<()> { - if let Ok((lines, size)) = read_lines(filename) { - let bar = ProgressBar::new(size as u64); - bar.set_style(ProgressStyle::default_bar().template("{spinner:.green} [{elapsed_precise}] [{bar:40.green}] ({pos}/{len}, ETA {eta})")); - for line in lines { - bar.inc(1); - if let Ok(data) = line { - log::info!("{}", format!("{}", data)); - let v: Value = serde_json::from_str(&data)?; - bar.set_message(format!("{}",v["name"])); - } - } - bar.finish_with_message("read all lines"); - } - Ok(()) +#[derive(Parser, Debug)] +struct Args { + #[arg(short, long, default_value = "./src/mock/test.jsonl", help="Specify path to the data file")] + path: String, + #[arg(short, long, default_value = "jsonl", help="Specify file format of the data file")] + format: String } fn main() { @@ -42,7 +26,23 @@ fn main() { record.args() ) }).filter(None, LevelFilter::Info).init(); - let args: Vec = env::args().collect(); - let filename = &args[1]; - read_jsonl(filename).expect("Cannot parse json"); + let args = Args::parse(); + + println!("Using the following file: {}", &args.path); + + match args.format.as_str() { + "jsonl" => { + read_jsonl(&args.path).expect("Cannot parse the data file"); + println!("Oh! it's a JSONL !!!"); + }, + "json" => { + println!("Oh! it's a JSON !!!"); + }, + "xml" => { + println!("Oh! it's a XML !!!"); + }, + _ => { + println!("The format does not match any of the possible text formats"); + }, + } } diff --git a/src/parsers/jsonl.rs b/src/parsers/jsonl.rs new file mode 100644 index 0000000..875f6e3 --- /dev/null +++ b/src/parsers/jsonl.rs @@ -0,0 +1,30 @@ +use std::fs::File; +use std::path::Path; + +use std::io::{self, BufRead}; +use serde_json::{Result, Value}; +use indicatif::{ProgressBar, ProgressStyle}; + +pub fn read_lines

(filename: P) -> io::Result<(io::Lines>, usize)> where P: AsRef, { + let file = File::open(filename)?; + let mut stream = io::BufReader::new(file); + let size = stream.fill_buf()?.len(); + Ok((stream.lines(), size)) +} + +pub fn read_jsonl(filename: &str) -> Result<()> { + if let Ok((lines, size)) = read_lines(filename) { + let bar = ProgressBar::new(size as u64); + bar.set_style(ProgressStyle::default_bar().template("{spinner:.green} [{elapsed_precise}] [{bar:40.green}] ({pos}/{len}, ETA {eta})")); + for line in lines { + bar.inc(1); + if let Ok(data) = line { + log::info!("{}", format!("{}", data)); + let v: Value = serde_json::from_str(&data)?; + bar.set_message(format!("{}",v["name"])); + } + } + bar.finish_with_message("read all lines"); + } + Ok(()) +} diff --git a/src/parsers/mod.rs b/src/parsers/mod.rs new file mode 100644 index 0000000..f188eb9 --- /dev/null +++ b/src/parsers/mod.rs @@ -0,0 +1 @@ +pub mod jsonl; \ No newline at end of file