From 45609b3011e6b25e42c2e16a5aa930d09498f954 Mon Sep 17 00:00:00 2001 From: Jens Reimann <ctron@dentrassi.de> Date: Fri, 8 Mar 2024 10:32:56 +0100 Subject: [PATCH 1/7] chore: fix typo --- backend/api/src/system/advisory/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/api/src/system/advisory/mod.rs b/backend/api/src/system/advisory/mod.rs index 213371a27..968cd8dc1 100644 --- a/backend/api/src/system/advisory/mod.rs +++ b/backend/api/src/system/advisory/mod.rs @@ -55,17 +55,17 @@ impl InnerSystem { pub async fn ingest_advisory( &self, - identifer: &str, + identifier: &str, location: &str, sha256: &str, tx: Transactional<'_>, ) -> Result<AdvisoryContext, Error> { - if let Some(found) = self.get_advisory(identifer, location, sha256).await? { + if let Some(found) = self.get_advisory(identifier, location, sha256).await? { return Ok(found); } let model = entity::advisory::ActiveModel { - identifier: Set(identifer.to_string()), + identifier: Set(identifier.to_string()), location: Set(location.to_string()), sha256: Set(sha256.to_string()), ..Default::default() From b8858b649fbf2a79a8074a8512d0d5e0570f206d Mon Sep 17 00:00:00 2001 From: Jens Reimann <ctron@dentrassi.de> Date: Fri, 8 Mar 2024 10:38:17 +0100 Subject: [PATCH 2/7] refactor: digest in one step, pass types actually required --- backend/api/src/system/advisory/mod.rs | 18 +++++++++++------- backend/importer/src/csaf/mod.rs | 11 ++++------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/backend/api/src/system/advisory/mod.rs b/backend/api/src/system/advisory/mod.rs index 968cd8dc1..a37c42760 100644 --- a/backend/api/src/system/advisory/mod.rs +++ b/backend/api/src/system/advisory/mod.rs @@ -55,19 +55,23 @@ impl InnerSystem { pub async fn ingest_advisory( &self, - identifier: &str, - location: &str, - sha256: &str, + identifier: impl Into<String>, + location: impl Into<String>, + sha256: impl Into<String>, tx: Transactional<'_>, ) -> Result<AdvisoryContext, Error> { - if let Some(found) = self.get_advisory(identifier, location, sha256).await? { + let identifier = identifier.into(); + let location = location.into(); + let sha256 = sha256.into(); + + if let Some(found) = self.get_advisory(&identifier, &location, &sha256).await? { return Ok(found); } let model = entity::advisory::ActiveModel { - identifier: Set(identifier.to_string()), - location: Set(location.to_string()), - sha256: Set(sha256.to_string()), + identifier: Set(identifier), + location: Set(location), + sha256: Set(sha256), ..Default::default() }; diff --git a/backend/importer/src/csaf/mod.rs b/backend/importer/src/csaf/mod.rs index f408496fe..aaabcab6e 100644 --- a/backend/importer/src/csaf/mod.rs +++ b/backend/importer/src/csaf/mod.rs @@ -5,7 +5,6 @@ use csaf_walker::source::{DispatchSource, FileSource, HttpSource}; use csaf_walker::validation::{ValidatedAdvisory, ValidationError, ValidationVisitor}; use csaf_walker::visitors::filter::{FilterConfig, FilteringVisitor}; use csaf_walker::walker::Walker; -use sha2::digest::Output; use sha2::{Digest, Sha256}; use std::collections::HashSet; use std::process::ExitCode; @@ -133,12 +132,10 @@ async fn process(system: &InnerSystem, doc: ValidatedAdvisory) -> anyhow::Result } log::info!("Ingesting: {}", doc.url); - let sha256: String = match doc.sha256.clone() { + let sha256 = match doc.sha256.clone() { Some(sha) => sha.expected.clone(), None => { - let mut actual = Sha256::new(); - actual.update(&doc.data); - let digest: Output<Sha256> = actual.finalize(); + let digest = Sha256::digest(&doc.data); Hex(&digest).to_lower() } }; @@ -146,8 +143,8 @@ async fn process(system: &InnerSystem, doc: ValidatedAdvisory) -> anyhow::Result let advisory = system .ingest_advisory( &csaf.document.tracking.id, - doc.url.as_ref(), - &sha256, + doc.url.to_string(), + sha256, Transactional::None, ) .await?; From ea760fd130856c0c9cb2e1498defaff84940a0e0 Mon Sep 17 00:00:00 2001 From: Jens Reimann <ctron@dentrassi.de> Date: Fri, 8 Mar 2024 10:46:59 +0100 Subject: [PATCH 3/7] docs: import instructions --- backend/importer/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 backend/importer/README.md diff --git a/backend/importer/README.md b/backend/importer/README.md new file mode 100644 index 000000000..ba71c350c --- /dev/null +++ b/backend/importer/README.md @@ -0,0 +1,9 @@ +# Importing data + +## Importing advisories + +```bash +cargo run --bin +``` + +## Importing SBOMs From 4fbf5fdc9fc41f3f6479cb954fc5b458e1524aae Mon Sep 17 00:00:00 2001 From: Jens Reimann <ctron@dentrassi.de> Date: Fri, 8 Mar 2024 12:45:02 +0100 Subject: [PATCH 4/7] chore: work towards better importing --- backend/importer/.gitignore | 1 + backend/importer/Cargo.toml | 3 ++- backend/importer/README.md | 21 ++++++++++++++++++++- backend/importer/src/csaf/mod.rs | 31 ++++++++++++++++++++++++++++--- 4 files changed, 51 insertions(+), 5 deletions(-) create mode 100644 backend/importer/.gitignore diff --git a/backend/importer/.gitignore b/backend/importer/.gitignore new file mode 100644 index 000000000..3af0ccb68 --- /dev/null +++ b/backend/importer/.gitignore @@ -0,0 +1 @@ +/data diff --git a/backend/importer/Cargo.toml b/backend/importer/Cargo.toml index 334ec3adf..a6387b41e 100644 --- a/backend/importer/Cargo.toml +++ b/backend/importer/Cargo.toml @@ -29,4 +29,5 @@ walker-common = "=0.6.0-alpha.8" url = "2.4.0" sha2 = "0.10.6" async-trait = "0.1" - +indicatif = { version = "0.17.8", features = [] } +indicatif-log-bridge = "0.2" diff --git a/backend/importer/README.md b/backend/importer/README.md index ba71c350c..97736e25f 100644 --- a/backend/importer/README.md +++ b/backend/importer/README.md @@ -1,9 +1,28 @@ # Importing data +The following commands require some env-var for connecting to the database. You can supply them e.g., using `env`: + +```bash +env DB_USER=postgres DB_PASSWORD=eggs <command to run> +``` + ## Importing advisories ```bash -cargo run --bin +cargo run -p trustify-cli -- importer csaf https://www.redhat.com --only-prefix cve-2023- +``` + +Or, using a locally cached version: + +```bash +mkdir data/csaf +csaf download https://www.redhat.com --only-prefix cve-2023- -d data/csaf +``` + +And then: + +```bash +cargo run -p trustify-cli -- importer csaf data/csaf ``` ## Importing SBOMs diff --git a/backend/importer/src/csaf/mod.rs b/backend/importer/src/csaf/mod.rs index aaabcab6e..f5a648051 100644 --- a/backend/importer/src/csaf/mod.rs +++ b/backend/importer/src/csaf/mod.rs @@ -5,8 +5,11 @@ use csaf_walker::source::{DispatchSource, FileSource, HttpSource}; use csaf_walker::validation::{ValidatedAdvisory, ValidationError, ValidationVisitor}; use csaf_walker::visitors::filter::{FilterConfig, FilteringVisitor}; use csaf_walker::walker::Walker; +use indicatif::MultiProgress; +use indicatif_log_bridge::LogWrapper; use sha2::{Digest, Sha256}; use std::collections::HashSet; +use std::io::IsTerminal; use std::process::ExitCode; use std::time::SystemTime; use time::{Date, Month, UtcOffset}; @@ -15,6 +18,8 @@ use trustify_api::system::InnerSystem; use trustify_common::config::Database; use url::Url; use walker_common::fetcher::Fetcher; +use walker_common::progress::indicatif::MultiIndicatif; +use walker_common::progress::{NoProgress, Progress}; use walker_common::utils::hex::Hex; use walker_common::validate::ValidationOptions; @@ -42,7 +47,7 @@ pub struct ImportCsafCommand { impl ImportCsafCommand { pub async fn run(self) -> anyhow::Result<ExitCode> { - env_logger::init(); + let progress = progress(); let system = InnerSystem::with_config(&self.database).await?; @@ -84,7 +89,7 @@ impl ImportCsafCommand { }; let url = doc.url.clone(); - log::info!("processing: {url}"); + log::debug!("processing: {url}"); if let Err(err) = process(&system, doc).await { log::warn!("Failed to process {url}: {err}"); @@ -106,7 +111,7 @@ impl ImportCsafCommand { // walker - let mut walker = Walker::new(source); + let mut walker = Walker::new(source).with_progress(progress); if !self.skip_url.is_empty() { // set up a distribution filter by URL @@ -122,6 +127,26 @@ impl ImportCsafCommand { } } +fn progress() -> Progress { + let mut builder = env_logger::builder(); + let logger = builder.build(); + + match std::io::stdin().is_terminal() { + true => { + let max_level = logger.filter(); + let multi = MultiProgress::new(); + + let log = LogWrapper::new(multi.clone(), logger); + // NOTE: LogWrapper::try_init is buggy and messes up the log levels + log::set_boxed_logger(Box::new(log)).unwrap(); + log::set_max_level(max_level); + + Progress::new(MultiIndicatif(multi)) + } + false => Progress::new(NoProgress), + } +} + /// Process a single, validated advisory async fn process(system: &InnerSystem, doc: ValidatedAdvisory) -> anyhow::Result<()> { let csaf = serde_json::from_slice::<Csaf>(&doc.data)?; From 380bcd48d1b6eb2bd978413539c85d5f295f96eb Mon Sep 17 00:00:00 2001 From: Jens Reimann <ctron@dentrassi.de> Date: Fri, 8 Mar 2024 13:10:56 +0100 Subject: [PATCH 5/7] feat(import): add a progress meter for getting an ETA --- backend/importer/src/csaf/mod.rs | 23 ++--------------------- backend/importer/src/lib.rs | 1 + backend/importer/src/progress.rs | 26 ++++++++++++++++++++++++++ backend/importer/src/sbom/mod.rs | 8 ++++++-- 4 files changed, 35 insertions(+), 23 deletions(-) create mode 100644 backend/importer/src/progress.rs diff --git a/backend/importer/src/csaf/mod.rs b/backend/importer/src/csaf/mod.rs index f5a648051..a7447800e 100644 --- a/backend/importer/src/csaf/mod.rs +++ b/backend/importer/src/csaf/mod.rs @@ -1,3 +1,4 @@ +use crate::progress::init_log_and_progress; use ::csaf::document::Category; use ::csaf::Csaf; use csaf_walker::retrieve::RetrievingVisitor; @@ -47,7 +48,7 @@ pub struct ImportCsafCommand { impl ImportCsafCommand { pub async fn run(self) -> anyhow::Result<ExitCode> { - let progress = progress(); + let progress = init_log_and_progress(); let system = InnerSystem::with_config(&self.database).await?; @@ -127,26 +128,6 @@ impl ImportCsafCommand { } } -fn progress() -> Progress { - let mut builder = env_logger::builder(); - let logger = builder.build(); - - match std::io::stdin().is_terminal() { - true => { - let max_level = logger.filter(); - let multi = MultiProgress::new(); - - let log = LogWrapper::new(multi.clone(), logger); - // NOTE: LogWrapper::try_init is buggy and messes up the log levels - log::set_boxed_logger(Box::new(log)).unwrap(); - log::set_max_level(max_level); - - Progress::new(MultiIndicatif(multi)) - } - false => Progress::new(NoProgress), - } -} - /// Process a single, validated advisory async fn process(system: &InnerSystem, doc: ValidatedAdvisory) -> anyhow::Result<()> { let csaf = serde_json::from_slice::<Csaf>(&doc.data)?; diff --git a/backend/importer/src/lib.rs b/backend/importer/src/lib.rs index c67fa5bbe..b2bef2006 100644 --- a/backend/importer/src/lib.rs +++ b/backend/importer/src/lib.rs @@ -4,6 +4,7 @@ use clap::Subcommand; use trustify_common::config::Database; mod csaf; +mod progress; mod sbom; #[derive(Subcommand, Debug)] diff --git a/backend/importer/src/progress.rs b/backend/importer/src/progress.rs new file mode 100644 index 000000000..def86c682 --- /dev/null +++ b/backend/importer/src/progress.rs @@ -0,0 +1,26 @@ +use indicatif::MultiProgress; +use indicatif_log_bridge::LogWrapper; +use std::io::IsTerminal; +use walker_common::progress::indicatif::MultiIndicatif; +use walker_common::progress::{NoProgress, Progress}; + +/// Set up the env_logger and attach a progress interface if we are running on a terminal. +pub(crate) fn init_log_and_progress() -> Progress { + let mut builder = env_logger::builder(); + let logger = builder.build(); + + match std::io::stdin().is_terminal() { + true => { + let max_level = logger.filter(); + let multi = MultiProgress::new(); + + let log = LogWrapper::new(multi.clone(), logger); + // NOTE: LogWrapper::try_init is buggy and messes up the log levels + log::set_boxed_logger(Box::new(log)).unwrap(); + log::set_max_level(max_level); + + Progress::new(MultiIndicatif(multi)) + } + false => Progress::new(NoProgress), + } +} diff --git a/backend/importer/src/sbom/mod.rs b/backend/importer/src/sbom/mod.rs index 16bc9e168..85a8b4f6c 100644 --- a/backend/importer/src/sbom/mod.rs +++ b/backend/importer/src/sbom/mod.rs @@ -1,3 +1,4 @@ +use crate::progress::init_log_and_progress; use sbom_walker::{ retrieve::RetrievingVisitor, source::{DispatchSource, FileSource, HttpSource}, @@ -26,7 +27,7 @@ pub struct ImportSbomCommand { impl ImportSbomCommand { pub async fn run(self) -> anyhow::Result<ExitCode> { - env_logger::init(); + let progress = init_log_and_progress(); log::info!("Ingesting SBOMs"); @@ -63,7 +64,10 @@ impl ImportSbomCommand { // walker - Walker::new(source).walk(visitor).await?; + Walker::new(source) + .with_progress(progress) + .walk(visitor) + .await?; Ok(ExitCode::SUCCESS) } From ae396525299c6dbebabbac8cb4ea607804f7f6bd Mon Sep 17 00:00:00 2001 From: Jens Reimann <ctron@dentrassi.de> Date: Fri, 8 Mar 2024 16:17:49 +0100 Subject: [PATCH 6/7] feat(import): allow using multiple worker --- backend/Cargo.toml | 6 ++--- backend/importer/src/csaf/mod.rs | 38 ++++++++++++++------------------ backend/importer/src/progress.rs | 7 +++--- 3 files changed, 23 insertions(+), 28 deletions(-) diff --git a/backend/Cargo.toml b/backend/Cargo.toml index b96e8d77c..6efc0ba5c 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -11,9 +11,9 @@ members = [ ] [patch.crates-io] -csaf-walker = { git = "https://github.com/ctron/csaf-walker", rev = "c96c96f2f2dff240c394c065354740e7208f4ee1" } -sbom-walker = { git = "https://github.com/ctron/csaf-walker", rev = "c96c96f2f2dff240c394c065354740e7208f4ee1" } -walker-common = { git = "https://github.com/ctron/csaf-walker", rev = "c96c96f2f2dff240c394c065354740e7208f4ee1" } +csaf-walker = { git = "https://github.com/ctron/csaf-walker", rev = "95d1d1e25a0def07e563f6a92722204cee861ea4" } +sbom-walker = { git = "https://github.com/ctron/csaf-walker", rev = "95d1d1e25a0def07e563f6a92722204cee861ea4" } +walker-common = { git = "https://github.com/ctron/csaf-walker", rev = "95d1d1e25a0def07e563f6a92722204cee861ea4" } #csaf-walker = { path = "../../csaf-walker/csaf" } #sbom-walker = { path = "../../csaf-walker/sbom" } diff --git a/backend/importer/src/csaf/mod.rs b/backend/importer/src/csaf/mod.rs index a7447800e..20fdf1c84 100644 --- a/backend/importer/src/csaf/mod.rs +++ b/backend/importer/src/csaf/mod.rs @@ -1,28 +1,21 @@ use crate::progress::init_log_and_progress; -use ::csaf::document::Category; -use ::csaf::Csaf; -use csaf_walker::retrieve::RetrievingVisitor; -use csaf_walker::source::{DispatchSource, FileSource, HttpSource}; -use csaf_walker::validation::{ValidatedAdvisory, ValidationError, ValidationVisitor}; -use csaf_walker::visitors::filter::{FilterConfig, FilteringVisitor}; -use csaf_walker::walker::Walker; -use indicatif::MultiProgress; -use indicatif_log_bridge::LogWrapper; +use ::csaf::{document::Category, Csaf}; +use csaf_walker::{ + retrieve::RetrievingVisitor, + source::{DispatchSource, FileSource, HttpSource}, + validation::{ValidatedAdvisory, ValidationError, ValidationVisitor}, + visitors::filter::{FilterConfig, FilteringVisitor}, + walker::Walker, +}; use sha2::{Digest, Sha256}; use std::collections::HashSet; -use std::io::IsTerminal; use std::process::ExitCode; use std::time::SystemTime; use time::{Date, Month, UtcOffset}; -use trustify_api::db::Transactional; -use trustify_api::system::InnerSystem; +use trustify_api::{db::Transactional, system::InnerSystem}; use trustify_common::config::Database; use url::Url; -use walker_common::fetcher::Fetcher; -use walker_common::progress::indicatif::MultiIndicatif; -use walker_common::progress::{NoProgress, Progress}; -use walker_common::utils::hex::Hex; -use walker_common::validate::ValidationOptions; +use walker_common::{fetcher::Fetcher, utils::hex::Hex, validate::ValidationOptions}; /// Run the importer #[derive(clap::Args, Debug)] @@ -34,16 +27,19 @@ pub struct ImportCsafCommand { pub source: String, /// If the source is a full source URL - #[arg(long)] + #[arg(long, env)] pub full_source_url: bool, /// Distribution URLs or ROLIE feed URLs to skip - #[arg(long)] + #[arg(long, env)] pub skip_url: Vec<String>, /// Only consider files having any of those prefixes. An empty list will accept all files. - #[arg(long)] + #[arg(long, env)] pub only_prefix: Vec<String>, + + #[arg(long, env, default_value_t = 1)] + pub workers: usize, } impl ImportCsafCommand { @@ -122,7 +118,7 @@ impl ImportCsafCommand { }); } - walker.walk(visitor).await?; + walker.walk_parallel(self.workers, visitor).await?; Ok(ExitCode::SUCCESS) } diff --git a/backend/importer/src/progress.rs b/backend/importer/src/progress.rs index def86c682..b0099396b 100644 --- a/backend/importer/src/progress.rs +++ b/backend/importer/src/progress.rs @@ -1,8 +1,7 @@ use indicatif::MultiProgress; use indicatif_log_bridge::LogWrapper; use std::io::IsTerminal; -use walker_common::progress::indicatif::MultiIndicatif; -use walker_common::progress::{NoProgress, Progress}; +use walker_common::progress::Progress; /// Set up the env_logger and attach a progress interface if we are running on a terminal. pub(crate) fn init_log_and_progress() -> Progress { @@ -19,8 +18,8 @@ pub(crate) fn init_log_and_progress() -> Progress { log::set_boxed_logger(Box::new(log)).unwrap(); log::set_max_level(max_level); - Progress::new(MultiIndicatif(multi)) + multi.into() } - false => Progress::new(NoProgress), + false => Progress::default(), } } From 807931f7837e76d7120e0d8f7e6b8b6f82e4ab7c Mon Sep 17 00:00:00 2001 From: Jens Reimann <ctron@dentrassi.de> Date: Fri, 8 Mar 2024 17:41:50 +0100 Subject: [PATCH 7/7] chore: remove unwrap() --- backend/importer/src/csaf/mod.rs | 2 +- backend/importer/src/progress.rs | 9 +++++---- backend/importer/src/sbom/mod.rs | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/backend/importer/src/csaf/mod.rs b/backend/importer/src/csaf/mod.rs index 20fdf1c84..8e867f841 100644 --- a/backend/importer/src/csaf/mod.rs +++ b/backend/importer/src/csaf/mod.rs @@ -44,7 +44,7 @@ pub struct ImportCsafCommand { impl ImportCsafCommand { pub async fn run(self) -> anyhow::Result<ExitCode> { - let progress = init_log_and_progress(); + let progress = init_log_and_progress()?; let system = InnerSystem::with_config(&self.database).await?; diff --git a/backend/importer/src/progress.rs b/backend/importer/src/progress.rs index b0099396b..ee3b97eb1 100644 --- a/backend/importer/src/progress.rs +++ b/backend/importer/src/progress.rs @@ -1,10 +1,11 @@ +use anyhow::Context; use indicatif::MultiProgress; use indicatif_log_bridge::LogWrapper; use std::io::IsTerminal; use walker_common::progress::Progress; /// Set up the env_logger and attach a progress interface if we are running on a terminal. -pub(crate) fn init_log_and_progress() -> Progress { +pub(crate) fn init_log_and_progress() -> anyhow::Result<Progress> { let mut builder = env_logger::builder(); let logger = builder.build(); @@ -15,11 +16,11 @@ pub(crate) fn init_log_and_progress() -> Progress { let log = LogWrapper::new(multi.clone(), logger); // NOTE: LogWrapper::try_init is buggy and messes up the log levels - log::set_boxed_logger(Box::new(log)).unwrap(); + log::set_boxed_logger(Box::new(log)).context("failed to initialize logger")?; log::set_max_level(max_level); - multi.into() + Ok(multi.into()) } - false => Progress::default(), + false => Ok(Progress::default()), } } diff --git a/backend/importer/src/sbom/mod.rs b/backend/importer/src/sbom/mod.rs index 85a8b4f6c..a754dbf17 100644 --- a/backend/importer/src/sbom/mod.rs +++ b/backend/importer/src/sbom/mod.rs @@ -27,7 +27,7 @@ pub struct ImportSbomCommand { impl ImportSbomCommand { pub async fn run(self) -> anyhow::Result<ExitCode> { - let progress = init_log_and_progress(); + let progress = init_log_and_progress()?; log::info!("Ingesting SBOMs");