From f3963260b34340da9010d1686973155a9608907e Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Thu, 18 Jan 2024 14:26:25 -0800 Subject: [PATCH 1/2] add `.is_rib()` function to BrokerItem --- src/cli/main.rs | 2 +- src/lib.rs | 22 ++++++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/cli/main.rs b/src/cli/main.rs index 04ccbff..2ab95c7 100644 --- a/src/cli/main.rs +++ b/src/cli/main.rs @@ -543,7 +543,7 @@ fn main() { } let now = Utc::now().naive_utc(); (now - item.ts_start) - > match item.data_type.as_str() == "rib" { + > match item.is_rib() { true => Duration::hours(24), false => Duration::hours(1), } diff --git a/src/lib.rs b/src/lib.rs index b4266a9..1f531e0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -497,12 +497,14 @@ impl BgpkitBroker { if let Some(data_type) = &self.query_params.data_type { match data_type.to_lowercase().as_str() { "rib" | "ribs" | "r" => { - if item.data_type.as_str() != "rib" { + if !item.is_rib() { + // if not RIB file, not match matches = false } } "update" | "updates" => { - if item.data_type.as_str() != "updates" { + if item.is_rib() { + // if is RIB file, not match matches = false } } @@ -546,6 +548,16 @@ impl BgpkitBroker { } } +impl BrokerItem { + /// Checks if the data type is "rib" (i.e. RIB dump). + /// + /// # Return + /// Returns `true` if the data type is "rib", otherwise `false`. + pub fn is_rib(&self) -> bool { + self.data_type.as_str() == "rib" + } +} + /// Iterator for BGPKIT Broker that iterates through one [BrokerItem] at a time. 
/// /// The [IntoIterator] trait is implemented for both the struct and the reference, so that you can @@ -739,13 +751,11 @@ mod tests { let broker = BgpkitBroker::new().data_type("rib".to_string()); let items = broker.latest().unwrap(); - assert!(items.iter().all(|item| item.data_type.as_str() == "rib")); + assert!(items.iter().all(|item| item.is_rib())); let broker = BgpkitBroker::new().data_type("update".to_string()); let items = broker.latest().unwrap(); - assert!(items - .iter() - .all(|item| item.data_type.as_str() == "updates")); + assert!(items.iter().all(|item| !item.is_rib())); let broker = BgpkitBroker::new().collector_id("rrc00".to_string()); let items = broker.latest().unwrap(); From de62abbeb911a6f40b62d08e4d4b362e34c5d8ac Mon Sep 17 00:00:00 2001 From: Mingwei Zhang Date: Thu, 18 Jan 2024 14:55:52 -0800 Subject: [PATCH 2/2] add strict ordering for BrokerItem --- src/item.rs | 163 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 14 +---- src/query.rs | 36 +----------- 3 files changed, 167 insertions(+), 46 deletions(-) create mode 100644 src/item.rs diff --git a/src/item.rs b/src/item.rs new file mode 100644 index 0000000..c499c87 --- /dev/null +++ b/src/item.rs @@ -0,0 +1,163 @@ +//! BrokerItem module defines the broker search results +use serde::{Deserialize, Serialize}; +use std::cmp::Ordering; +use std::fmt::{Display, Formatter}; + +/// BGPKIT Broker data item. +/// +/// The fields are: +/// - [ts_start][BrokerItem::ts_start]: the starting timestamp of the data file +/// - [ts_end][BrokerItem::ts_end]: the ending timestamp of the data file +/// - [collector_id][BrokerItem::collector_id]: the collector id of the item: e.g. 
`rrc00` +/// - [data_type][BrokerItem::data_type]: type of the data item: `rib` or `updates` +/// - [url][BrokerItem::url]: the URL to the data item file +/// - [rough_size][BrokerItem::rough_size]: rough file size extracted from the collector webpage +/// - [exact_size][BrokerItem::exact_size]: exact file size extracted by crawling the file +/// +/// An array of [BrokerItem]s can be sorted with the following order: +/// 1. smaller timestamp before larger timestamp +/// 2. RIB before updates +/// 3. then alphabetical order on collector ID (route-views before rrc) +#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq, Hash)] +#[cfg_attr(feature = "cli", derive(tabled::Tabled, utoipa::ToSchema))] +pub struct BrokerItem { + /// start timestamp + pub ts_start: chrono::NaiveDateTime, + /// end timestamps + pub ts_end: chrono::NaiveDateTime, + /// the collector id of the item: e.g. `rrc00` + pub collector_id: String, + /// type of the data item: `rib` or `updates` + pub data_type: String, + /// the URL to the data item file + pub url: String, + /// rough file size extracted from the hosting site page + pub rough_size: i64, + /// exact file size extracted by crawling the file + pub exact_size: i64, +} + +impl BrokerItem { + /// Checks if the data type is "rib" (i.e. RIB dump). + /// + /// # Return + /// Returns `true` if the data type is "rib", otherwise `false`. + pub fn is_rib(&self) -> bool { + self.data_type.as_str() == "rib" + } +} + +impl Display for BrokerItem { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", serde_json::to_string(self).unwrap()) + } +} + +impl PartialOrd for BrokerItem { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for BrokerItem { + fn cmp(&self, other: &Self) -> Ordering { + // compare BrokerItems with the following sequence + // 1. ts_start + // 2. data_type + // 3. 
collector_id + self.ts_start + .cmp(&other.ts_start) // smaller timestamp comes earlier + .then(self.data_type.cmp(&other.data_type)) // RIB before updates on the same timestamp + .then(self.collector_id.cmp(&other.collector_id)) // route-viewsX before rrcX + } +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::NaiveDateTime; + + #[test] + fn test_sorting() { + let mut items = vec![ + BrokerItem { + ts_start: NaiveDateTime::from_timestamp_opt(10, 0).unwrap(), + ts_end: Default::default(), + collector_id: "rrc00".to_string(), + data_type: "updates".to_string(), + url: "".to_string(), + rough_size: 0, + exact_size: 0, + }, + BrokerItem { + ts_start: NaiveDateTime::from_timestamp_opt(9, 0).unwrap(), + ts_end: Default::default(), + collector_id: "rrc00".to_string(), + data_type: "updates".to_string(), + url: "".to_string(), + rough_size: 0, + exact_size: 0, + }, + BrokerItem { + ts_start: NaiveDateTime::from_timestamp_opt(10, 0).unwrap(), + ts_end: Default::default(), + collector_id: "rrc00".to_string(), + data_type: "rib".to_string(), + url: "".to_string(), + rough_size: 0, + exact_size: 0, + }, + BrokerItem { + ts_start: NaiveDateTime::from_timestamp_opt(10, 0).unwrap(), + ts_end: Default::default(), + collector_id: "route-views2".to_string(), + data_type: "rib".to_string(), + url: "".to_string(), + rough_size: 0, + exact_size: 0, + }, + ]; + let correct_items = vec![ + BrokerItem { + ts_start: NaiveDateTime::from_timestamp_opt(9, 0).unwrap(), + ts_end: Default::default(), + collector_id: "rrc00".to_string(), + data_type: "updates".to_string(), + url: "".to_string(), + rough_size: 0, + exact_size: 0, + }, + BrokerItem { + ts_start: NaiveDateTime::from_timestamp_opt(10, 0).unwrap(), + ts_end: Default::default(), + collector_id: "route-views2".to_string(), + data_type: "rib".to_string(), + url: "".to_string(), + rough_size: 0, + exact_size: 0, + }, + BrokerItem { + ts_start: NaiveDateTime::from_timestamp_opt(10, 0).unwrap(), + ts_end: Default::default(), + 
collector_id: "rrc00".to_string(), + data_type: "rib".to_string(), + url: "".to_string(), + rough_size: 0, + exact_size: 0, + }, + BrokerItem { + ts_start: NaiveDateTime::from_timestamp_opt(10, 0).unwrap(), + ts_end: Default::default(), + collector_id: "rrc00".to_string(), + data_type: "updates".to_string(), + url: "".to_string(), + rough_size: 0, + exact_size: 0, + }, + ]; + + assert_ne!(items, correct_items); + items.sort(); + assert_eq!(items, correct_items); + } +} diff --git a/src/lib.rs b/src/lib.rs index 1f531e0..4d21512 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -90,6 +90,7 @@ mod crawler; #[cfg(feature = "backend")] pub mod db; mod error; +mod item; mod query; use crate::query::{CollectorLatestResult, QueryResult}; @@ -100,7 +101,8 @@ pub use crawler::{crawl_collector, load_collectors, Collector}; #[cfg(feature = "backend")] pub use db::{LocalBrokerDb, UpdatesMeta, DEFAULT_PAGE_SIZE}; pub use error::BrokerError; -pub use query::{BrokerItem, QueryParams, SortOrder}; +pub use item::BrokerItem; +pub use query::{QueryParams, SortOrder}; /// BgpkitBroker struct maintains the broker's URL and handles making API queries. /// @@ -548,16 +550,6 @@ impl BgpkitBroker { } } -impl BrokerItem { - /// Checks if the data type is "rib" (i.e. RIB dump). - /// - /// # Return - /// Returns `true` if the data type is "rib", otherwise `false`. - pub fn is_rib(&self) -> bool { - self.data_type.as_str() == "rib" - } -} - /// Iterator for BGPKIT Broker that iterates through one [BrokerItem] at a time. /// /// The [IntoIterator] trait is implemented for both the struct and the reference, so that you can diff --git a/src/query.rs b/src/query.rs index 2fe24a6..2776da8 100644 --- a/src/query.rs +++ b/src/query.rs @@ -1,4 +1,5 @@ //! Query-related structs and implementation. +use crate::BrokerItem; use serde::{Deserialize, Serialize}; use std::fmt::{Display, Formatter}; @@ -220,35 +221,6 @@ impl QueryParams { } } -/// BGPKIT Broker data item. 
-/// -/// The fields are: -/// - [ts_start][BrokerItem::ts_start]: the starting timestamp of the data file -/// - [ts_end][BrokerItem::ts_end]: the ending timestamp of the data file -/// - [collector_id][BrokerItem::collector_id]: the collector id of the item: e.g. `rrc00` -/// - [data_type][BrokerItem::data_type]: type of the data item: `rib` or `updates` -/// - [url][BrokerItem::url]: the URL to the data item file -/// - [rough_size][BrokerItem::rough_size]: rough file size extracted from the collector webpage -/// - [exact_size][BrokerItem::exact_size]: exact file size extracted by crawling the file -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(feature = "cli", derive(tabled::Tabled, utoipa::ToSchema))] -pub struct BrokerItem { - /// start timestamp - pub ts_start: chrono::NaiveDateTime, - /// end timestamps - pub ts_end: chrono::NaiveDateTime, - /// the collector id of the item: e.g. `rrc00` - pub collector_id: String, - /// type of the data item: `rib` or `updates` - pub data_type: String, - /// the URL to the data item file - pub url: String, - /// rough file size extracted from the hosting site page - pub rough_size: i64, - /// exact file size extracted by crawling the file - pub exact_size: i64, -} - #[derive(Debug, Clone, Serialize, Deserialize)] #[cfg_attr(feature = "cli", derive(tabled::Tabled, utoipa::ToSchema))] pub struct BrokerCollector { @@ -290,12 +262,6 @@ pub(crate) struct QueryResult { pub data: Vec, } -impl Display for BrokerItem { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", serde_json::to_string(self).unwrap()) - } -} - impl Display for QueryResult { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "{}", serde_json::to_string(self).unwrap())