From 6904ea030aa255b8d0df504d47ea030e0887f993 Mon Sep 17 00:00:00 2001 From: Robert Kruszewski Date: Tue, 5 Nov 2024 13:53:50 +0000 Subject: [PATCH] Replace usages of lazy_static with LazyLock (#1214) fix #1139 --- Cargo.lock | 7 - Cargo.toml | 4 +- bench-vortex/Cargo.toml | 1 - bench-vortex/benches/datafusion.rs | 23 +- bench-vortex/benches/tokio_runtime.rs | 13 +- bench-vortex/src/lib.rs | 22 +- bench-vortex/src/public_bi_data.rs | 330 ++++++++++++++++++-------- bench-vortex/src/tpch/schema.rs | 60 +++-- pyvortex/Cargo.toml | 1 - pyvortex/src/lib.rs | 11 +- vortex-array/Cargo.toml | 1 - vortex-datafusion/Cargo.toml | 1 - vortex-datafusion/src/plans.rs | 18 +- vortex-datetime-dtype/Cargo.toml | 1 - vortex-datetime-dtype/src/temporal.rs | 11 +- vortex-sampling-compressor/Cargo.toml | 1 - vortex-sampling-compressor/src/lib.rs | 25 +- vortex-serde/Cargo.toml | 1 - 18 files changed, 334 insertions(+), 197 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5c39b0003f..3b932e3e53 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -485,7 +485,6 @@ dependencies = [ "humansize", "indicatif", "itertools 0.13.0", - "lazy_static", "log", "mimalloc", "object_store", @@ -3269,7 +3268,6 @@ dependencies = [ "flexbuffers", "futures", "itertools 0.13.0", - "lazy_static", "log", "object_store", "paste", @@ -4485,7 +4483,6 @@ dependencies = [ "hashbrown 0.15.1", "humansize", "itertools 0.13.0", - "lazy_static", "log", "num-traits", "num_enum 0.7.3", @@ -4547,7 +4544,6 @@ dependencies = [ "datafusion-physical-plan", "futures", "itertools 0.13.0", - "lazy_static", "log", "object_store", "pin-project", @@ -4569,7 +4565,6 @@ version = "0.14.0" dependencies = [ "arrow-schema", "jiff", - "lazy_static", "num_enum 0.7.3", "serde", "vortex-dtype", @@ -4780,7 +4775,6 @@ dependencies = [ "chrono", "fsst-rs", "itertools 0.13.0", - "lazy_static", "log", "rand", "vortex-alp", @@ -4851,7 +4845,6 @@ dependencies = [ "futures-executor", "futures-util", "itertools 0.13.0", - "lazy_static", "monoio", 
"object_store", "once_cell", diff --git a/Cargo.toml b/Cargo.toml index 5a01b1825c..8dce279400 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -96,8 +96,6 @@ humansize = "2.1.3" indicatif = "0.17.8" itertools = "0.13.0" jiff = "0.1.8" -lazy_static = "1.4.0" -leb128 = "0.2.5" libfuzzer-sys = "0.4" log = "0.4.21" mimalloc = "0.1.42" @@ -156,7 +154,7 @@ vortex-runend-bool = { version = "0.14.0", path = "./encodings/runend-bool" } vortex-scalar = { version = "0.14.0", path = "./vortex-scalar", default-features = false } vortex-schema = { version = "0.14.0", path = "./vortex-schema" } vortex-serde = { version = "0.14.0", path = "./vortex-serde", default-features = false } -vortex-all = { version = "0.14.0", path = "./vortex-all" } +vortex-all = { version = "0.14.0", path = "./vortex-all" } vortex-sampling-compressor = { version = "0.14.0", path = "./vortex-sampling-compressor" } vortex-zigzag = { version = "0.14.0", path = "./encodings/zigzag" } # END crates published by this project diff --git a/bench-vortex/Cargo.toml b/bench-vortex/Cargo.toml index c0e7cc46d7..d5c0d55c78 100644 --- a/bench-vortex/Cargo.toml +++ b/bench-vortex/Cargo.toml @@ -38,7 +38,6 @@ homedir = { workspace = true } humansize = { workspace = true } indicatif = { workspace = true } itertools = { workspace = true } -lazy_static = { workspace = true } log = { workspace = true } mimalloc = { workspace = true } object_store = { workspace = true, features = ["aws"] } diff --git a/bench-vortex/benches/datafusion.rs b/bench-vortex/benches/datafusion.rs index a31f356e98..53c382db65 100644 --- a/bench-vortex/benches/datafusion.rs +++ b/bench-vortex/benches/datafusion.rs @@ -1,4 +1,4 @@ -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use arrow_array::builder::{StringBuilder, UInt32Builder}; use arrow_array::RecordBatch; @@ -11,7 +11,6 @@ use datafusion::execution::memory_pool::human_readable_size; use datafusion::functions_aggregate::count::count_distinct; use datafusion::logical_expr::lit; use 
datafusion::prelude::{col, DataFrame, SessionContext}; -use lazy_static::lazy_static; use vortex::aliases::hash_set::HashSet; use vortex::compress::CompressionStrategy; use vortex::dict::DictEncoding; @@ -26,24 +25,24 @@ use vortex::sampling_compressor::SamplingCompressor; use vortex::{Array, Context}; use vortex_datafusion::memory::{VortexMemTable, VortexMemTableOptions}; -lazy_static! { - pub static ref CTX: Context = Context::default().with_encodings([ +pub static CTX: LazyLock<Context> = LazyLock::new(|| { + Context::default().with_encodings([ &BitPackedEncoding as EncodingRef, &DictEncoding, &FoREncoding, - &DeltaEncoding - ]); -} + &DeltaEncoding, + ]) +}); -lazy_static! { - pub static ref COMPRESSORS: HashSet<CompressorRef<'static>> = [ +pub static COMPRESSORS: LazyLock<HashSet<CompressorRef<'static>>> = LazyLock::new(|| { + [ &BITPACK_WITH_PATCHES as CompressorRef<'static>, &DictCompressor, &FoRCompressor, - &DeltaCompressor + &DeltaCompressor, ] - .into(); -} + .into() +}); fn toy_dataset_arrow() -> RecordBatch { // 64,000 rows of string and numeric data. diff --git a/bench-vortex/benches/tokio_runtime.rs b/bench-vortex/benches/tokio_runtime.rs index 67ed338406..461d7abcf4 100644 --- a/bench-vortex/benches/tokio_runtime.rs +++ b/bench-vortex/benches/tokio_runtime.rs @@ -1,11 +1,12 @@ -use lazy_static::lazy_static; -use tokio::runtime::Runtime; +use std::sync::LazyLock; + +use tokio::runtime::{Builder, Runtime}; use vortex::error::{VortexError, VortexExpect}; -lazy_static! 
{ - pub static ref TOKIO_RUNTIME: Runtime = tokio::runtime::Builder::new_current_thread() +pub static TOKIO_RUNTIME: LazyLock<Runtime> = LazyLock::new(|| { + Builder::new_current_thread() .enable_all() .build() .map_err(VortexError::IOError) - .vortex_expect("tokio runtime must not fail to start"); -} + .vortex_expect("tokio runtime must not fail to start") +}); diff --git a/bench-vortex/src/lib.rs b/bench-vortex/src/lib.rs index 76767c94f2..e1232a2538 100644 --- a/bench-vortex/src/lib.rs +++ b/bench-vortex/src/lib.rs @@ -4,11 +4,10 @@ use std::env::temp_dir; use std::fs::{create_dir_all, File}; use std::future::Future; use std::path::{Path, PathBuf}; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use arrow_array::RecordBatchReader; use itertools::Itertools; -use lazy_static::lazy_static; use log::LevelFilter; use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use simplelog::{ColorChoice, Config, TermLogger, TerminalMode}; @@ -44,13 +43,16 @@ pub mod taxi_data; pub mod tpch; pub mod vortex_utils; -lazy_static! { - pub static ref CTX: Arc<Context> = Arc::new( +pub static CTX: LazyLock<Arc<Context>> = LazyLock::new(|| { + Arc::new( Context::default() .with_encodings(SamplingCompressor::default().used_encodings()) - .with_encoding(&DeltaEncoding) - ); - pub static ref COMPRESSORS: HashSet<CompressorRef<'static>> = [ + .with_encoding(&DeltaEncoding), + ) +}); + +pub static COMPRESSORS: LazyLock<HashSet<CompressorRef<'static>>> = LazyLock::new(|| { + [ &ALPCompressor as CompressorRef<'static>, &ALPRDCompressor, &DictCompressor, @@ -60,10 +62,10 @@ lazy_static! { &DateTimePartsCompressor, &DEFAULT_RUN_END_COMPRESSOR, &RoaringBoolCompressor, - &SparseCompressor + &SparseCompressor, ] - .into(); -} + .into() +}); /// Creates a file if it doesn't already exist. /// NB: Does NOT modify the given path to ensure that it resides in the data directory. 
diff --git a/bench-vortex/src/public_bi_data.rs b/bench-vortex/src/public_bi_data.rs index a55bad5a38..6d973729ee 100644 --- a/bench-vortex/src/public_bi_data.rs +++ b/bench-vortex/src/public_bi_data.rs @@ -1,6 +1,7 @@ use std::hash::Hash; use std::os::unix::fs::MetadataExt; use std::path::PathBuf; +use std::sync::LazyLock; use enum_iterator::Sequence; use futures::executor::block_on; @@ -22,22 +23,34 @@ use crate::reader::{ }; use crate::{idempotent, IdempotentPath}; -lazy_static::lazy_static! { - // NB: we do not expect this to change, otherwise we'd crawl the site and populate it at runtime - // We will eventually switch over to self-hosting this data, at which time this map will need - // to be updated once. - static ref URLS: HashMap<PBIDataset, Vec<PBIUrl>> = HashMap::from([ - (AirlineSentiment, vec![ - PBIUrl::new("AirlineSentiment", "AirlineSentiment_1.csv.bz2")]), - (Arade, vec![PBIUrl::new("Arade","Arade_1.csv.bz2")]), - (Bimbo, vec![ - PBIUrl::new("Bimbo", "Bimbo_1.csv.bz2")]), - (CMSprovider, vec![ +// NB: we do not expect this to change, otherwise we'd crawl the site and populate it at runtime +// We will eventually switch over to self-hosting this data, at which time this map will need +// to be updated once. 
+static URLS: LazyLock<HashMap<PBIDataset, Vec<PBIUrl>>> = LazyLock::new(|| { + HashMap::from([ + ( + AirlineSentiment, + vec![PBIUrl::new( + "AirlineSentiment", + "AirlineSentiment_1.csv.bz2", + )], + ), + (Arade, vec![PBIUrl::new("Arade", "Arade_1.csv.bz2")]), + (Bimbo, vec![PBIUrl::new("Bimbo", "Bimbo_1.csv.bz2")]), + ( + CMSprovider, + vec![ PBIUrl::new("CMSprovider", "CMSprovider_1.csv.bz2"), - PBIUrl::new("CMSprovider", "CMSprovider_2.csv.bz2")]), - (CityMaxCapita, vec![ - PBIUrl::new("CityMaxCapita", "CityMaxCapita_1.csv.bz2")]), - (CommonGovernment, vec![ + PBIUrl::new("CMSprovider", "CMSprovider_2.csv.bz2"), + ], + ), + ( + CityMaxCapita, + vec![PBIUrl::new("CityMaxCapita", "CityMaxCapita_1.csv.bz2")], + ), + ( + CommonGovernment, + vec![ PBIUrl::new("CommonGovernment", "CommonGovernment_1.csv.bz2"), PBIUrl::new("CommonGovernment", "CommonGovernment_2.csv.bz2"), PBIUrl::new("CommonGovernment", "CommonGovernment_3.csv.bz2"), @@ -50,33 +63,52 @@ lazy_static::lazy_static! { PBIUrl::new("CommonGovernment", "CommonGovernment_10.csv.bz2"), PBIUrl::new("CommonGovernment", "CommonGovernment_11.csv.bz2"), PBIUrl::new("CommonGovernment", "CommonGovernment_12.csv.bz2"), - PBIUrl::new("CommonGovernment", "CommonGovernment_13.csv.bz2")]), - (Corporations, vec![ - PBIUrl::new("Corporations", "Corporations_1.csv.bz2")]), - (Eixo, vec![ - PBIUrl::new("Eixo", "Eixo_1.csv.bz2")]), - (Euro2016, vec![ - PBIUrl::new("Euro2016", "Euro2016_1.csv.bz2")]), - (Food, vec![ - PBIUrl::new("Food", "Food_1.csv.bz2")]), - (Generico, vec![ + PBIUrl::new("CommonGovernment", "CommonGovernment_13.csv.bz2"), + ], + ), + ( + Corporations, + vec![PBIUrl::new("Corporations", "Corporations_1.csv.bz2")], + ), + (Eixo, vec![PBIUrl::new("Eixo", "Eixo_1.csv.bz2")]), + ( + Euro2016, + vec![PBIUrl::new("Euro2016", "Euro2016_1.csv.bz2")], + ), + (Food, vec![PBIUrl::new("Food", "Food_1.csv.bz2")]), + ( + Generico, + vec![ PBIUrl::new("Generico", "Generico_1.csv.bz2"), PBIUrl::new("Generico", "Generico_2.csv.bz2"), 
PBIUrl::new("Generico", "Generico_3.csv.bz2"), PBIUrl::new("Generico", "Generico_4.csv.bz2"), - PBIUrl::new("Generico", "Generico_5.csv.bz2"),]), - (HashTags, vec![ - PBIUrl::new("HashTags", "HashTags_1.csv.bz2")]), - (Hatred, vec![ - PBIUrl::new("Hatred", "Hatred_1.csv.bz2")]), - (IGlocations1, vec![ - PBIUrl::new("IGlocations1", "IGlocations1_1.csv.bz2")]), - (IGlocations2, vec![ + PBIUrl::new("Generico", "Generico_5.csv.bz2"), + ], + ), + ( + HashTags, + vec![PBIUrl::new("HashTags", "HashTags_1.csv.bz2")], + ), + (Hatred, vec![PBIUrl::new("Hatred", "Hatred_1.csv.bz2")]), + ( + IGlocations1, + vec![PBIUrl::new("IGlocations1", "IGlocations1_1.csv.bz2")], + ), + ( + IGlocations2, + vec![ PBIUrl::new("IGlocations2", "IGlocations2_1.csv.bz2"), - PBIUrl::new("IGlocations2", "IGlocations2_2.csv.bz2")]), - (IUBLibrary, vec![ - PBIUrl::new("IUBLibrary", "IUBLibrary_1.csv.bz2")]), - (MLB, vec![ + PBIUrl::new("IGlocations2", "IGlocations2_2.csv.bz2"), + ], + ), + ( + IUBLibrary, + vec![PBIUrl::new("IUBLibrary", "IUBLibrary_1.csv.bz2")], + ), + ( + MLB, + vec![ PBIUrl::new("MLB", "MLB_1.csv.bz2"), PBIUrl::new("MLB", "MLB_2.csv.bz2"), PBIUrl::new("MLB", "MLB_3.csv.bz2"), @@ -144,35 +176,71 @@ lazy_static::lazy_static! 
{ PBIUrl::new("MLB", "MLB_65.csv.bz2"), PBIUrl::new("MLB", "MLB_66.csv.bz2"), PBIUrl::new("MLB", "MLB_67.csv.bz2"), - PBIUrl::new("MLB", "MLB_68.csv.bz2")]), - (MedPayment1, vec![ - PBIUrl::new("MedPayment1", "MedPayment1_1.csv.bz2")]), - (MedPayment2, vec![ - PBIUrl::new("MedPayment2", "MedPayment2_1.csv.bz2")]), - (Medicare1, vec![ + PBIUrl::new("MLB", "MLB_68.csv.bz2"), + ], + ), + ( + MedPayment1, + vec![PBIUrl::new("MedPayment1", "MedPayment1_1.csv.bz2")], + ), + ( + MedPayment2, + vec![PBIUrl::new("MedPayment2", "MedPayment2_1.csv.bz2")], + ), + ( + Medicare1, + vec![ PBIUrl::new("Medicare1", "Medicare1_1.csv.bz2"), - PBIUrl::new("Medicare1", "Medicare1_2.csv.bz2")]), - (Medicare2, vec![ + PBIUrl::new("Medicare1", "Medicare1_2.csv.bz2"), + ], + ), + ( + Medicare2, + vec![ PBIUrl::new("Medicare2", "Medicare2_1.csv.bz2"), - PBIUrl::new("Medicare2", "Medicare2_2.csv.bz2")]), - (Medicare3, vec![ - PBIUrl::new("Medicare3", "Medicare3_1.csv.bz2")]), - (Motos, vec![ + PBIUrl::new("Medicare2", "Medicare2_2.csv.bz2"), + ], + ), + ( + Medicare3, + vec![PBIUrl::new("Medicare3", "Medicare3_1.csv.bz2")], + ), + ( + Motos, + vec![ PBIUrl::new("Motos", "Motos_1.csv.bz2"), - PBIUrl::new("Motos", "Motos_2.csv.bz2")]), - (MulheresMil, vec![ - PBIUrl::new("MulheresMil", "MulheresMil_1.csv.bz2")]), - (NYC, vec![ + PBIUrl::new("Motos", "Motos_2.csv.bz2"), + ], + ), + ( + MulheresMil, + vec![PBIUrl::new("MulheresMil", "MulheresMil_1.csv.bz2")], + ), + ( + NYC, + vec![ PBIUrl::new("NYC", "NYC_1.csv.bz2"), - PBIUrl::new("NYC", "NYC_2.csv.bz2")]), - (PanCreactomy1, vec![ - PBIUrl::new("PanCreactomy1", "PanCreactomy1_1.csv.bz2")]), - (PanCreactomy2, vec![ + PBIUrl::new("NYC", "NYC_2.csv.bz2"), + ], + ), + ( + PanCreactomy1, + vec![PBIUrl::new("PanCreactomy1", "PanCreactomy1_1.csv.bz2")], + ), + ( + PanCreactomy2, + vec![ PBIUrl::new("PanCreactomy2", "PanCreactomy2_1.csv.bz2"), - PBIUrl::new("PanCreactomy2", "PanCreactomy2_2.csv.bz2")]), - (Physicians, vec![ - PBIUrl::new("Physicians", 
"Physicians_1.csv.bz2")]), - (Provider, vec![ + PBIUrl::new("PanCreactomy2", "PanCreactomy2_2.csv.bz2"), + ], + ), + ( + Physicians, + vec![PBIUrl::new("Physicians", "Physicians_1.csv.bz2")], + ), + ( + Provider, + vec![ PBIUrl::new("Provider", "Provider_1.csv.bz2"), PBIUrl::new("Provider", "Provider_2.csv.bz2"), PBIUrl::new("Provider", "Provider_3.csv.bz2"), @@ -180,33 +248,56 @@ lazy_static::lazy_static! { PBIUrl::new("Provider", "Provider_5.csv.bz2"), PBIUrl::new("Provider", "Provider_6.csv.bz2"), PBIUrl::new("Provider", "Provider_7.csv.bz2"), - PBIUrl::new("Provider", "Provider_8.csv.bz2")]), - (RealEstate1, vec![ + PBIUrl::new("Provider", "Provider_8.csv.bz2"), + ], + ), + ( + RealEstate1, + vec![ PBIUrl::new("RealEstate1", "RealEstate1_1.csv.bz2"), - PBIUrl::new("RealEstate1", "RealEstate1_2.csv.bz2")]), - (RealEstate2, vec![ + PBIUrl::new("RealEstate1", "RealEstate1_2.csv.bz2"), + ], + ), + ( + RealEstate2, + vec![ PBIUrl::new("RealEstate2", "RealEstate2_1.csv.bz2"), PBIUrl::new("RealEstate2", "RealEstate2_2.csv.bz2"), PBIUrl::new("RealEstate2", "RealEstate2_3.csv.bz2"), PBIUrl::new("RealEstate2", "RealEstate2_4.csv.bz2"), PBIUrl::new("RealEstate2", "RealEstate2_5.csv.bz2"), PBIUrl::new("RealEstate2", "RealEstate2_6.csv.bz2"), - PBIUrl::new("RealEstate2", "RealEstate2_7.csv.bz2")]), - (Redfin1, vec![ + PBIUrl::new("RealEstate2", "RealEstate2_7.csv.bz2"), + ], + ), + ( + Redfin1, + vec![ PBIUrl::new("Redfin1", "Redfin1_1.csv.bz2"), PBIUrl::new("Redfin1", "Redfin1_2.csv.bz2"), PBIUrl::new("Redfin1", "Redfin1_3.csv.bz2"), - PBIUrl::new("Redfin1", "Redfin1_4.csv.bz2")]), - (Redfin2, vec![ + PBIUrl::new("Redfin1", "Redfin1_4.csv.bz2"), + ], + ), + ( + Redfin2, + vec![ PBIUrl::new("Redfin2", "Redfin2_1.csv.bz2"), PBIUrl::new("Redfin2", "Redfin2_2.csv.bz2"), - PBIUrl::new("Redfin2", "Redfin2_3.csv.bz2")]), - (Redfin3, vec![ + PBIUrl::new("Redfin2", "Redfin2_3.csv.bz2"), + ], + ), + ( + Redfin3, + vec![ PBIUrl::new("Redfin3", "Redfin3_1.csv.bz2"), - 
PBIUrl::new("Redfin3", "Redfin3_2.csv.bz2")]), - (Redfin4, vec![ - PBIUrl::new("Redfin4", "Redfin4_1.csv.bz2")]), - (Rentabilidad, vec![ + PBIUrl::new("Redfin3", "Redfin3_2.csv.bz2"), + ], + ), + (Redfin4, vec![PBIUrl::new("Redfin4", "Redfin4_1.csv.bz2")]), + ( + Rentabilidad, + vec![ PBIUrl::new("Rentabilidad", "Rentabilidad_1.csv.bz2"), PBIUrl::new("Rentabilidad", "Rentabilidad_2.csv.bz2"), PBIUrl::new("Rentabilidad", "Rentabilidad_3.csv.bz2"), @@ -215,11 +306,19 @@ lazy_static::lazy_static! { PBIUrl::new("Rentabilidad", "Rentabilidad_6.csv.bz2"), PBIUrl::new("Rentabilidad", "Rentabilidad_7.csv.bz2"), PBIUrl::new("Rentabilidad", "Rentabilidad_8.csv.bz2"), - PBIUrl::new("Rentabilidad", "Rentabilidad_9.csv.bz2")]), - (Romance, vec![ + PBIUrl::new("Rentabilidad", "Rentabilidad_9.csv.bz2"), + ], + ), + ( + Romance, + vec![ PBIUrl::new("Romance", "Romance_1.csv.bz2"), - PBIUrl::new("Romance", "Romance_2.csv.bz2")]), - (SalariesFrance, vec![ + PBIUrl::new("Romance", "Romance_2.csv.bz2"), + ], + ), + ( + SalariesFrance, + vec![ PBIUrl::new("SalariesFrance", "SalariesFrance_1.csv.bz2"), PBIUrl::new("SalariesFrance", "SalariesFrance_2.csv.bz2"), PBIUrl::new("SalariesFrance", "SalariesFrance_3.csv.bz2"), @@ -232,8 +331,12 @@ lazy_static::lazy_static! { PBIUrl::new("SalariesFrance", "SalariesFrance_10.csv.bz2"), PBIUrl::new("SalariesFrance", "SalariesFrance_11.csv.bz2"), PBIUrl::new("SalariesFrance", "SalariesFrance_12.csv.bz2"), - PBIUrl::new("SalariesFrance", "SalariesFrance_13.csv.bz2")]), - (TableroSistemaPenal, vec![ + PBIUrl::new("SalariesFrance", "SalariesFrance_13.csv.bz2"), + ], + ), + ( + TableroSistemaPenal, + vec![ PBIUrl::new("TableroSistemaPenal", "TableroSistemaPenal_1.csv.bz2"), PBIUrl::new("TableroSistemaPenal", "TableroSistemaPenal_2.csv.bz2"), PBIUrl::new("TableroSistemaPenal", "TableroSistemaPenal_3.csv.bz2"), @@ -241,8 +344,12 @@ lazy_static::lazy_static! 
{ PBIUrl::new("TableroSistemaPenal", "TableroSistemaPenal_5.csv.bz2"), PBIUrl::new("TableroSistemaPenal", "TableroSistemaPenal_6.csv.bz2"), PBIUrl::new("TableroSistemaPenal", "TableroSistemaPenal_7.csv.bz2"), - PBIUrl::new("TableroSistemaPenal", "TableroSistemaPenal_8.csv.bz2")]), - (Taxpayer, vec![ + PBIUrl::new("TableroSistemaPenal", "TableroSistemaPenal_8.csv.bz2"), + ], + ), + ( + Taxpayer, + vec![ PBIUrl::new("Taxpayer", "Taxpayer_1.csv.bz2"), PBIUrl::new("Taxpayer", "Taxpayer_2.csv.bz2"), PBIUrl::new("Taxpayer", "Taxpayer_3.csv.bz2"), @@ -252,36 +359,59 @@ lazy_static::lazy_static! { PBIUrl::new("Taxpayer", "Taxpayer_7.csv.bz2"), PBIUrl::new("Taxpayer", "Taxpayer_8.csv.bz2"), PBIUrl::new("Taxpayer", "Taxpayer_9.csv.bz2"), - PBIUrl::new("Taxpayer", "Taxpayer_10.csv.bz2")]), - (Telco, vec![ - PBIUrl::new("Telco", "Telco_1.csv.bz2")]), - (TrainsUK1, vec![ + PBIUrl::new("Taxpayer", "Taxpayer_10.csv.bz2"), + ], + ), + (Telco, vec![PBIUrl::new("Telco", "Telco_1.csv.bz2")]), + ( + TrainsUK1, + vec![ PBIUrl::new("TrainsUK1", "TrainsUK1_1.csv.bz2"), PBIUrl::new("TrainsUK1", "TrainsUK1_2.csv.bz2"), PBIUrl::new("TrainsUK1", "TrainsUK1_3.csv.bz2"), - PBIUrl::new("TrainsUK1", "TrainsUK1_4.csv.bz2")]), - (TrainsUK2, vec![ + PBIUrl::new("TrainsUK1", "TrainsUK1_4.csv.bz2"), + ], + ), + ( + TrainsUK2, + vec![ PBIUrl::new("TrainsUK2", "TrainsUK2_1.csv.bz2"), - PBIUrl::new("TrainsUK2", "TrainsUK2_2.csv.bz2")]), - (USCensus, vec![ + PBIUrl::new("TrainsUK2", "TrainsUK2_2.csv.bz2"), + ], + ), + ( + USCensus, + vec![ PBIUrl::new("USCensus", "USCensus_1.csv.bz2"), PBIUrl::new("USCensus", "USCensus_2.csv.bz2"), - PBIUrl::new("USCensus", "USCensus_3.csv.bz2")]), - (Uberlandia, vec![ - PBIUrl::new("Uberlandia", "Uberlandia_1.csv.bz2")]), - (Wins, vec![ + PBIUrl::new("USCensus", "USCensus_3.csv.bz2"), + ], + ), + ( + Uberlandia, + vec![PBIUrl::new("Uberlandia", "Uberlandia_1.csv.bz2")], + ), + ( + Wins, + vec![ PBIUrl::new("Wins", "Wins_1.csv.bz2"), PBIUrl::new("Wins", 
"Wins_2.csv.bz2"), PBIUrl::new("Wins", "Wins_3.csv.bz2"), - PBIUrl::new("Wins", "Wins_4.csv.bz2")]), - (YaleLanguages, vec![ + PBIUrl::new("Wins", "Wins_4.csv.bz2"), + ], + ), + ( + YaleLanguages, + vec![ PBIUrl::new("YaleLanguages", "YaleLanguages_1.csv.bz2"), PBIUrl::new("YaleLanguages", "YaleLanguages_2.csv.bz2"), PBIUrl::new("YaleLanguages", "YaleLanguages_3.csv.bz2"), PBIUrl::new("YaleLanguages", "YaleLanguages_4.csv.bz2"), - PBIUrl::new("YaleLanguages", "YaleLanguages_5.csv.bz2")]), - ]); -} + PBIUrl::new("YaleLanguages", "YaleLanguages_5.csv.bz2"), + ], + ), + ]) +}); impl PBIDataset { pub fn dataset_name(&self) -> &str { diff --git a/bench-vortex/src/tpch/schema.rs b/bench-vortex/src/tpch/schema.rs index 6d15ec37f8..384217d135 100644 --- a/bench-vortex/src/tpch/schema.rs +++ b/bench-vortex/src/tpch/schema.rs @@ -1,22 +1,29 @@ +use std::sync::LazyLock; + /// Arrow schemas for TPC-H tables. /// /// Adapted from the SQL definitions in https://github.com/dimitri/tpch-citus/blob/master/schema/tpch-schema.sql use arrow_schema::{DataType, Field, Schema}; -use lazy_static::lazy_static; -lazy_static! 
{ - pub static ref NATION: Schema = Schema::new(vec![ +pub static NATION: LazyLock<Schema> = LazyLock::new(|| { + Schema::new(vec![ Field::new("n_nationkey", DataType::Int64, false), Field::new("n_name", DataType::Utf8View, false), Field::new("n_regionkey", DataType::Int64, false), Field::new("n_comment", DataType::Utf8View, true), - ]); - pub static ref REGION: Schema = Schema::new(vec![ + ]) +}); + +pub static REGION: LazyLock<Schema> = LazyLock::new(|| { + Schema::new(vec![ Field::new("r_regionkey", DataType::Int64, false), Field::new("r_name", DataType::Utf8View, false), Field::new("r_comment", DataType::Utf8View, true), - ]); - pub static ref PART: Schema = Schema::new(vec![ + ]) +}); + +pub static PART: LazyLock<Schema> = LazyLock::new(|| { + Schema::new(vec![ Field::new("p_partkey", DataType::Int64, false), Field::new("p_name", DataType::Utf8View, false), Field::new("p_mfgr", DataType::Utf8View, false), @@ -26,8 +33,11 @@ lazy_static! { Field::new("p_container", DataType::Utf8View, false), Field::new("p_retailprice", DataType::Float64, false), Field::new("p_comment", DataType::Utf8View, false), - ]); - pub static ref SUPPLIER: Schema = Schema::new(vec![ + ]) +}); + +pub static SUPPLIER: LazyLock<Schema> = LazyLock::new(|| { + Schema::new(vec![ Field::new("s_suppkey", DataType::Int64, false), Field::new("s_name", DataType::Utf8View, false), Field::new("s_address", DataType::Utf8View, false), @@ -35,15 +45,21 @@ lazy_static! 
{ Field::new("s_phone", DataType::Utf8View, false), Field::new("s_acctbal", DataType::Float64, false), Field::new("s_comment", DataType::Utf8View, false), - ]); - pub static ref PARTSUPP: Schema = Schema::new(vec![ + ]) +}); + +pub static PARTSUPP: LazyLock<Schema> = LazyLock::new(|| { + Schema::new(vec![ Field::new("ps_partkey", DataType::Int64, false), Field::new("ps_suppkey", DataType::Int64, false), Field::new("ps_availqty", DataType::Int64, false), Field::new("ps_supplycost", DataType::Float64, false), Field::new("ps_comment", DataType::Utf8View, false), - ]); - pub static ref CUSTOMER: Schema = Schema::new(vec![ + ]) +}); + +pub static CUSTOMER: LazyLock<Schema> = LazyLock::new(|| { + Schema::new(vec![ Field::new("c_custkey", DataType::Int64, false), Field::new("c_name", DataType::Utf8View, false), Field::new("c_address", DataType::Utf8View, false), @@ -52,8 +68,11 @@ lazy_static! { Field::new("c_acctbal", DataType::Float64, false), Field::new("c_mktsegment", DataType::Utf8View, false), Field::new("c_comment", DataType::Utf8View, false), - ]); - pub static ref ORDERS: Schema = Schema::new(vec![ + ]) +}); + +pub static ORDERS: LazyLock<Schema> = LazyLock::new(|| { + Schema::new(vec![ Field::new("o_orderkey", DataType::Int64, false), Field::new("o_custkey", DataType::Int64, false), Field::new("o_orderstatus", DataType::Utf8View, false), @@ -63,8 +82,11 @@ lazy_static! { Field::new("o_clerk", DataType::Utf8View, false), Field::new("o_shippriority", DataType::Int32, false), Field::new("o_comment", DataType::Utf8View, false), - ]); - pub static ref LINEITEM: Schema = Schema::new(vec![ + ]) +}); + +pub static LINEITEM: LazyLock<Schema> = LazyLock::new(|| { + Schema::new(vec![ Field::new("l_orderkey", DataType::Int64, false), Field::new("l_partkey", DataType::Int64, false), Field::new("l_suppkey", DataType::Int64, false), @@ -81,5 +103,5 @@ lazy_static! 
{ Field::new("l_shipinstruct", DataType::Utf8View, false), Field::new("l_shipmode", DataType::Utf8View, false), Field::new("l_comment", DataType::Utf8View, false), - ]); -} + ]) +}); diff --git a/pyvortex/Cargo.toml b/pyvortex/Cargo.toml index b98b8eb4de..c8bc1ff784 100644 --- a/pyvortex/Cargo.toml +++ b/pyvortex/Cargo.toml @@ -27,7 +27,6 @@ arrow = { workspace = true, features = ["pyarrow"] } flexbuffers = { workspace = true } futures = { workspace = true } itertools = { workspace = true } -lazy_static = { workspace = true } log = { workspace = true } paste = { workspace = true } pyo3 = { workspace = true } diff --git a/pyvortex/src/lib.rs b/pyvortex/src/lib.rs index 0ae11b8baa..8e00c6f435 100644 --- a/pyvortex/src/lib.rs +++ b/pyvortex/src/lib.rs @@ -1,5 +1,7 @@ #![allow(unsafe_op_in_unsafe_fn)] +use std::sync::LazyLock; + use array::PyArray; use expr::PyExpr; use pyo3::exceptions::PyRuntimeError; @@ -16,17 +18,16 @@ mod io; mod object_store_urls; mod python_repr; mod scalar; -use lazy_static::lazy_static; use log::LevelFilter; use pyo3_log::{Caching, Logger}; use tokio::runtime::Runtime; use vortex::error::{VortexError, VortexExpect as _}; -lazy_static! { - static ref TOKIO_RUNTIME: Runtime = Runtime::new() +pub static TOKIO_RUNTIME: LazyLock<Runtime> = LazyLock::new(|| { + Runtime::new() .map_err(VortexError::IOError) - .vortex_expect("tokio runtime must not fail to start"); -} + .vortex_expect("tokio runtime must not fail to start") +}); /// Vortex is an Apache Arrow-compatible toolkit for working with compressed array data. 
#[pymodule] diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml index 93b663ba7c..492594440e 100644 --- a/vortex-array/Cargo.toml +++ b/vortex-array/Cargo.toml @@ -38,7 +38,6 @@ futures-util = { workspace = true } hashbrown = { workspace = true } humansize = { workspace = true } itertools = { workspace = true } -lazy_static = { workspace = true } log = { workspace = true } num-traits = { workspace = true } num_enum = { workspace = true } diff --git a/vortex-datafusion/Cargo.toml b/vortex-datafusion/Cargo.toml index 033a0e45b5..5f57463ad0 100644 --- a/vortex-datafusion/Cargo.toml +++ b/vortex-datafusion/Cargo.toml @@ -30,7 +30,6 @@ datafusion-physical-expr = { workspace = true } datafusion-physical-plan = { workspace = true } futures = { workspace = true } itertools = { workspace = true } -lazy_static = { workspace = true } log = { workspace = true } object_store = { workspace = true } pin-project = { workspace = true } diff --git a/vortex-datafusion/src/plans.rs b/vortex-datafusion/src/plans.rs index 487571fd01..bcbea01944 100644 --- a/vortex-datafusion/src/plans.rs +++ b/vortex-datafusion/src/plans.rs @@ -3,7 +3,7 @@ use std::any::Any; use std::fmt::{Debug, Formatter}; use std::pin::Pin; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use std::task::{Context, Poll}; use arrow_array::cast::AsArray; @@ -17,7 +17,6 @@ use datafusion_physical_plan::{ DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties, }; use futures::{ready, Stream}; -use lazy_static::lazy_static; use pin_project::pin_project; use vortex_array::array::ChunkedArray; use vortex_array::arrow::FromArrowArray; @@ -38,14 +37,13 @@ pub(crate) struct RowSelectorExec { chunked_array: ChunkedArray, } -lazy_static! 
{ - static ref ROW_SELECTOR_SCHEMA_REF: SchemaRef = - Arc::new(Schema::new(vec![arrow_schema::Field::new( - "row_idx", - DataType::UInt64, - false - )])); -} +static ROW_SELECTOR_SCHEMA_REF: LazyLock<SchemaRef> = LazyLock::new(|| { + Arc::new(Schema::new(vec![arrow_schema::Field::new( + "row_idx", + DataType::UInt64, + false, + )])) +}); impl RowSelectorExec { pub(crate) fn try_new( diff --git a/vortex-datetime-dtype/Cargo.toml b/vortex-datetime-dtype/Cargo.toml index 3ae94eee8e..975d832b18 100644 --- a/vortex-datetime-dtype/Cargo.toml +++ b/vortex-datetime-dtype/Cargo.toml @@ -16,7 +16,6 @@ readme = { workspace = true } [dependencies] arrow-schema = { workspace = true, optional = true } jiff = { workspace = true } -lazy_static = { workspace = true } num_enum = { workspace = true } serde = { workspace = true, features = ["derive"] } vortex-dtype = { workspace = true, features = ["serde"] } diff --git a/vortex-datetime-dtype/src/temporal.rs b/vortex-datetime-dtype/src/temporal.rs index eb46dfc143..4106c8211e 100644 --- a/vortex-datetime-dtype/src/temporal.rs +++ b/vortex-datetime-dtype/src/temporal.rs @@ -1,18 +1,15 @@ use std::fmt::Display; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use jiff::civil::{Date, Time}; use jiff::{Timestamp, Zoned}; -use lazy_static::lazy_static; use vortex_dtype::ExtID; use crate::unit::TimeUnit; -lazy_static! 
{ - pub static ref TIME_ID: ExtID = ExtID::from("vortex.time"); - pub static ref DATE_ID: ExtID = ExtID::from("vortex.date"); - pub static ref TIMESTAMP_ID: ExtID = ExtID::from("vortex.timestamp"); -} +pub static TIME_ID: LazyLock<ExtID> = LazyLock::new(|| ExtID::from("vortex.time")); +pub static DATE_ID: LazyLock<ExtID> = LazyLock::new(|| ExtID::from("vortex.date")); +pub static TIMESTAMP_ID: LazyLock<ExtID> = LazyLock::new(|| ExtID::from("vortex.timestamp")); pub fn is_temporal_ext_type(id: &ExtID) -> bool { [&DATE_ID as &ExtID, &TIME_ID, &TIMESTAMP_ID].contains(&id) diff --git a/vortex-sampling-compressor/Cargo.toml b/vortex-sampling-compressor/Cargo.toml index 5e5958a5a2..ad6b22cc6e 100644 --- a/vortex-sampling-compressor/Cargo.toml +++ b/vortex-sampling-compressor/Cargo.toml @@ -17,7 +17,6 @@ readme = { workspace = true } arbitrary = { workspace = true, optional = true } fsst-rs = { workspace = true } itertools = { workspace = true } -lazy_static = { workspace = true } log = { workspace = true } rand = { workspace = true } vortex-alp = { workspace = true } diff --git a/vortex-sampling-compressor/src/lib.rs b/vortex-sampling-compressor/src/lib.rs index 64d558feb2..0a15cf2ee8 100644 --- a/vortex-sampling-compressor/src/lib.rs +++ b/vortex-sampling-compressor/src/lib.rs @@ -1,11 +1,10 @@ use std::fmt::{Debug, Display, Formatter}; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use compressors::bitpacked::BITPACK_WITH_PATCHES; use compressors::chunked::DEFAULT_CHUNKED_COMPRESSOR; use compressors::fsst::FSSTCompressor; use compressors::struct_::StructCompressor; -use lazy_static::lazy_static; use log::{debug, warn}; use rand::rngs::StdRng; use rand::SeedableRng; @@ -47,8 +46,8 @@ pub mod compressors; mod constants; mod sampling; -lazy_static! 
{ - pub static ref DEFAULT_COMPRESSORS: [CompressorRef<'static>; 9] = [ +pub static DEFAULT_COMPRESSORS: LazyLock<[CompressorRef<'static>; 9]> = LazyLock::new(|| { + [ &ALPCompressor as CompressorRef, &BITPACK_WITH_PATCHES, &DateTimePartsCompressor, @@ -61,19 +60,23 @@ lazy_static! { // &RoaringIntCompressor, &SparseCompressor, &ZigZagCompressor, - ]; + ] +}); - pub static ref FASTEST_COMPRESSORS: [CompressorRef<'static>; 7] = [ +pub static FASTEST_COMPRESSORS: LazyLock<[CompressorRef<'static>; 7]> = LazyLock::new(|| { + [ &BITPACK_WITH_PATCHES, &DateTimePartsCompressor, &DEFAULT_RUN_END_COMPRESSOR, // replace with FastLanes RLE - &DictCompressor, // replace with FastLanes Dictionary + &DictCompressor, // replace with FastLanes Dictionary &FoRCompressor, &SparseCompressor, &ZigZagCompressor, - ]; + ] +}); - pub static ref ALL_COMPRESSORS_CONTEXT: Arc<Context> = Arc::new(Context::default().with_encodings([ +pub static ALL_COMPRESSORS_CONTEXT: LazyLock<Arc<Context>> = LazyLock::new(|| { + Arc::new(Context::default().with_encodings([ &ALPEncoding as EncodingRef, &ByteBoolEncoding, &DateTimePartsEncoding, @@ -88,8 +91,8 @@ lazy_static! { &RunEndBoolEncoding, &ZigZagEncoding, &ALPRDEncoding, - ])); -} + ])) +}); #[derive(Debug, Clone)] pub enum Objective { diff --git a/vortex-serde/Cargo.toml b/vortex-serde/Cargo.toml index d61f8e7675..4b66f827ff 100644 --- a/vortex-serde/Cargo.toml +++ b/vortex-serde/Cargo.toml @@ -24,7 +24,6 @@ futures = { workspace = true } futures-executor = { workspace = true } futures-util = { workspace = true } itertools = { workspace = true } -lazy_static = { workspace = true } monoio = { workspace = true, optional = true, features = ["bytes"] } object_store = { workspace = true, optional = true } once_cell = { workspace = true }