From 04347d6412d1559daba168582c0c8dacb34f2397 Mon Sep 17 00:00:00 2001
From: Will Manning
Date: Wed, 13 Nov 2024 14:05:30 +0000
Subject: [PATCH] feat: minor docs improvements (#1277)

---
 docs/index.rst      | 17 +++++++++++------
 docs/quickstart.rst | 13 +++++++------
 2 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/docs/index.rst b/docs/index.rst
index 03475e7a23..fecd50aab4 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -3,8 +3,8 @@
    You can adapt this file completely to your liking, but it should at least
    contain the root `toctree` directive.
 
-Wide, Fast & Compact. Pick Three.
-==================================
+Vortex: a State-of-the-Art Columnar File Format
+===============================================
 
 .. grid:: 1 1 2 2
     :gutter: 4 4 4 4
@@ -32,14 +32,19 @@ Wide, Fast & Compact. Pick Three.
 
         Random access, throughput, and TPC-H.
 
-Vortex is a fast & extensible columnar file format that is based around state-of-the-art research
-from the database community. It is built around cascading compression with lightweight encodings (no
-block compression), allowing for both efficient random access and extremely fast decompression.
+Vortex is a fast & extensible columnar file format that is based around the latest research from the
+database community. It is built around cascading compression with lightweight, vectorized encodings
+(i.e., no block compression), allowing for both efficient random access and extremely fast
+decompression.
 
-Vortex also includes an accompanying in-memory format for these (recursively) compressed arrays,
+Vortex includes an accompanying in-memory format for these (recursively) compressed arrays,
 that is zero-copy compatible with Apache Arrow in uncompressed form. Taken together, the Vortex
 library is a useful toolkit with compressed Arrow data in-memory, on-disk, & over-the-wire.
 
+Vortex consolidates the metadata in a series of flatbuffers in the footer, in order to minimize
+the number of reads (important when reading from object storage) & the deserialization overhead
+(important for wide tables with many columns).
+
 Vortex aspires to succeed Apache Parquet by pushing the Pareto frontier outwards: 1-2x faster
 writes, 2-10x faster scans, and 100-200x faster random access reads, while preserving the same
 approximate compression ratio as Parquet v2 with zstd.
diff --git a/docs/quickstart.rst b/docs/quickstart.rst
index d1318ea36e..b7f5baafcc 100644
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -141,20 +141,21 @@ Use the sampling compressor to compress the Vortex array and check the relative
 Write
 ^^^^^
 
-Reading and writing both require an async runtime, in this example we use Tokio. The LayoutWriter
-knows how to write Vortex arrays to disk:
+Reading and writing both require an async runtime; in this example we use Tokio. The
+:class:`~vortex_serde::file::write::writer::VortexFileWriter` knows how to write Vortex arrays to
+disk:
 
 .. code-block:: rust
 
    use std::path::Path;
    use tokio::fs::File as TokioFile;
-   use vortex::serde::layouts::LayoutWriter;
+   use vortex_serde::file::write::writer::VortexFileWriter;
 
    let file = TokioFile::create(Path::new("example.vortex"))
        .await
        .unwrap();
 
-   let writer = LayoutWriter::new(file)
+   let writer = VortexFileWriter::new(file)
        .write_array_columns(cvtx.clone())
        .await
        .unwrap();
@@ -167,10 +168,10 @@ Read
 
    use futures::TryStreamExt;
    use vortex::sampling_compressor::ALL_COMPRESSORS_CONTEXT;
-   use vortex::serde::layouts::{LayoutContext, LayoutDeserializer, LayoutReaderBuilder};
+   use vortex_serde::file::read::builder::{VortexReadBuilder, LayoutDeserializer};
 
    let file = TokioFile::open(Path::new("example.vortex")).await.unwrap();
-   let builder = LayoutReaderBuilder::new(
+   let builder = VortexReadBuilder::new(
        file,
        LayoutDeserializer::new(
            ALL_COMPRESSORS_CONTEXT.clone(),