From 993f0a3b4623885a081eaf33b46a84bdb3bfb981 Mon Sep 17 00:00:00 2001 From: Arnaud Larroque <513142+nonontb@users.noreply.github.com> Date: Fri, 25 Feb 2022 13:20:01 +0100 Subject: [PATCH] Changes after went through "Datafusion as a library section" (#1868) * Changes after went through Datafusion as a library section * Run prettier on library.md Co-authored-by: Arnaud L <3510366-Nonontb@users.noreply.gitlab.com> --- docs/source/user-guide/library.md | 58 ++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/docs/source/user-guide/library.md b/docs/source/user-guide/library.md index f4c5083cfbb7..f0be42c972f0 100644 --- a/docs/source/user-guide/library.md +++ b/docs/source/user-guide/library.md @@ -19,6 +19,23 @@ # Using DataFusion as a library +## Create a new project + +```shell +cargo new hello_datafusion +``` + +```shell +$ cd hello_datafusion +$ tree . +. +├── Cargo.toml +└── src + └── main.rs + +1 directory, 2 files +``` + ## Default Configuration DataFusion is [published on crates.io](https://crates.io/crates/datafusion), and is [well documented on docs.rs](https://docs.rs/datafusion/). 
@@ -27,7 +44,29 @@ To get started, add the following to your `Cargo.toml` file: ```toml [dependencies] -datafusion = "5.1.0" +datafusion = "7.0.0" +``` + +## Create a main function + +Update the `main.rs` file with your first DataFusion application, based on [Example usage](https://arrow.apache.org/datafusion/user-guide/example-usage.html): + +```rust +use datafusion::prelude::*; + +#[tokio::main] +async fn main() -> datafusion::error::Result<()> { + // register the table + let mut ctx = ExecutionContext::new(); + ctx.register_csv("test", "<PATH_TO_YOUR_CSV_FILE>", CsvReadOptions::new()).await?; + + // create a plan to run a SQL query + let df = ctx.sql("SELECT * FROM test").await?; + + // execute and print results + df.show().await?; + Ok(()) +} ``` ## Optimized Configuration @@ -37,7 +76,7 @@ worth noting that using the settings in the `[profile.release]` section will sig ```toml [dependencies] -datafusion = { version = "5.0" , features = ["simd"]} +datafusion = { version = "7.0" , features = ["simd"]} tokio = { version = "^1.0", features = ["rt-multi-thread"] } snmalloc-rs = "0.2" @@ -49,12 +88,23 @@ codegen-units = 1 Then, in `main.rs.` update the memory allocator with the below after your imports: ```rust +use datafusion::prelude::*; + #[global_allocator] static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc; + +async fn main() -> datafusion::error::Result<()> { + ... +} +``` + +Finally, in order to build with the `simd` optimization `cargo nightly` is required. + +```shell +rustup toolchain install nightly ``` -Finally, in order to build with the `simd` optimization `cargo nightly` is required. Based on the instruction -set architecture you are building on you will want to configure the `target-cpu` as well, ideally +Based on the instruction set architecture you are building on you will want to configure the `target-cpu` as well, ideally with `native` or at least `avx2`. ```