From 9cc3fe3e82bf916bb5661feff1dd2810ae3acd48 Mon Sep 17 00:00:00 2001 From: Josiah Parry Date: Tue, 26 Nov 2024 14:23:07 -0800 Subject: [PATCH] add section on serde --- .../execute-results/html.json | 15 + _quarto.yml | 1 + user-guide/serde-integration.qmd | 261 ++++++++++++++++++ 3 files changed, 277 insertions(+) create mode 100644 _freeze/user-guide/serde-integration/execute-results/html.json create mode 100644 user-guide/serde-integration.qmd diff --git a/_freeze/user-guide/serde-integration/execute-results/html.json b/_freeze/user-guide/serde-integration/execute-results/html.json new file mode 100644 index 0000000..7dcdd40 --- /dev/null +++ b/_freeze/user-guide/serde-integration/execute-results/html.json @@ -0,0 +1,15 @@ +{ + "hash": "e9571e09ab9c84dba04370dcf3a82175", + "result": { + "engine": "knitr", + "markdown": "---\ntitle: \"`serde` integration\"\nfreeze: true\n---\n\n\nOne of the most widely used Rust crates is the **ser**ialization and **de**serialization crate [serde](https://serde.rs/). It enables Rust developers to write their custom structs to many different file formats such as json, toml, yaml, csv, and many more as well as read directly from them.\n\n`extendr` provides a `serde` feature that can convert R objects into structs and structs into R objects. \n\nFirst, modify your `Cargo.toml` to include the serde feature.\n\n\n\n\n\n\n::: {.cell filename='Cargo.toml'}\n\n```{.toml .cell-code}\n[dependencies]\nextendr-api = { version = \"*\", features = [\"serde\"] }\n```\n:::\n\n\nFor this example we will have a `Point` struct with two fields, `x`, and `y`. In your `lib.rs` include: \n\n\n::: {.cell filename='lib.rs'}\n\n```{.rust .cell-code}\nuse extendr_api::prelude::*;\nuse serde::{Deserialize, Serialize};\n\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub struct Point {\n x: f64,\n y: f64\n}\n```\n:::\n\n\n## Deserializing R objects\n\nThis defines a `Point` struct. 
However, you may want to be able to use an R object to represent that point. To deserialize R objects into Rust, use `extendr_api::deserializer::from_robj`. For a basic example we can deserialize \nan `Robj` into the `Point`.\n\n\n::: {.cell}\n\n```{.rust .cell-code}\nuse extendr_api::deserializer::from_robj;\n\n#[extendr]\nfn point_from_r(x: Robj) {\n let point = from_robj::<Point>(&x);\n rprintln!(\"{point:?}\");\n}\n```\n:::\n\n\nTo represent a struct, a named list has to be used. Each name must correspond with the field name of the struct. In this case these are `x` and `y`.\n\n\n::: {.cell}\n\n```{.r .cell-code}\npoint <- list(x = 3.0, y = 0.14)\npoint_from_r(list(x = 3.0, y = 0.14))\n#> Ok(Point { x: 3.0, y: 0.14 })\n```\n:::\n\n\n\n## Serializing \n\nTo serialize Rust structs into R objects, use `extendr_api::serializer::to_robj`. This will take a serde-compatible struct and convert it into a corresponding R object. \n\n\n::: {.cell}\n\n```{.rust .cell-code}\nuse extendr_api::prelude::*;\nuse extendr_api::serializer::to_robj;\nuse extendr_api::deserializer::from_robj;\n#[extendr]\nfn round_trip(x: Robj) -> Result<Robj> {\n let point = from_robj::<Point>(&x)?;\n to_robj(&point)\n}\n```\n:::\n\n\nThis function will parse a list into a `Point` and then return the `Point` as an R object as well, doing a round-trip deserialization and serialization process. \n\n\n::: {.cell}\n\n```{.r .cell-code}\nround_trip(\n list(x = 3.0, y = 0.14)\n)\n#> $x\n#> [1] 3\n#> \n#> $y\n#> [1] 0.14\n```\n:::\n\n\n### Vectors of structs\n\nYou may find yourself wanting to deserialize many structs at once from vectors. For example, if you have a `data.frame` with 2 columns `x` and `y` you may want to deserialize this into a `Vec<Point>`. To your dismay you will find this is not actually possible. 
\n\nFor example we can create a function `replicate_point()`.\n\n\n::: {.cell}\n\n```{.rust .cell-code}\n#[extendr]\nfn replicate_point(x: Robj, n: i32) -> Result<Robj> {\n let point = from_robj::<Point>(&x)?;\n let points = vec![point; n as usize];\n to_robj(&points)\n}\n```\n:::\n\n\nThis will create a `Vec<Point>` with the size of `n`. If you serialize this to R you will get a list of lists where each sub-list is a named list with elements `x` and `y`. This is expected, and is quite like how you would expect something to be serialized into json or yaml, for example. \n\n\n::: {.cell}\n\n```{.r .cell-code}\nreplicate_point(list(x = 0.14, y = 10), 3L)\n#> [[1]]\n#> [[1]]$x\n#> [1] 0.14\n#> \n#> [[1]]$y\n#> [1] 10\n#> \n#> \n#> [[2]]\n#> [[2]]$x\n#> [1] 0.14\n#> \n#> [[2]]$y\n#> [1] 10\n#> \n#> \n#> [[3]]\n#> [[3]]$x\n#> [1] 0.14\n#> \n#> [[3]]$y\n#> [1] 10\n```\n:::\n\n\nWhen providing a `data.frame`, a closer analogue would be a struct with vectors for its fields, like a `MultiPoint` struct \n\n\n::: {.cell}\n\n```{.rust .cell-code}\n#[derive(Debug, Clone, Serialize, Deserialize)]\npub struct MultiPoint {\n x: Vec<f64>,\n y: Vec<f64>,\n}\n```\n:::\n\n\nand for the sake of demonstration we can create a `make_multipoint()` function: \n\n\n::: {.cell}\n\n```{.rust .cell-code}\n#[extendr]\nfn make_multipoint(x: Robj) -> Result<()> {\n let mpoint = from_robj::<MultiPoint>(&x)?;\n rprintln!(\"{mpoint:#?}\");\n Ok(())\n}\n```\n:::\n\n\nThis function can be used to parse a `data.frame` into a `MultiPoint`.\n\n\n::: {.cell}\n\n```{.r .cell-code}\nmake_multipoint(\n data.frame(x = 0:2, y = 9:7)\n)\n#> MultiPoint {\n#> x: [\n#> 0.0,\n#> 1.0,\n#> 2.0,\n#> ],\n#> y: [\n#> 9.0,\n#> 8.0,\n#> 7.0,\n#> ],\n#> }\n#> NULL\n```\n:::\n\n\n## Using `TryFrom` \n\n\nOne of the benefits and challenges of Rust is that it requires us to be explicit. Adding another language into play makes it all the more confusing! In many cases there isn't a 1:1 mapping from Rust to R as you have seen with `Point` and `MultiPoint`. 
One way to simplify this would be to use a `TryFrom` trait implementation. This is discussed in more detail in another part of the user guide. \n\nRather than use serde to do the conversion for you, you probably want a custom `TryFrom` trait implementation. Here we define an `MPoint` tuple struct and then implement `TryFrom<Robj>` for it.\n\n::: {.cell}\n\n```{.rust .cell-code}\npub struct MPoint(Vec<Point>);\n\nimpl TryFrom<Robj> for MPoint {\n type Error = Error;\n fn try_from(value: Robj) -> std::result::Result<Self, Self::Error> {\n let point_df = List::try_from(&value)?;\n let x_vec = Doubles::try_from(point_df.dollar(\"x\")?)?;\n let y_vec = Doubles::try_from(point_df.dollar(\"y\")?)?;\n let inner = x_vec.into_iter().zip(y_vec.into_iter()).map(|(x, y)| {\n Point {\n x: x.inner(),\n y: y.inner()\n }\n }).collect::<Vec<_>>();\n Ok(MPoint(inner))\n }\n}\n```\n:::\n\n\nThis gives us the benefit of being able to pass the struct type directly into the function. Here we create a function `centroid()` to calculate the centroid of the `MPoint` struct directly. We use `to_robj()` to convert it back to an `Robj`. \n\n\n::: {.cell}\n\n```{.rust .cell-code}\n#[extendr]\nfn centroid(x: MPoint) -> Result<Robj> {\n let total = x.0.into_iter().fold((0.0, 0.0, 0.0), |mut acc, next| {\n acc.0 += next.x;\n acc.1 += next.y;\n acc.2 += 1.0;\n acc\n });\n let centroid = Point {\n x: total.0 / total.2,\n y: total.1 / total.2\n };\n to_robj(&centroid)\n}\n```\n:::\n\n\nThis function can be used with a `data.frame` because we implemented the `TryFrom<Robj>` trait. 
\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncentroid(\n data.frame(x = rnorm(10), y = rnorm(10))\n)\n#> $x\n#> [1] -0.09167968\n#> \n#> $y\n#> [1] -0.1613052\n```\n:::\n", + "supporting": [], + "filters": [ + "rmarkdown/pagebreak.lua" + ], + "includes": {}, + "engineDependencies": {}, + "preserve": {}, + "postProcess": true + } +} \ No newline at end of file diff --git a/_quarto.yml b/_quarto.yml index 33ed6d2..4f1a787 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -51,6 +51,7 @@ website: - section: "Error Handling" contents: - user-guide/error-handling/basic-error-handling.qmd + - user-guide/serde-integration.qmd format: html: diff --git a/user-guide/serde-integration.qmd b/user-guide/serde-integration.qmd new file mode 100644 index 0000000..fc86e28 --- /dev/null +++ b/user-guide/serde-integration.qmd @@ -0,0 +1,261 @@ +--- +title: "`serde` integration" +freeze: true +--- + +One of the most widely used Rust crates is the **ser**ialization and **de**serialization crate [serde](https://serde.rs/). It enables Rust developers to write their custom structs to many different file formats such as json, toml, yaml, csv, and many more as well as read directly from them. + +`extendr` provides a `serde` feature that can convert R objects into structs and structs into R objects. + +First, modify your `Cargo.toml` to include the serde feature. 
+ +```{r, include = FALSE} +library(rextendr) +``` + +```{extendrsrc include=FALSE, engine.opts = list(dependencies = list(serde = list(features = c("derive", "default"), version = "*")), features = "serde") } +use extendr_api::prelude::*; +use extendr_api::deserializer::from_robj; +use extendr_api::serializer::to_robj; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Point { + x: f64, + y: f64 +} + +#[extendr] +fn point_from_r(x: Robj) { + let point = from_robj::<Point>(&x); + rprintln!("{point:?}"); +} + +#[extendr] +fn round_trip(x: Robj) -> Result<Robj> { + let point = from_robj::<Point>(&x)?; + to_robj(&point) +} + +#[extendr] +fn replicate_point(x: Robj, n: i32) -> Result<Robj> { + let point = from_robj::<Point>(&x)?; + let points = vec![point; n as usize]; + to_robj(&points) +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MultiPoint { + x: Vec<f64>, + y: Vec<f64>, +} + + +#[extendr] +fn make_multipoint(x: Robj) -> Result<()> { + let mpoint = from_robj::<MultiPoint>(&x)?; + rprintln!("{mpoint:#?}"); + Ok(()) +} + + +pub struct MPoint(Vec<Point>); + +impl TryFrom<Robj> for MPoint { + type Error = Error; + fn try_from(value: Robj) -> std::result::Result<Self, Self::Error> { + let point_df = List::try_from(&value)?; + let x_vec = Doubles::try_from(point_df.dollar("x")?)?; + let y_vec = Doubles::try_from(point_df.dollar("y")?)?; + let inner = x_vec.into_iter().zip(y_vec.into_iter()).map(|(x, y)| { + Point { + x: x.inner(), + y: y.inner() + } + }).collect::<Vec<_>>(); + Ok(MPoint(inner)) + } +} + +#[extendr] +fn centroid(x: MPoint) -> Result<Robj> { + let total = x.0.into_iter().fold((0.0, 0.0, 0.0), |mut acc, next| { + acc.0 += next.x; + acc.1 += next.y; + acc.2 += 1.0; + acc + }); + let centroid = Point { + x: total.0 / total.2, + y: total.1 / total.2 + }; + to_robj(&centroid) +} +``` + +```{toml filename="Cargo.toml"} +[dependencies] +extendr-api = { version = "*", features = ["serde"] } +``` + +For this example we will have a `Point` struct with two fields, `x`, and `y`. 
In your `lib.rs` include: + +```{rust filename="lib.rs"} +use extendr_api::prelude::*; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Point { + x: f64, + y: f64 +} +``` + +## Deserializing R objects + +This defines a `Point` struct. However, you may want to be able to use an R object to represent that point. To deserialize R objects into Rust, use `extendr_api::deserializer::from_robj`. For a basic example we can deserialize +an `Robj` into the `Point`. + +```{rust} +use extendr_api::deserializer::from_robj; + +#[extendr] +fn point_from_r(x: Robj) { + let point = from_robj::<Point>(&x); + rprintln!("{point:?}"); +} +``` + +To represent a struct, a named list has to be used. Each name must correspond with the field name of the struct. In this case these are `x` and `y`. + +```{r} +point <- list(x = 3.0, y = 0.14) +point_from_r(list(x = 3.0, y = 0.14)) +``` + + +## Serializing + +To serialize Rust structs into R objects, use `extendr_api::serializer::to_robj`. This will take a serde-compatible struct and convert it into a corresponding R object. + +```{rust} +use extendr_api::prelude::*; +use extendr_api::serializer::to_robj; +use extendr_api::deserializer::from_robj; +#[extendr] +fn round_trip(x: Robj) -> Result<Robj> { + let point = from_robj::<Point>(&x)?; + to_robj(&point) +} +``` + +This function will parse a list into a `Point` and then return the `Point` as an R object as well, doing a round-trip deserialization and serialization process. + +```{r} +round_trip( + list(x = 3.0, y = 0.14) +) +``` + +### Vectors of structs + +You may find yourself wanting to deserialize many structs at once from vectors. For example, if you have a `data.frame` with 2 columns `x` and `y` you may want to deserialize this into a `Vec<Point>`. To your dismay you will find this is not actually possible. + +For example we can create a function `replicate_point()`. 
+ +```{rust} +#[extendr] +fn replicate_point(x: Robj, n: i32) -> Result<Robj> { + let point = from_robj::<Point>(&x)?; + let points = vec![point; n as usize]; + to_robj(&points) +} +``` + +This will create a `Vec<Point>` with the size of `n`. If you serialize this to R you will get a list of lists where each sub-list is a named list with elements `x` and `y`. This is expected, and is quite like how you would expect something to be serialized into json or yaml, for example. + +```{r} +replicate_point(list(x = 0.14, y = 10), 3L) +``` + +When providing a `data.frame`, a closer analogue would be a struct with vectors for its fields, like a `MultiPoint` struct + +```{rust} +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MultiPoint { + x: Vec<f64>, + y: Vec<f64>, +} +``` + +and for the sake of demonstration we can create a `make_multipoint()` function: + +```{rust} +#[extendr] +fn make_multipoint(x: Robj) -> Result<()> { + let mpoint = from_robj::<MultiPoint>(&x)?; + rprintln!("{mpoint:#?}"); + Ok(()) +} +``` + +This function can be used to parse a `data.frame` into a `MultiPoint`. + +```{r} +make_multipoint( + data.frame(x = 0:2, y = 9:7) +) +``` + +## Using `TryFrom` + + +One of the benefits and challenges of Rust is that it requires us to be explicit. Adding another language into play makes it all the more confusing! In many cases there isn't a 1:1 mapping from Rust to R as you have seen with `Point` and `MultiPoint`. One way to simplify this would be to use a `TryFrom` trait implementation. This is discussed in more detail in another part of the user guide. + +Rather than use serde to do the conversion for you, you probably want a custom `TryFrom` trait implementation. Here we define an `MPoint` tuple struct and then implement `TryFrom<Robj>` for it. 
+```{rust} +pub struct MPoint(Vec<Point>); + +impl TryFrom<Robj> for MPoint { + type Error = Error; + fn try_from(value: Robj) -> std::result::Result<Self, Self::Error> { + let point_df = List::try_from(&value)?; + let x_vec = Doubles::try_from(point_df.dollar("x")?)?; + let y_vec = Doubles::try_from(point_df.dollar("y")?)?; + let inner = x_vec.into_iter().zip(y_vec.into_iter()).map(|(x, y)| { + Point { + x: x.inner(), + y: y.inner() + } + }).collect::<Vec<_>>(); + Ok(MPoint(inner)) + } +} +``` + +This gives us the benefit of being able to pass the struct type directly into the function. Here we create a function `centroid()` to calculate the centroid of the `MPoint` struct directly. We use `to_robj()` to convert it back to an `Robj`. + +```{rust} +#[extendr] +fn centroid(x: MPoint) -> Result<Robj> { + let total = x.0.into_iter().fold((0.0, 0.0, 0.0), |mut acc, next| { + acc.0 += next.x; + acc.1 += next.y; + acc.2 += 1.0; + acc + }); + let centroid = Point { + x: total.0 / total.2, + y: total.1 / total.2 + }; + to_robj(&centroid) +} +``` + +This function can be used with a `data.frame` because we implemented the `TryFrom<Robj>` trait. + +```{r} +centroid( + data.frame(x = rnorm(10), y = rnorm(10)) +) +```