From f69fb236cee909ddfdbbc7bdc531231660c4f269 Mon Sep 17 00:00:00 2001 From: Tom White Date: Tue, 25 Jun 2024 10:56:38 +0100 Subject: [PATCH] Update slides - June 2024 --- docs/slides/intro/cubed-intro.ipynb | 236 +++++++++++++++------- docs/slides/intro/cubed-intro.slides.html | 69 ++++--- docs/slides/intro/fusion-unoptimized.png | Bin 16665 -> 0 bytes docs/slides/intro/fusion.png | Bin 9439 -> 0 bytes docs/slides/intro/toy-optimized.png | Bin 0 -> 25464 bytes docs/slides/intro/toy-unoptimized.png | Bin 0 -> 33040 bytes 6 files changed, 209 insertions(+), 96 deletions(-) delete mode 100644 docs/slides/intro/fusion-unoptimized.png delete mode 100644 docs/slides/intro/fusion.png create mode 100644 docs/slides/intro/toy-optimized.png create mode 100644 docs/slides/intro/toy-unoptimized.png diff --git a/docs/slides/intro/cubed-intro.ipynb b/docs/slides/intro/cubed-intro.ipynb index a6e5f433..f2904dea 100644 --- a/docs/slides/intro/cubed-intro.ipynb +++ b/docs/slides/intro/cubed-intro.ipynb @@ -11,7 +11,7 @@ "source": [ "# Cubed: an introduction\n", "\n", - "Tom White, November 2023" + "Tom White, June 2024" ] }, { @@ -207,9 +207,9 @@ "source": [ "# Example: `reduction`\n", "\n", - "![`reduction`](../../images/reduction.svg)\n", + "![`reduction`](../../images/reduction_new.svg)\n", "\n", - "Implemented using multiple rounds of calls to `blockwise` and `rechunk`." + "Implemented using multiple rounds of a tree reduce operation followed by a final aggregation." ] }, { @@ -239,72 +239,159 @@ { "data": { "image/svg+xml": [ - "\n", - "\n", - "\n", - "num tasks: 4\n", - "max projected memory: 100.0 MB\n", - "\n", + "\n", + "\n", + "\n", + "num tasks: 5\n", + "max projected memory: 100.0 MB\n", + "total nbytes written: 72 bytes\n", + "optimized: True\n", + "\n", "\n", - "array-001\n", - "op-001\n", + "\n", - "\n", - "array-001\n", - "asarray \n", + "\n", + "op-001\n", + "asarray\n", "\n", "\n", "\n", - "\n", - "\n", - "array-004\n", - "\n", + "\n", + "array-001\n", + "\n", + "\n", + "array-001\n", + "a\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-001->array-001\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-004\n", + "\n", - "\n", - "array-004\n", - "add (bw)\n", + "\n", + "op-004\n", + "add\n", + "tasks: 4\n", "\n", "\n", "\n", - "\n", - "\n", - "array-001->array-004\n", - "\n", - "\n", + "\n", + "\n", + "array-001->op-004\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-002\n", + "\n", + "\n", + "op-002\n", + "asarray\n", + "\n", + "\n", "\n", "\n", - "\n", + "\n", "array-002\n", - "\n", + "\n", + "array-002\n", + "b\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "op-002->array-002\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-002->op-004\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "array-004\n", + "\n", - "\n", - "array-002\n", - "asarray \n", + "nbytes: 72 bytes\">\n", + "\n", + "array-004\n", + "c\n", "\n", "\n", "\n", - "\n", - "\n", - "array-002->array-004\n", - "\n", - "\n", + "\n", + "\n", + "op-004->array-004\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "create-arrays\n", + "\n", + "\n", + "create-arrays\n", + "tasks: 1\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "arrays\n", + "\n", + "\n", + "arrays\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "create-arrays->arrays\n", + "\n", + "\n", "\n", "\n", "" @@ -346,14 +433,26 @@ "source": [ "# Optimization\n", "\n", - "Cubed will optimize the graph before computing it - by fusing blockwise (map) operations.\n", + "Cubed will automatically optimize the graph before computing it. For example by fusing blockwise (map) operations:\n", "\n", "

\n", - " \n", - " \n", - "

\n", + " \n", + " \n", + "

" + ] + }, + { + "cell_type": "markdown", + "id": "925fff3c-5531-4953-891e-b382583de56b", + "metadata": {}, + "source": [ + "# Optimization: an advanced example\n", + "\n", + "In early 2024 we implemented more optimizations to give a **4.8x** performance improvement on the \"Quadratic Means\" climate workload running on Lithops with AWS Lambda, with a **1.5 TB** workload completing in around **100 seconds**\n", "\n", - "This is a simple case - still lots of optimizations left to do." + "\n", + "\n", + "More details in [Optimizing Cubed](https://medium.com/pangeo/optimizing-cubed-7a0b8f65f5b7)\n" ] }, { @@ -452,30 +551,30 @@ }, { "cell_type": "markdown", - "id": "b1fb4379", + "id": "d5a1fddd", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ - "* __Modal__: new serverless platform\n", - " * Very easy to set up since it builds the runtime automatically\n", - " * Tested with ~300 workers" + "* __Lithops__: multi-cloud serverless computing framework\n", + " * Slightly more work to get started since you have to build a runtime environment first\n", + " * Tested on AWS Lambda and Google Cloud Functions with ~1000 workers" ] }, { "cell_type": "markdown", - "id": "d5a1fddd", + "id": "b1fb4379", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ - "* __Lithops__: multi-cloud serverless computing framework\n", - " * Slightly more work to get started since you have to build a runtime environment first\n", - " * Tested on AWS Lambda and Google Cloud Functions with ~1000 workers" + "* __Modal__: new serverless platform\n", + " * Very easy to set up since it builds the runtime automatically\n", + " * Tested with ~300 workers" ] }, { @@ -525,7 +624,7 @@ "* Retries\n", " * Each task is tried three times before failing\n", "* Stragglers\n", - " * A backup task will be launched if a task is taking significantly longer than average (off by default)" + " * A backup task will be launched if a task is taking significantly longer than average" ] }, { @@ -539,10 +638,10 @@ "source": [ "# Xarray integration\n", "\n", - "* Tom Nicholas added [Generalize handling of chunked array types](https://github.com/pydata/xarray/pull/7019) to Xarray\n", - " * Xarray can use Cubed as its computation engine instead of Dask\n", - " * Also needs [cubed-xarray](https://github.com/xarray-contrib/cubed-xarray) integration package\n", - "* Examples at https://github.com/pangeo-data/distributed-array-examples" + "* Xarray can use Cubed as its computation engine instead of Dask\n", + " * Just install the [cubed-xarray](https://github.com/xarray-contrib/cubed-xarray) integration package\n", + "* Cubed can use [Flox](https://flox.readthedocs.io/en/latest/) for `groupby` operations\n", + " * Examples at https://flox.readthedocs.io/en/latest/user-stories/climatology-hourly-cubed.html" ] }, { @@ -554,13 +653,12 @@ } }, "source": [ - "# Next steps\n", + "# Try out Cubed!\n", "\n", - "* Community\n", - "* Examples and use cases\n", - " * Pangeo\n", - " * sgkit\n", - "* [Optimizations](https://github.com/tomwhite/cubed/issues?q=is%3Aissue+is%3Aopen+label%3Aoptimization)" + "* Try it out on your use case\n", + " * Get started at https://cubed-dev.github.io/cubed/\n", + "* Some examples from the Pangeo community:\n", + " * https://github.com/pangeo-data/distributed-array-examples" ] } ], diff --git a/docs/slides/intro/cubed-intro.slides.html b/docs/slides/intro/cubed-intro.slides.html index 6f4dbcd3..f9725cac 100644 --- a/docs/slides/intro/cubed-intro.slides.html +++ b/docs/slides/intro/cubed-intro.slides.html @@ -7440,7 +7440,7 @@
@@ -7608,8 +7608,8 @@

Example: map_direct @@ -7646,7 +7646,7 @@

Computation plan @@ -7669,12 +7669,24 @@

Computation plan + + + +
-