From 68e211220523b8bdab0d7bcc510867ab43b20719 Mon Sep 17 00:00:00 2001
From: Avik Pal
Date: Mon, 2 Oct 2023 11:14:08 -0400
Subject: [PATCH] [skip tests] [skip docs] start joss paper draft

---
 .github/workflows/JOSSPaper.yml | 23 +++++++++++
 joss/paper.bib                  | 71 +++++++++++++++++++++++++++++++++
 joss/paper.md                   | 69 ++++++++++++++++++++++++++++++++
 3 files changed, 163 insertions(+)
 create mode 100644 .github/workflows/JOSSPaper.yml
 create mode 100644 joss/paper.bib
 create mode 100644 joss/paper.md

diff --git a/.github/workflows/JOSSPaper.yml b/.github/workflows/JOSSPaper.yml
new file mode 100644
index 000000000..8effae6d1
--- /dev/null
+++ b/.github/workflows/JOSSPaper.yml
@@ -0,0 +1,23 @@
+on: [push]
+
+jobs:
+  paper:
+    runs-on: ubuntu-latest
+    name: Paper Draft
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Build draft PDF
+        uses: openjournals/openjournals-draft-action@master
+        with:
+          journal: joss
+          # This should be the path to the paper within your repo.
+          paper-path: joss/paper.md
+      - name: Upload
+        uses: actions/upload-artifact@v1
+        with:
+          name: paper
+          # This is the output path where Pandoc will write the compiled
+          # PDF. Note, this should be the same directory as the input
+          # paper.md
+          path: joss/paper.pdf
\ No newline at end of file
diff --git a/joss/paper.bib b/joss/paper.bib
new file mode 100644
index 000000000..47350cf2a
--- /dev/null
+++ b/joss/paper.bib
@@ -0,0 +1,71 @@
+@article{bai2019deep,
+  title   = {Deep equilibrium models},
+  author  = {Bai, Shaojie and Kolter, J Zico and Koltun, Vladlen},
+  journal = {arXiv preprint arXiv:1909.01377},
+  year    = {2019}
+}
+
+@inproceedings{enzyme:2020,
+  author    = {Moses, William and Churavy, Valentin},
+  booktitle = {Advances in Neural Information Processing Systems},
+  editor    = {H. Larochelle and M. Ranzato and R. Hadsell and M. F. Balcan and H. Lin},
+  pages     = {12472--12485},
+  publisher = {Curran Associates, Inc.},
+  title     = {Instead of Rewriting Foreign Code for Machine Learning, Automatically Synthesize Fast Gradients},
+  url       = {https://proceedings.neurips.cc/paper/2020/file/9332c513ef44b682e9347822c2e457ac-Paper.pdf},
+  volume    = {33},
+  year      = {2020}
+}
+
+@inproceedings{enzyme:2021,
+  author    = {Moses, William S. and Churavy, Valentin and Paehler, Ludger and H\"{u}ckelheim, Jan and Narayanan, Sri Hari Krishna and Schanen, Michel and Doerfert, Johannes},
+  title     = {Reverse-Mode Automatic Differentiation and Optimization of GPU Kernels via Enzyme},
+  year      = {2021},
+  isbn      = {9781450384421},
+  publisher = {Association for Computing Machinery},
+  address   = {New York, NY, USA},
+  url       = {https://doi.org/10.1145/3458817.3476165},
+  doi       = {10.1145/3458817.3476165},
+  booktitle = {Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis},
+  articleno = {61},
+  numpages  = {16},
+  keywords  = {CUDA, LLVM, ROCm, HPC, AD, GPU, automatic differentiation},
+  location  = {St. Louis, Missouri},
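+
+A minimal sketch of this interface is shown below (the layer sizes and input here are
+illustrative):
+
+```julia
+using Lux, Random
+
+rng = Random.default_rng()
+
+# The model is an immutable description of the computation; it holds no parameters.
+model = Chain(Dense(2 => 16, tanh), Dense(16 => 1))
+
+# Parameters and states are created separately and passed around explicitly.
+ps, st = Lux.setup(rng, model)
+
+x = randn(rng, Float32, 2, 8)
+
+# Applying the model is a pure function of the input, parameters, and state.
+y, st = model(x, ps, st)
+```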
+  series    = {SC '21}
+}
+
+@misc{innes2018fashionable,
+  title         = {Fashionable Modelling with Flux},
+  author        = {Michael Innes and Elliot Saba and Keno Fischer and Dhairya Gandhi and Marco Concetto Rudilosso and Neethu Mariya Joy and Tejan Karmali and Avik Pal and Viral Shah},
+  year          = {2018},
+  eprint        = {1811.01457},
+  archiveprefix = {arXiv},
+  primaryclass  = {cs.PL}
+}
+
+@misc{pal2023continuous,
+  title         = {Continuous Deep Equilibrium Models: Training Neural ODEs Faster by Integrating Them to Infinity},
+  author        = {Avik Pal and Alan Edelman and Christopher Rackauckas},
+  year          = {2022},
+  eprint        = {2201.12240},
+  archiveprefix = {arXiv},
+  primaryclass  = {cs.LG}
+}
+
+@misc{simplechains,
+  author    = {Elrod, Chris},
+  title     = {SimpleChains.jl},
+  year      = {2021},
+  publisher = {GitHub},
+  journal   = {GitHub repository},
+  url       = {https://github.com/PumasAI/SimpleChains.jl}
+}
+
+@inproceedings{yuret2016knet,
+  title     = {Knet: beginning deep learning with 100 lines of Julia},
+  author    = {Yuret, Deniz},
+  booktitle = {Machine Learning Systems Workshop at NIPS},
+  volume    = {2016},
+  pages     = {5},
+  year      = {2016}
+}
\ No newline at end of file
diff --git a/joss/paper.md b/joss/paper.md
new file mode 100644
index 000000000..c3199c319
--- /dev/null
+++ b/joss/paper.md
@@ -0,0 +1,69 @@
+---
+title: 'Lux.jl: Bridging Scientific Computing & Deep Learning'
+tags:
+  - Julia
+  - Deep Learning
+  - Scientific Computing
+  - Neural Ordinary Differential Equations
+  - Deep Equilibrium Models
+authors:
+  - name: Avik Pal
+    orcid: 0000-0002-3938-7375
+    affiliation: "1"
+affiliations:
+  - name: Electrical Engineering and Computer Science, CSAIL, MIT
+    index: 1
+date: 2 October 2023
+bibliography: paper.bib
+---
+
+# Summary
+
+Combining machine learning and scientific computing has recently led to the development
+of methods such as Universal Differential Equations, Neural Differential Equations, and
+Deep Equilibrium Models, which have been pushing the boundaries of the physical sciences.
+However, every major deep learning framework requires numerical software to be rewritten
+to satisfy its specific requirements. Lux.jl is a deep learning framework written in
+Julia with the right abstractions to provide seamless composability with scientific
+computing software. Lux uses pure functions to provide a compiler and automatic
+differentiation friendly interface without compromising performance.
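+
+As a minimal sketch (assuming the `ODEProblem`/`solve` interface of OrdinaryDiffEq.jl;
+the network, initial condition, and time span here are illustrative), a Lux model can
+serve directly as the right-hand side of an ODE:
+
+```julia
+using Lux, OrdinaryDiffEq, Random
+
+rng = Random.default_rng()
+
+model = Chain(Dense(2 => 16, tanh), Dense(16 => 2))
+ps, st = Lux.setup(rng, model)
+
+# The ODE right-hand side is simply the (pure) application of the model.
+dudt(u, p, t) = first(model(u, p, st))
+
+u0 = randn(rng, Float32, 2)
+prob = ODEProblem(dudt, u0, (0.0f0, 1.0f0), ps)
+sol = solve(prob, Tsit5(); saveat = 0.1f0)
+```
+
+Because the parameters `ps` are carried through the `ODEProblem`, the solve can be
+differentiated with respect to them, e.g., via SciMLSensitivity.jl.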
+
+# Statement of Need
+
+Julia already has well-established neural network frameworks – Flux
+[@innes2018fashionable] and Knet [@yuret2016knet]. However, certain design elements –
+*Coupled Model and Parameters* and *Internal Mutations* – associated with these
+frameworks make them less compiler and automatic differentiation friendly.
+
+## Switching Automatic Differentiation Frameworks
+
+## Support for CPU, NVIDIA GPUs and AMD GPUs
+
+## Composability with Scientific Computing Software
+
+In this section, we go over a couple of examples that show how Lux.jl can be used
+together with other scientific computing software. Lux.jl has an extensive
+[manual](https://lux.csail.mit.edu/dev/manual/interface),
+[tutorials](https://lux.csail.mit.edu/dev/tutorials/), and an
+[API Reference](https://lux.csail.mit.edu/dev/api/) showcasing this composability in
+more detail. See the sketch under the next heading for a concrete example.
+
+### Neural Ordinary Differential Equations
+
+### Deep Equilibrium Models
+
+Deep Equilibrium Models [@bai2019deep; @pal2023continuous] are a class of neural networks
+where the output of the model is the steady state of a dynamical system defined by an
+internal neural network.
+
+## Ecosystem
+
+# Limitations
+
+Lux.jl is still in its early days of development and has the following known limitations:
+
+* Training small neural networks on CPUs is not yet optimized. For small networks,
+  [SimpleChains.jl](https://github.com/PumasAI/SimpleChains.jl) [@simplechains] is the
+  fastest option!
+* Nested automatic differentiation is currently not well supported. We hope to fix this
+  soon with a migration to the Enzyme automatic differentiation framework
+  [@enzyme:2020; @enzyme:2021].
+
+# References