diff --git a/.all-contributorsrc b/.all-contributorsrc index 66be6031..ec7ff5a1 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -7,55 +7,46 @@ ], "contributors": [ { - "login": "mpstewart1", - "name": "Matthew Stewart", - "avatar_url": "https://avatars.githubusercontent.com/mpstewart1", - "profile": "https://github.com/mpstewart1", - "contributions": [ - "doc" - ] - }, - { - "login": "profvjreddi", - "name": "Vijay Janapa Reddi", - "avatar_url": "https://avatars.githubusercontent.com/profvjreddi", - "profile": "https://github.com/profvjreddi", + "login": "ShvetankPrakash", + "name": "Shvetank Prakash", + "avatar_url": "https://avatars.githubusercontent.com/ShvetankPrakash", + "profile": "https://github.com/ShvetankPrakash", "contributions": [ "doc" ] }, { - "login": "ishapira1", - "name": "ishapira", - "avatar_url": "https://avatars.githubusercontent.com/ishapira1", - "profile": "https://github.com/ishapira1", + "login": "aptl26", + "name": "aptl26", + "avatar_url": "https://avatars.githubusercontent.com/aptl26", + "profile": "https://github.com/aptl26", "contributions": [ "doc" ] }, { - "login": "Mjrovai", - "name": "Marcelo Rovai", - "avatar_url": "https://avatars.githubusercontent.com/Mjrovai", - "profile": "https://github.com/Mjrovai", + "login": "sjohri20", + "name": "sjohri20", + "avatar_url": "https://avatars.githubusercontent.com/sjohri20", + "profile": "https://github.com/sjohri20", "contributions": [ "doc" ] }, { - "login": "oishib", - "name": "oishib", - "avatar_url": "https://avatars.githubusercontent.com/oishib", - "profile": "https://github.com/oishib", + "login": "jaysonzlin", + "name": "Jayson Lin", + "avatar_url": "https://avatars.githubusercontent.com/jaysonzlin", + "profile": "https://github.com/jaysonzlin", "contributions": [ "doc" ] }, { - "login": "uchendui", - "name": "Ikechukwu Uchendu", - "avatar_url": "https://avatars.githubusercontent.com/uchendui", - "profile": "https://github.com/uchendui", + "login": "BaeHenryS", + "name": "Henry Bae", + "avatar_url": "https://avatars.githubusercontent.com/BaeHenryS", + "profile": "https://github.com/BaeHenryS", "contributions": [ "doc" ] @@ -79,28 +70,19 @@ ] }, { - "login": "sophiacho1", - "name": "sophiacho1", - "avatar_url": "https://avatars.githubusercontent.com/sophiacho1", - "profile": "https://github.com/sophiacho1", + "login": "18jeffreyma", + "name": "Jeffrey Ma", + "avatar_url": "https://avatars.githubusercontent.com/18jeffreyma", + "profile": "https://github.com/18jeffreyma", "contributions": [ "doc" ] }, { - "login": "ShvetankPrakash", - "name": "Shvetank Prakash", - "avatar_url": "https://avatars.githubusercontent.com/ShvetankPrakash", - "profile": "https://github.com/ShvetankPrakash", - "contributions": [ - "doc" - ] - }, - { - "login": "colbybanbury", - "name": "Colby Banbury", - "avatar_url": "https://avatars.githubusercontent.com/colbybanbury", - "profile": "https://github.com/colbybanbury", + "login": "uchendui", + "name": "Ikechukwu Uchendu", + "avatar_url": "https://avatars.githubusercontent.com/uchendui", + "profile": "https://github.com/uchendui", "contributions": [ "doc" ] @@ -124,19 +106,46 @@ ] }, { - "login": "BaeHenryS", - "name": "Henry Bae", - "avatar_url": "https://avatars.githubusercontent.com/BaeHenryS", - "profile": "https://github.com/BaeHenryS", + "login": "mpstewart1", + "name": "Matthew Stewart", + "avatar_url": "https://avatars.githubusercontent.com/mpstewart1", + "profile": "https://github.com/mpstewart1", "contributions": [ "doc" ] }, { - "login": "sjohri20", - "name": 
"sjohri20", - "avatar_url": "https://avatars.githubusercontent.com/sjohri20", - "profile": "https://github.com/sjohri20", + "login": "Mjrovai", + "name": "Marcelo Rovai", + "avatar_url": "https://avatars.githubusercontent.com/Mjrovai", + "profile": "https://github.com/Mjrovai", + "contributions": [ + "doc" + ] + }, + { + "login": "oishib", + "name": "oishib", + "avatar_url": "https://avatars.githubusercontent.com/oishib", + "profile": "https://github.com/oishib", + "contributions": [ + "doc" + ] + }, + { + "login": "profvjreddi", + "name": "Vijay Janapa Reddi", + "avatar_url": "https://avatars.githubusercontent.com/profvjreddi", + "profile": "https://github.com/profvjreddi", + "contributions": [ + "doc" + ] + }, + { + "login": "colbybanbury", + "name": "Colby Banbury", + "avatar_url": "https://avatars.githubusercontent.com/colbybanbury", + "profile": "https://github.com/colbybanbury", "contributions": [ "doc" ] @@ -149,10 +158,28 @@ "contributions": [ "doc" ] + }, + { + "login": "ishapira1", + "name": "ishapira", + "avatar_url": "https://avatars.githubusercontent.com/ishapira1", + "profile": "https://github.com/ishapira1", + "contributions": [ + "doc" + ] + }, + { + "login": "sophiacho1", + "name": "sophiacho1", + "avatar_url": "https://avatars.githubusercontent.com/sophiacho1", + "profile": "https://github.com/sophiacho1", + "contributions": [ + "doc" + ] } ], "repoType": "github", - "contributorsPerLine": 7, + "contributorsPerLine": 5, "repoHost": "https=//github.com", "commitConvention": "angular", "skipCi": true, diff --git a/.github/workflows/contributors/update_contributors.py b/.github/workflows/contributors/update_contributors.py index e9f10ef8..e9f19509 100644 --- a/.github/workflows/contributors/update_contributors.py +++ b/.github/workflows/contributors/update_contributors.py @@ -1,3 +1,4 @@ +import collections import json import os @@ -31,7 +32,9 @@ def main(_): last_page = res.links.get('last', {}).get('url', None) user_to_name_dict = dict() - users_from_api = [] + name_to_user_dict = dict() + users_from_api = set() + user_full_names_from_api = set() for node in data: commit_info = node.get('commit', None) @@ -39,7 +42,7 @@ def main(_): commit_commiter_info = commit_info.get('committer', None) author_info = node.get('author', None) committer_info = node.get('committer', None) - committer_login_info = committer_info.get('login', None) + committer_login_info = committer_info.get('login', None) if committer_info else None user_full_name = None username = None @@ -53,13 +56,13 @@ def main(_): elif committer_login_info: username = committer_login_info['login'] - assert user_full_name is not None, 'User full name should not be None' - assert username is not None, 'Username should not be None' + if user_full_name: + name_to_user_dict[user_full_name] = username if username else None + user_full_names_from_api.add(user_full_name) + if username: + user_to_name_dict[username] = user_full_name if user_full_name else None + users_from_api.add(username) - user_to_name_dict[username] = user_full_name - users_from_api.append(username) - - users_from_api = set(users_from_api) print('Users pulled from API: ', users_from_api) with open(CONTRIBUTORS_FILE, 'r') as contrib_file: @@ -78,7 +81,7 @@ def main(_): users_from_api), 'All contributors in the .all-contributorsrc file should be pulled using the API' new_contributor_logins = users_from_api - existing_contributor_logins_set - print('New contributors: ', new_contributor_logins) + print('New contributors: ', new_contributor_logins - 
EXCLUDED_USERS) result = users_from_api - EXCLUDED_USERS @@ -87,14 +90,15 @@ projectOwner=OWNER, files=["contributors.qmd", "README.md"], contributors=[dict(login=user, - name=user_to_name_dict[user], + name=user_to_name_dict[user] or user, + # If the user has no full name listed, use their username avatar_url=f'https://avatars.githubusercontent.com/{user}', profile=f'https://github.com/{user}', contributions=['doc'], ) for user in result], repoType='github', - contributorsPerLine=7, + contributorsPerLine=5, repoHost="https://github.com", commitConvention='angular', skipCi=True, diff --git a/README.md b/README.md index 5160bb90..3519e09c 100644 --- a/README.md +++ b/README.md @@ -88,26 +88,31 @@ quarto render
[HTML contributor-table diff: the README avatar grid is reflowed from seven to five cells per row and its entries reordered; each cell links a contributor's GitHub profile with their avatar and a 📖 documentation badge. Contributors shown: Matthew Stewart, Vijay Janapa Reddi, ishapira, Marcelo Rovai, oishib, Ikechukwu Uchendu, naeemkh, Shvetank Prakash, aptl26, sjohri20, Jayson Lin, Henry Bae, Mark Mazumder, sophiacho1, Colby Banbury, Jessica Quaye, Divya, Jeffrey Ma, Marco Zennaro.]
diff --git a/ai_for_good.qmd b/ai_for_good.qmd index cecde81f..33b946ac 100644 --- a/ai_for_good.qmd +++ b/ai_for_good.qmd @@ -1,5 +1,8 @@ # AI for Good +![_DALL·E 3 Prompt: Illustration of planet Earth wrapped in shimmering neural networks, with diverse humans and AI robots working together on various projects like planting trees, cleaning the oceans, and developing sustainable energy solutions. The positive and hopeful atmosphere represents a united effort to create a better future._](images/cover_ai_good.png) + + By aligning AI progress with human values, goals, and ethics, the ultimate goal of ML systems (at any scale) is to be a technology that reflects human principles and aspirations. Initiatives under "AI for Good" promote the development of AI to tackle the [UN Sustainable Development Goals](https://www.undp.org/sustainable-development-goals) (SDGs) using embedded AI technologies, expanding access to AI education, amongst other things. While it is now clear that AI will be an instrumental part of progress towards the SDGs, its adoption and impact are limited by the immense power consumption, strong connectivity requirements, and high costs of cloud-based deployments. TinyML, allowing ML models to run on low-cost and low-power microcontrollers, can circumvent many of these issues. > The "AI for Good" movement plays a critical role in cultivating a future where an AI-empowered society is more just, sustainable, and prosperous for all of humanity. diff --git a/contributors.qmd b/contributors.qmd index 7b958325..24df99e1 100644 --- a/contributors.qmd +++ b/contributors.qmd @@ -8,26 +8,31 @@ We extend our sincere thanks to the diverse group of individuals who have genero
[HTML contributor-table diff for contributors.qmd: the same avatar grid as in README.md above, reflowed from seven to five cells per row with identical contributor entries and 📖 documentation badges.]
diff --git a/data_engineering.qmd b/data_engineering.qmd index 47d987fe..c498efea 100644 --- a/data_engineering.qmd +++ b/data_engineering.qmd @@ -1,6 +1,6 @@ # Data Engineering -![_DALL·E 3 Prompt: Illustration in a rectangular format with a cool blue color palette visualizing the Data Engineering process. Starting on the left with icons of raw data sources, they connect to a central hub symbolized by swirling gears and pipelines in shades of blue. This represents the transformation, cleaning, and storage processes. On the right, datasets in refined formats are symbolized by sleek database icons and a machine learning model. Flow lines in varying blue tones connect each element, emphasizing the transition and importance of each data engineering stage._](./images/cover_data_engineering.png) +![_DALL·E 3 Prompt: Create a rectangular illustration visualizing the concept of data engineering. Include elements such as raw data sources, data processing pipelines, storage systems, and refined datasets. Show how raw data is transformed through cleaning, processing, and storage to become valuable information that can be analyzed and used for decision-making._](./images/cover_data_engineering.png) Data is the lifeblood of AI systems. Without good data, even the most advanced machine learning algorithms will fail. In this section, we will dive into the intricacies of building high-quality datasets to fuel our AI models. Data engineering encompasses the processes of collecting, storing, processing, and managing data for training machine learning models. diff --git a/hw_acceleration.qmd b/hw_acceleration.qmd index 266b096f..214f0d9a 100644 --- a/hw_acceleration.qmd +++ b/hw_acceleration.qmd @@ -1,5 +1,7 @@ # AI Acceleration +![_DALL·E 3 Prompt: Create an intricate and colorful representation of a System on Chip (SoC) design in a rectangular format. Showcase a variety of specialized machine learning accelerators and chiplets, all integrated into the processor. Provide a detailed view inside the chip, highlighting the rapid movement of electrons. Each accelerator and chiplet should be designed to interact with neural network neurons, layers, and activations, emphasizing their processing speed.
Depict the neural networks as a network of interconnected nodes, with vibrant data streams flowing between the accelerator pieces, showcasing the enhanced computation speed._](./images/cover_ai_hardware.png) + ::: {.callout-tip} ## Learning Objectives diff --git a/images/cover_ai_acceleration.png b/images/cover_ai_acceleration.png new file mode 100644 index 00000000..11f2fa5e Binary files /dev/null and b/images/cover_ai_acceleration.png differ diff --git a/images/cover_ai_good.png b/images/cover_ai_good.png new file mode 100644 index 00000000..f10afe12 Binary files /dev/null and b/images/cover_ai_good.png differ diff --git a/images/cover_ai_hardware.png b/images/cover_ai_hardware.png new file mode 100644 index 00000000..f78d43c8 Binary files /dev/null and b/images/cover_ai_hardware.png differ diff --git a/images/cover_ai_workflow.png b/images/cover_ai_workflow.png index 74e2c8eb..0fe3ed35 100644 Binary files a/images/cover_ai_workflow.png and b/images/cover_ai_workflow.png differ diff --git a/images/cover_data_engineering.png b/images/cover_data_engineering.png index 35641f43..aec6b8ee 100644 Binary files a/images/cover_data_engineering.png and b/images/cover_data_engineering.png differ diff --git a/images/cover_model_optimizations.png b/images/cover_model_optimizations.png new file mode 100644 index 00000000..41cc302a Binary files /dev/null and b/images/cover_model_optimizations.png differ diff --git a/images/cover_ondevice_learning.png b/images/cover_ondevice_learning.png new file mode 100644 index 00000000..e44f6b6b Binary files /dev/null and b/images/cover_ondevice_learning.png differ diff --git a/images/cover_sustainable_ai.png b/images/cover_sustainable_ai.png new file mode 100644 index 00000000..3489befe Binary files /dev/null and b/images/cover_sustainable_ai.png differ diff --git a/ondevice_learning.qmd b/ondevice_learning.qmd index 4953b24a..72e3f2ed 100644 --- a/ondevice_learning.qmd +++ b/ondevice_learning.qmd @@ -1,5 +1,7 @@ # On-Device Learning +![_DALL·E 3 Prompt: Drawing of a smartphone with its internal components exposed, revealing diverse miniature engineers of different genders and skin tones actively working on the machine learning model. The engineers, including men, women, and non-binary individuals, are tuning parameters, repairing connections, and enhancing the network on the fly. Data flows into the machine learning model, being processed in real-time, and generating output inferences._](./images/cover_ondevice_learning.png) + ::: {.callout-tip} ## Learning Objectives diff --git a/ops.qmd b/ops.qmd index 63a4f402..316b1360 100644 --- a/ops.qmd +++ b/ops.qmd @@ -1,5 +1,7 @@ # Embedded AIOps +![_DALL·E 3 Prompt: Rectangular 3D render of a neural network in the center, surrounded by different stages of MLOps in a circular flow, creatively influencing the model production. Each stage, such as data collection, model training, validation, deployment, and monitoring, is represented by unique 3D icons or illustrations.
Diverse people in different roles, such as data scientists, engineers, and business users, are working on each stage, actively contributing to the model production._](./images/cover_ml_ops.png) + ::: {.callout-tip} ## Learning Objectives diff --git a/optimizations.qmd b/optimizations.qmd index 1f947d93..cdb9c346 100644 --- a/optimizations.qmd +++ b/optimizations.qmd @@ -1,16 +1,28 @@ # Model Optimizations +![_DALL·E 3 Prompt: Illustration of a neural network model represented as a busy construction site, with a diverse group of construction workers, both male and female, of various ethnicities, labeled as 'pruning', 'quantization', and 'sparsity'. They are working together to make the neural network more efficient and smaller, while maintaining high accuracy. The 'pruning' worker, a Hispanic female, is cutting unnecessary connections from the middle of the network. The 'quantization' worker, a Caucasian male, is adjusting or tweaking the weights all over the place. The 'sparsity' worker, an African female, is removing unnecessary nodes to shrink the model. Construction trucks and cranes are in the background, assisting the workers in their tasks. The neural network is visually transforming from a complex and large structure to a more streamlined and smaller one._](./images/cover_model_optimizations.png) + +When machine learning models are deployed on systems, especially on resource-constrained embedded systems, the optimization of models is a necessity. While machine learning often demands substantial computational resources, such systems are inherently limited in memory, processing power, and energy. This chapter will dive into the art and science of optimizing machine learning models to ensure they are lightweight, efficient, and effective when deployed in TinyML scenarios. + ::: {.callout-tip} ## Learning Objectives -* coming soon. +* Learn techniques like pruning, knowledge distillation, and specialized model architectures to represent models more efficiently + +* Understand quantization methods to reduce model size and enable faster inference through reduced-precision numerics + +* Explore hardware-aware optimization approaches to match models to target device capabilities + +* Discover software tools like frameworks and model conversion platforms that enable deployment of optimized models + +* Develop holistic thinking to balance tradeoffs in model complexity, accuracy, latency, power, etc. based on application requirements + +* Gain strategic insight into selecting and applying model optimizations based on use-case constraints and hardware targets ::: ## Introduction -When machine learning models are deployed on systems, especially on resource-constrained embedded systems, the optimization of models is a necessity. While machine learning inherently often demands substantial computational resources, the systems are inherently limited in memory, processing power, and energy. This chapter will dive into the art and science of optimizing machine learning models to ensure they are lightweight, efficient, and effective when deployed in TinyML scenarios. - We have structured this chapter in three tiers. First, in @sec-model_ops_representation we examine the significance and methodologies of reducing the parameter complexity of models without compromising their inference capabilities. Techniques such as pruning and knowledge distillation are discussed, offering insights into how models can be compressed and simplified while maintaining, or even enhancing, their performance.
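To make the first of those ideas concrete before diving in, here is a minimal sketch of magnitude-based pruning in NumPy. The function name and the single global threshold are illustrative simplifications of our own, not the framework implementations surveyed later in this chapter.

```python
import numpy as np

def magnitude_prune(weights: np.ndarray, sparsity: float) -> np.ndarray:
    """Zero out the smallest-magnitude fraction `sparsity` of the weights."""
    k = int(weights.size * sparsity)
    if k == 0:
        return weights.copy()
    # The k-th smallest absolute value becomes the pruning threshold.
    threshold = np.partition(np.abs(weights).ravel(), k - 1)[k - 1]
    return np.where(np.abs(weights) <= threshold, np.float32(0.0), weights)

rng = np.random.default_rng(0)
w = rng.normal(size=(128, 128)).astype(np.float32)
w_pruned = magnitude_prune(w, sparsity=0.8)
print(f"share of zero weights: {np.mean(w_pruned == 0):.1%}")  # roughly 80%
```

Real pipelines typically prune gradually during training and fine-tune afterward to recover accuracy; this one-shot version only conveys the core mechanic.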
Going one level lower, in @sec-model_ops_numerics, we study the role of numerical precision in model computations and how altering it impacts model size, speed, and accuracy. We will examine the various numerical formats and how reduced-precision arithmetic can be leveraged to optimize models for embedded deployment. @@ -136,6 +148,7 @@ More formally, the lottery ticket hypothesis is a concept in deep learning that ![An example experiment from the lottery ticket hypothesis showing pruning and training experiments on a fully connected LeNet over a variety of pruning ratios: note the first plot showing how pruning is able to reveal a subnetwork nearly one-fifth the size that trains to a higher test accuracy faster than the unpruned network. The second plot, however, shows that more aggressively pruned models both train slower and fail to reach that same maximal test accuracy, due to the lower number of parameters.](images/modeloptimization_lottery_ticket_hypothesis.png) + #### Challenges & Limitations There is no free lunch with pruning optimizations, with some choices coming with both improvements and costs. Below we discuss some tradeoffs for practitioners to consider. @@ -710,14 +723,14 @@ This is one example of Algorithm-Hardware Co-design. CiM is a computing paradigm Different devices may have different memory hierarchies. Optimizing for the specific memory hierarchy in the specific hardware can lead to great performance improvements by reducing the costly operations of reading and writing to memory. Dataflow optimization can be achieved by optimizing for reusing data within a single layer and across multiple layers. This dataflow optimization can be tailored to the specific memory hierarchy of the hardware, which can lead to greater benefits than general optimizations for different hardware targets. -### Leveraging Sparsity +#### Leveraging Sparsity Pruning is a fundamental approach to compress models to make them compatible with resource-constrained devices. This results in sparse models where many of the weights are zero. Therefore, leveraging this sparsity can lead to significant improvements in performance. Tools have been created to achieve exactly this. RAMAN is a sparse TinyML accelerator designed for inference on edge devices. RAMAN overlaps input and output activations on the same memory space, reducing storage requirements by up to 50%. [@krishna2023raman] ![A figure showing the sparse columns of the filter matrix of a CNN that are aggregated to create a dense matrix, leading to smaller dimensions in the matrix and more efficient computations. [@kung2018packing] -### Optimization Frameworks +#### Optimization Frameworks Optimization Frameworks have been introduced to exploit the specific capabilities of the hardware to accelerate the software. One example of such a framework is hls4ml. This open-source software-hardware co-design workflow aids in interpreting and translating machine learning algorithms for implementation with both FPGA and ASIC technologies, enhancing their performance. Features such as network optimization, new Python APIs, quantization-aware pruning, and end-to-end FPGA workflows are embedded into the hls4ml framework, leveraging parallel processing units, memory hierarchies, and specialized instruction sets to optimize models for edge hardware. Moreover, hls4ml is capable of translating machine learning algorithms directly into FPGA firmware.
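Before moving on, the storage savings that sparsity-aware accelerators like RAMAN exploit can be made tangible with a toy example. SciPy's CSR format below is a stand-in of our own choosing; real accelerators use their own packing schemes, such as the column-aggregation approach in the figure above.

```python
import numpy as np
from scipy import sparse

rng = np.random.default_rng(0)
w = rng.normal(size=(512, 512)).astype(np.float32)
w[np.abs(w) < 1.6] = 0.0  # crude magnitude pruning, roughly 89% zeros

csr = sparse.csr_matrix(w)  # store only nonzeros plus their indices
dense_mb = w.nbytes / 1e6
csr_mb = (csr.data.nbytes + csr.indices.nbytes + csr.indptr.nbytes) / 1e6
print(f"sparsity: {1 - csr.nnz / w.size:.1%}; "
      f"dense: {dense_mb:.2f} MB, CSR: {csr_mb:.2f} MB")
```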
@@ -725,21 +738,21 @@ Optimization Frameworks have been introduced to exploit the specific capabilitie One other framework for FPGAs that focuses on a holistic approach is CFU Playground [@Prakash_2023]. -### Hardware Built Around Software +#### Hardware Built Around Software In a contrasting approach, hardware can be custom-designed around software requirements to optimize the performance for a specific application. This paradigm creates specialized hardware to better adapt to the specifics of the software, thus reducing computational overhead and improving operational efficiency. One example of this approach is a voice-recognition application by [@app112211073]. The paper proposes a structure wherein preprocessing operations, traditionally handled by software, are allocated to custom-designed hardware. This technique was achieved by introducing resistor–transistor logic to an inter-integrated circuit sound module for windowing and audio raw data acquisition in the voice-recognition application. Consequently, this offloading of preprocessing operations led to a reduction in computational load on the software, showcasing a practical application of building hardware around software to enhance efficiency and performance. ![A diagram showing how an FPGA was used to offload data preprocessing of the general purpose computation unit. [@app112211073]](images/modeloptimization_preprocessor.png) -### SplitNets +#### SplitNets SplitNets were introduced in the context of head-mounted systems. They distribute the Deep Neural Network (DNN) workload among camera sensors and an aggregator. This is particularly compelling in the context of TinyML. The SplitNet framework is a split-aware neural architecture search (NAS) that finds the optimal neural network architecture to achieve good accuracy, splits the model among the sensors and the aggregator, and minimizes the communication between them. Minimal communication is important in TinyML, where memory is highly constrained; this way, the sensors conduct some of the processing on their own chips and send only the necessary information to the aggregator. When testing on ImageNet, SplitNets were able to reduce the latency by one order of magnitude on head-mounted devices. This can be helpful when the sensor has its own chip. [@dong2022splitnets] ![A chart showing a comparison between the performance of SplitNets vs all-on-sensor and all-on-aggregator approaches. [@dong2022splitnets]](images/modeloptimization_SplitNets.png) -### Hardware Specific Data Augmentation +#### Hardware Specific Data Augmentation Each edge device may possess unique sensor characteristics, leading to specific noise patterns that can impact model performance. One example is audio data, where variations stemming from the choice of microphone are prevalent. Applications such as Keyword Spotting can experience substantial enhancements by incorporating data recorded from devices similar to those intended for deployment. Fine-tuning of existing models can be employed to adapt the data precisely to the sensor's distinctive characteristics. @@ -749,7 +762,7 @@ While all of the aforementioned techniques like [pruning](#sec-pruning), [quanti Without the extensive software innovation across frameworks, optimization tools, and hardware integration, most of these techniques would remain theoretical or only viable to experts. Without framework APIs and automation to simplify applying these optimizations, they would not see adoption. Software support makes them accessible to general practitioners and unlocks real-world benefits. In addition, issues such as hyperparameter tuning for pruning, managing the trade-off between model size and accuracy, and ensuring compatibility with target devices pose hurdles that developers must navigate.
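As one concrete taste of such framework support before surveying it in detail, the sketch below applies PyTorch's dynamic quantization entry point, `torch.quantization.quantize_dynamic`, to a toy model of our own; a real workflow would start from a trained network and validate accuracy afterward.

```python
import torch
import torch.nn as nn

# Toy float32 model standing in for a trained network.
model = nn.Sequential(nn.Linear(256, 128), nn.ReLU(), nn.Linear(128, 10))

# One call rewrites the Linear layers to use int8 weights, with activations
# quantized dynamically at inference time; no retraining is required.
quantized = torch.quantization.quantize_dynamic(
    model, {nn.Linear}, dtype=torch.qint8
)

x = torch.randn(1, 256)
print(quantized(x).shape)  # torch.Size([1, 10]), served by int8 kernels
```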
-#### Built-in Optimization APIs +### Built-in Optimization APIs Major machine learning frameworks like TensorFlow, PyTorch, and MXNet provide libraries and APIs to allow common model optimization techniques to be applied without requiring custom implementations. For example, TensorFlow offers the TensorFlow Model Optimization Toolkit, which contains modules like: @@ -761,7 +774,7 @@ These APIs allow users to enable optimization techniques like quantization and p The core benefit of built-in optimizations is that users can apply them without re-implementing complex techniques. This makes optimized models accessible to a broad range of practitioners. It also ensures best practices are followed by building on research and experience implementing the methods. As new optimizations emerge, frameworks strive to provide native support and APIs where possible to further lower the barrier to efficient ML. The availability of these tools is key to widespread adoption. -#### Automated Optimization Tools +### Automated Optimization Tools Automated optimization tools provided by frameworks can analyze models and automatically apply optimizations like quantization, pruning, and operator fusion to make the process easier and accessible without excessive manual tuning. In effect, this builds on top of the previous section. For example, TensorFlow provides the TensorFlow Model Optimization Toolkit, which contains modules like: @@ -773,7 +786,7 @@ These automated modules only require the user to provide the original floating point model, and handle the end-to-end optimization pipeline including any re-training to regain accuracy. Other frameworks like PyTorch also offer increasing automation support, for example through torch.quantization.quantize\_dynamic. Automated optimization makes efficient ML accessible to practitioners without optimization expertise. -#### Hardware Optimization Libraries +### Hardware Optimization Libraries Hardware libraries like TensorRT and TensorFlow XLA allow models to be highly optimized for target hardware through techniques that we discussed earlier. @@ -791,7 +804,7 @@ Profiling-based Tuning - We can use profiling tools to identify bottlenecks. For By integrating framework models with these hardware libraries through conversion and execution pipelines, ML developers can achieve significant speedups and efficiency gains from low-level optimizations tailored to the target hardware. The tight integration between software and hardware is key to enabling performant deployment of ML applications, especially on mobile and TinyML devices. -#### Visualizing Optimizations +### Visualizing Optimizations Implementing model optimization techniques without visibility into the effects on the model can be challenging. Dedicated tooling or visualization tools can provide critical and useful insight into model changes and help track the optimization process. Let's consider the optimizations we considered earlier, such as pruning for sparsity and quantization, as the short sketch below illustrates.
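A first visualization step needs nothing more than iterating over a model's weight tensors. In this sketch, the toy PyTorch model and the quantile-based threshold that simulates pruning are our own illustrative choices:

```python
import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(784, 128), nn.ReLU(), nn.Linear(128, 10))

# Simulate pruning by zeroing the smallest 70% of each weight matrix.
with torch.no_grad():
    for p in model.parameters():
        if p.dim() > 1:
            threshold = p.abs().quantile(0.7)
            p[p.abs() < threshold] = 0.0

# Report per-layer sparsity: a minimal, text-only view of pruning's effect.
for name, p in model.named_parameters():
    if p.dim() > 1:
        zeros = (p == 0).float().mean().item()
        print(f"{name}: {zeros:.1%} zeros")
```

Richer tooling, such as TensorBoard histograms of weight distributions before and after quantization, builds on exactly this kind of inspection.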
diff --git a/sustainable_ai.qmd b/sustainable_ai.qmd index cbf2eed0..8573a4e8 100644 --- a/sustainable_ai.qmd +++ b/sustainable_ai.qmd @@ -1,5 +1,7 @@ # Sustainable AI +![_DALL·E 3 Prompt: 3D illustration on a light background of a sustainable AI network interconnected with a myriad of eco-friendly energy sources. The AI actively manages and optimizes its energy from sources like solar arrays, wind turbines, and hydro dams, emphasizing power efficiency and performance. Deep neural networks spread throughout, receiving energy from these sustainable resources._](./images/cover_sustainable_ai.png) + ## Introduction Explanation: In this introductory section, we elucidate the significance of sustainability in the context of AI, emphasizing the necessity to address environmental, economic, and social dimensions to build resilient and sustainable AI systems.